[otbn] Split up insn_yaml.py

This was getting a bit unwieldy: split it into smaller files.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/util/Makefile b/hw/ip/otbn/util/Makefile
index c3569d9..ec93281 100644
--- a/hw/ip/otbn/util/Makefile
+++ b/hw/ip/otbn/util/Makefile
@@ -14,7 +14,7 @@
 $(build-dir) $(cs-build-dir) $(lint-build-dir):
 	mkdir -p $@
 
-pylibs := shared/insn_yaml.py shared/mem_layout.py
+pylibs := $(wildcard shared/*.py)
 pyscripts := yaml_to_doc.py otbn-as otbn-ld otbn-objdump
 
 lint-stamps := $(foreach s,$(pyscripts),$(lint-build-dir)/$(s).stamp)
diff --git a/hw/ip/otbn/util/otbn-as b/hw/ip/otbn/util/otbn-as
index 23e8117..6698e26 100755
--- a/hw/ip/otbn/util/otbn-as
+++ b/hw/ip/otbn/util/otbn-as
@@ -24,8 +24,10 @@
 import tempfile
 from typing import Dict, List, Optional, Set, TextIO, Tuple
 
-from shared.insn_yaml import (BitRanges, Encoding, Insn, InsnsFile, Operand,
-                              RegOperandType, load_file)
+from shared.bit_ranges import BitRanges
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
+from shared.operand import RegOperandType, Operand
 
 
 class RVFmt:
diff --git a/hw/ip/otbn/util/otbn-objdump b/hw/ip/otbn/util/otbn-objdump
index 0cc2ac4..3ffc924 100755
--- a/hw/ip/otbn/util/otbn-objdump
+++ b/hw/ip/otbn/util/otbn-objdump
@@ -11,7 +11,8 @@
 import sys
 from typing import Dict, List, Optional, Tuple
 
-from shared.insn_yaml import Encoding, Insn, InsnsFile, load_file
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
 
 
 def snoop_disasm_flags(argv: List[str]) -> bool:
diff --git a/hw/ip/otbn/util/shared/bit_ranges.py b/hw/ip/otbn/util/shared/bit_ranges.py
new file mode 100644
index 0000000..56c3d69
--- /dev/null
+++ b/hw/ip/otbn/util/shared/bit_ranges.py
@@ -0,0 +1,113 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Tuple
+
+
+class BitRanges:
+    '''Represents the bit ranges used for a field in an encoding scheme'''
+    def __init__(self,
+                 mask: int,
+                 ranges: List[Tuple[int, int]],
+                 width: int) -> None:
+        self.mask = mask
+        self.ranges = ranges
+        self.width = width
+
+    @staticmethod
+    def from_list(ranges: List[Tuple[int, int]]) -> 'BitRanges':
+        mask = 0
+        width = 0
+        for msb, lsb in ranges:
+            assert 0 <= lsb <= msb <= 31
+            rng_mask = (1 << (msb + 1)) - (1 << lsb)
+            assert not (rng_mask & mask)
+            mask |= rng_mask
+            width += msb - lsb + 1
+
+        return BitRanges(mask, ranges, width)
+
+    @staticmethod
+    def from_yaml(as_string: str, what: str) -> 'BitRanges':
+        #   ranges ::= range
+        #            | range ',' ranges
+        #
+        #   range ::= num
+        #           | num '-' num
+        #
+        # Ranges are assumed to be msb-lsb (with msb >= lsb). Bit indices are
+        # at most 31 and ranges are disjoint.
+
+        if not as_string:
+            raise ValueError('Empty string as bits for {}'.format(what))
+
+        overlaps = 0
+
+        mask = 0
+        ranges = []
+        width = 0
+
+        for rng in as_string.split(','):
+            match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng)
+            if match is None:
+                raise ValueError('Range {!r} in bits for {} is malformed.'
+                                 .format(rng, what))
+
+            msb = int(match.group(1))
+            maybe_lsb = match.group(2)
+            lsb = msb if maybe_lsb is None else int(maybe_lsb)
+
+            if msb < lsb:
+                raise ValueError('Range {!r} in bits for {} has msb < lsb.'
+                                 .format(rng, what))
+
+            if msb >= 32:
+                raise ValueError('Range {!r} in bits for {} has msb >= 32.'
+                                 .format(rng, what))
+
+            rng_mask = (1 << (msb + 1)) - (1 << lsb)
+            overlaps |= rng_mask & mask
+            mask |= rng_mask
+
+            ranges.append((msb, lsb))
+            width += msb - lsb + 1
+
+        if overlaps:
+            raise ValueError('Bits for {} have overlapping ranges '
+                             '(mask: {:#010x})'  # 10 = '0x' + 8 hex digits
+                             .format(what, overlaps))
+
+        return BitRanges(mask, ranges, width)
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, BitRanges) and self.ranges == other.ranges
+
+    def encode(self, value: int) -> int:
+        '''Encode the given value as bit fields'''
+        ret = 0
+        bits_taken = 0
+        for msb, lsb in self.ranges:
+            rng_width = msb - lsb + 1
+            value_msb = self.width - 1 - bits_taken
+            value_lsb = value_msb - rng_width + 1
+
+            rng_mask = (1 << rng_width) - 1
+            rng_value = (value >> value_lsb) & rng_mask
+            ret |= rng_value << lsb
+            bits_taken += rng_width
+
+        assert bits_taken == self.width
+        return ret
+
+    def decode(self, raw: int) -> int:
+        '''Extract the bit fields from the given value'''
+        ret = 0
+        for msb, lsb in self.ranges:
+            width = msb - lsb + 1
+            mask = (1 << width) - 1
+
+            ret <<= width
+            ret |= (raw >> lsb) & mask
+        return ret
diff --git a/hw/ip/otbn/util/shared/bool_literal.py b/hw/ip/otbn/util/shared/bool_literal.py
new file mode 100644
index 0000000..78a8b97
--- /dev/null
+++ b/hw/ip/otbn/util/shared/bool_literal.py
@@ -0,0 +1,66 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+
+class BoolLiteral:
+    '''Represents a boolean literal, with possible 'x' characters
+
+    We represent this as 2 masks: "ones" and "xs". The ones mask is the bits
+    that are marked 1. The xs mask is the bits that are marked x. Then you can
+    test whether a particular value matches the literal by zeroing all bits in
+    the x mask and then comparing with the ones mask.
+
+    '''
+    def __init__(self, ones: int, xs: int, width: int) -> None:
+        assert width > 0
+        assert (ones >> width) == 0
+        assert (xs >> width) == 0
+
+        self.ones = ones
+        self.xs = xs
+        self.width = width
+
+    @staticmethod
+    def from_string(as_string: str, what: str) -> 'BoolLiteral':
+        ones = 0
+        xs = 0
+        width = 0
+
+        # The literal should always start with a 'b'
+        if not as_string.startswith('b'):
+            raise ValueError("Boolean literal for {} doesn't start with a 'b'."
+                             .format(what))
+
+        for char in as_string[1:]:
+            if char == '_':
+                continue
+
+            ones <<= 1
+            xs <<= 1
+            width += 1
+
+            if char == '0':
+                continue
+            elif char == '1':
+                ones |= 1
+            elif char == 'x':
+                xs |= 1
+            else:
+                raise ValueError('Boolean literal for {} has '
+                                 'unsupported character: {!r}.'
+                                 .format(what, char))
+
+        if not width:
+            raise ValueError('Empty boolean literal for {}.'.format(what))
+
+        return BoolLiteral(ones, xs, width)
+
+    def char_for_bit(self, bit: int) -> str:
+        '''Return 0, 1 or x for the bit at the given position'''
+        assert bit < self.width
+        if (self.ones >> bit) & 1:
+            return '1'
+        if (self.xs >> bit) & 1:
+            return 'x'
+        return '0'
diff --git a/hw/ip/otbn/util/shared/encoding.py b/hw/ip/otbn/util/shared/encoding.py
new file mode 100644
index 0000000..9d2945a
--- /dev/null
+++ b/hw/ip/otbn/util/shared/encoding.py
@@ -0,0 +1,203 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import Dict, Tuple, Union
+
+from .bool_literal import BoolLiteral
+from .encoding_scheme import EncSchemeField, EncSchemes
+from .operand import Operand
+from .yaml_parse_helpers import check_keys, check_str
+
+
+class EncodingField:
+    '''A single element of an encoding's mapping'''
+    def __init__(self,
+                 value: Union[BoolLiteral, str],
+                 scheme_field: EncSchemeField) -> None:
+        self.value = value
+        self.scheme_field = scheme_field
+
+    @staticmethod
+    def from_yaml(as_str: str,
+                  scheme_field: EncSchemeField,
+                  name_to_operand: Dict[str, Operand],
+                  what: str) -> 'EncodingField':
+        # The value should either be a boolean literal ("b000xx11" or similar)
+        # or should be a name, which is taken as the name of an operand.
+        if not as_str:
+            raise ValueError('Empty string as {}.'.format(what))
+
+        # Set value to be either the bool literal or the name of the
+        # operand.
+        value_width = None
+        value = ''  # type: Union[BoolLiteral, str]
+        if re.match(r'b[01x_]+$', as_str):
+            value = BoolLiteral.from_string(as_str, what)
+            value_width = value.width
+            value_type = 'a literal value'
+        else:
+            operand = name_to_operand.get(as_str)
+            if operand is None:
+                raise ValueError('Unknown operand, {!r}, as {}'
+                                 .format(as_str, what))
+            value_width = operand.op_type.width
+            value = as_str
+            value_type = 'an operand'
+
+        # Unless we had an operand of type 'imm' (unknown width), we now have
+        # an expected width. Check it matches the width of the schema field.
+        if value_width is not None:
+            if scheme_field.bits.width != value_width:
+                raise ValueError('{} is mapped to {} with width {}, but the '
+                                 'encoding schema field has width {}.'
+                                 .format(what, value_type, value_width,
+                                         scheme_field.bits.width))
+
+        # Track the scheme field as well (so we don't have to keep track of a
+        # scheme once we've made an encoding object)
+        return EncodingField(value, scheme_field)
+
+
+class Encoding:
+    '''The encoding for an instruction'''
+    def __init__(self,
+                 yml: object,
+                 schemes: EncSchemes,
+                 name_to_operand: Dict[str, Operand],
+                 mnemonic: str):
+        what = 'encoding for instruction {!r}'.format(mnemonic)
+        yd = check_keys(yml, what, ['scheme', 'mapping'], [])
+
+        scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic)
+        scheme_name = check_str(yd['scheme'], scheme_what)
+        scheme_fields = schemes.resolve(scheme_name, mnemonic)
+
+        what = 'encoding mapping for instruction {!r}'.format(mnemonic)
+
+        # Check we've got exactly the right fields for the scheme
+        ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), [])
+
+        # Track the set of operand names that were used in some field
+        operands_used = set()
+
+        self.fields = {}
+        for field_name, scheme_field in scheme_fields.fields.items():
+            if scheme_field.value is not None:
+                field = EncodingField(scheme_field.value, scheme_field)
+            else:
+                field_what = ('value for {} field in encoding for instruction {!r}'
+                              .format(field_name, mnemonic))
+                field = EncodingField.from_yaml(check_str(ydm[field_name], field_what),
+                                                scheme_fields.fields[field_name],
+                                                name_to_operand,
+                                                field_what)
+
+                # If the field's value is an operand rather than a literal, it
+                # will have type str. Track the operands that we've used.
+                if isinstance(field.value, str):
+                    operands_used.add(field.value)
+
+            self.fields[field_name] = field
+
+        # We know that every field in the encoding scheme has a value. But we
+        # still need to check that every operand ended up in some field.
+        assert operands_used <= set(name_to_operand.keys())
+        unused_ops = set(name_to_operand.keys()) - operands_used
+        if unused_ops:
+            raise ValueError('Not all operands used in {} (missing: {}).'
+                             .format(what, ', '.join(list(unused_ops))))
+
+    def get_masks(self) -> Tuple[int, int]:
+        '''Return zeros/ones masks for encoding
+
+        Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a
+        bit is set if there is a bit pattern matching this encoding with that
+        bit zero. m1 is the ones mask: equivalent, but for that bit one.
+
+        '''
+        m0 = 0
+        m1 = 0
+        for field_name, field in self.fields.items():
+            if isinstance(field.value, str):
+                m0 |= field.scheme_field.bits.mask
+                m1 |= field.scheme_field.bits.mask
+            else:
+                # Match up the bits in the value with the ranges in the scheme.
+                assert field.value.width > 0
+                assert field.value.width == field.scheme_field.bits.width
+                bits_seen = 0
+                for msb, lsb in field.scheme_field.bits.ranges:
+                    val_msb = field.scheme_field.bits.width - 1 - bits_seen
+                    val_lsb = val_msb - msb + lsb
+                    bits_seen += msb - lsb + 1
+
+                    for idx in range(0, msb - lsb + 1):
+                        desc = field.value.char_for_bit(val_lsb + idx)
+                        if desc in ['0', 'x']:
+                            m0 |= 1 << (idx + lsb)
+                        if desc in ['1', 'x']:
+                            m1 |= 1 << (idx + lsb)
+
+        all_bits = (1 << 32) - 1
+        assert (m0 | m1) == all_bits
+        return (m0, m1)
+
+    def get_ones_mask(self) -> int:
+        '''Return the mask of fixed bits that are set
+
+        For literal values of x (unused bits in the encoding), we'll prefer
+        '0'.
+
+        '''
+        m0, m1 = self.get_masks()
+        return m1 & ~m0
+
+    def assemble(self, op_to_idx: Dict[str, int]) -> int:
+        '''Assemble an instruction
+
+        op_to_idx should map each operand in the encoding to some integer
+        index, which should be small enough to fit in the width of the
+        operand's type and should be representable after any shift. Will raise
+        a ValueError if not.
+
+        '''
+        val = self.get_ones_mask()
+        for field_name, field in self.fields.items():
+            if not isinstance(field.value, str):
+                # We've done this field already (in get_ones_mask)
+                continue
+
+            # Try to get the operand value for the field. If this is an
+            # optional operand, we might not have one, and just encode zero.
+            field_val = op_to_idx.get(field.value, 0)
+
+            # Are there any low bits that shouldn't be there?
+            shift_mask = (1 << field.scheme_field.shift) - 1
+            if field_val & shift_mask:
+                raise ValueError("operand field {} has a shift of {}, "
+                                 "so can't represent the value {:#x}."
+                                 .format(field.value,
+                                         field.scheme_field.shift,
+                                         field_val))
+
+            shifted = field_val >> field.scheme_field.shift
+
+            # Is the number too big? At the moment, we are assuming immediates
+            # are unsigned (because the OTBN big number instructions all have
+            # unsigned immediates).
+            if shifted >> field.scheme_field.bits.width:
+                shift_msg = ((' (shifted right by {} bits from {:#x})'
+                              .format(field.scheme_field.shift, field_val))
+                             if field.scheme_field.shift
+                             else '')
+                raise ValueError("operand field {} has a width of {}, "
+                                 "so can't represent the value {:#x}{}."
+                                 .format(field.value,
+                                         field.scheme_field.bits.width,
+                                         shifted, shift_msg))
+
+            val |= field.scheme_field.bits.encode(shifted)
+
+        return val
diff --git a/hw/ip/otbn/util/shared/encoding_scheme.py b/hw/ip/otbn/util/shared/encoding_scheme.py
new file mode 100644
index 0000000..0e6d4ce
--- /dev/null
+++ b/hw/ip/otbn/util/shared/encoding_scheme.py
@@ -0,0 +1,356 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code for handling instruction encoding schemes'''
+
+import re
+from typing import Dict, List, Optional, Set
+
+from .bit_ranges import BitRanges
+from .bool_literal import BoolLiteral
+from .yaml_parse_helpers import check_keys, check_str, check_list, index_list
+
+
+class EncSchemeField:
+    '''Represents a single field in an encoding scheme'''
+    def __init__(self,
+                 bits: BitRanges,
+                 value: Optional[BoolLiteral],
+                 shift: int) -> None:
+        self.bits = bits
+        self.value = value
+        self.shift = shift
+
+    @staticmethod
+    def from_yaml(yml: object, what: str) -> 'EncSchemeField':
+        # This is either represented as a dict in the YAML or as a bare string.
+        bits_what = 'bits for {}'.format(what)
+        value_what = 'value for {}'.format(what)
+        shift_what = 'shift for {}'.format(what)
+
+        shift = 0
+
+        if isinstance(yml, dict):
+            yd = check_keys(yml, what, ['bits'], ['value', 'shift'])
+
+            bits_yml = yd['bits']
+            if not (isinstance(bits_yml, str) or isinstance(bits_yml, int)):
+                raise ValueError('{} is of type {}, not a string or int.'
+                                 .format(bits_what, type(bits_yml).__name__))
+
+            # We require value to be given as a string because it's supposed to
+            # be in base 2, and PyYAML will parse 111 as one-hundred and
+            # eleven, 011 as 9 and 0x11 as 17. Aargh!
+            raw_value = None
+            val_yml = yd.get('value')
+            if val_yml is not None:
+                if not isinstance(val_yml, str):
+                    raise ValueError("{} is of type {}, but must be a string "
+                                     "(we don't allow automatic conversion "
+                                     "because YAML's int conversion assumes "
+                                     "base 10 and value should be in base 2)."
+                                     .format(value_what,
+                                             type(val_yml).__name__))
+                raw_value = val_yml
+
+            # shift, on the other hand, is written in base 10. Allow an
+            # integer.
+            shift_yml = yd.get('shift')
+            if shift_yml is None:
+                pass
+            elif isinstance(shift_yml, str):
+                if not re.match(r'[0-9]+$', shift_yml):
+                    raise ValueError('{} is {!r} but should be a '
+                                     'non-negative integer.'
+                                     .format(shift_what, shift_yml))
+                shift = int(shift_yml)
+            elif isinstance(shift_yml, int):
+                if shift_yml < 0:
+                    raise ValueError('{} is {!r} but should be a '
+                                     'non-negative integer.'
+                                     .format(shift_what, shift_yml))
+                shift = shift_yml
+            else:
+                raise ValueError("{} is of type {}, but must be a string "
+                                 "or non-negative integer."
+                                 .format(shift_what, type(shift_yml).__name__))
+        elif isinstance(yml, str) or isinstance(yml, int):
+            bits_yml = yml
+            raw_value = None
+        else:
+            raise ValueError('{} is a {}, but should be a '
+                             'dict, string or integer.'
+                             .format(what, type(yml).__name__))
+
+        # The bits field is usually parsed as a string ("10-4", or similar).
+        # But if it's a bare integer then YAML will parse it as an int. That's
+        # fine, but we turn it back into a string to be re-parsed by BitRanges.
+        assert isinstance(bits_yml, str) or isinstance(bits_yml, int)
+
+        bits = BitRanges.from_yaml(str(bits_yml), bits_what)
+        value = None
+        if raw_value is not None:
+            value = BoolLiteral.from_string(raw_value, value_what)
+            if bits.width != value.width:
+                raise ValueError('{} has bits that imply a width of {}, but '
+                                 'a value with width {}.'
+                                 .format(what, bits.width, value.width))
+
+        return EncSchemeField(bits, value, shift)
+
+
+class EncSchemeImport:
+    '''An object representing inheritance of a parent scheme
+
+    When importing a parent scheme, we can set some of its fields with
+    immediate values. These are stored in the settings field.
+
+    '''
+    def __init__(self, yml: object, importer_name: str) -> None:
+        as_str = check_str(yml,
+                           'value for import in encoding scheme {!r}'
+                           .format(importer_name))
+
+        # The supported syntax is
+        #
+        #    - parent0(field0=b111, field1=b10)
+        #    - parent1()
+        #    - parent2
+
+        match = re.match(r'([^ (]+)[ ]*(?:\(([^)]*)\))?$', as_str)
+        if not match:
+            raise ValueError('Malformed encoding scheme '
+                             'inheritance by scheme {!r}: {!r}.'
+                             .format(importer_name, as_str))
+
+        self.parent = match.group(1)
+        self.settings = {}  # type: Dict[str, BoolLiteral]
+
+        when = ('When inheriting from {!r} in encoding scheme {!r}'
+                .format(self.parent, importer_name))
+
+        if match.group(2):  # no parens, or an empty "()", means no settings
+            args = match.group(2).split(',')
+            for arg in args:
+                arg = arg.strip()
+                arg_parts = arg.split('=')
+                if len(arg_parts) != 2:
+                    raise ValueError('{}, found an argument with {} '
+                                     'equals signs (should have exactly one).'
+                                     .format(when, len(arg_parts) - 1))
+
+                field_name = arg_parts[0]
+                field_what = ('literal value for field {!r} when inheriting '
+                              'from {!r} in encoding scheme {!r}'
+                              .format(arg_parts[0], self.parent, importer_name))
+                field_value = BoolLiteral.from_string(arg_parts[1], field_what)
+
+                if field_name in self.settings:
+                    raise ValueError('{}, found multiple arguments assigning '
+                                     'values to the field {!r}.'
+                                     .format(when, field_name))
+
+                self.settings[field_name] = field_value
+
+    def apply_settings(self,
+                       esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields':
+        # Copy and set values in anything that has a setting
+        fields = {}
+        for name, literal in self.settings.items():
+            old_field = esf.fields.get(name)
+            if old_field is None:
+                raise ValueError('{} sets unknown field {!r} from {!r}.'
+                                 .format(what, name, self.parent))
+
+            if old_field.bits.width != literal.width:
+                raise ValueError('{} sets field {!r} from {!r} with a literal '
+                                 'of width {}, but the field has width {}.'
+                                 .format(what, name, self.parent,
+                                         literal.width, old_field.bits.width))
+
+            fields[name] = EncSchemeField(old_field.bits,
+                                          literal,
+                                          old_field.shift)
+
+        # Copy anything else
+        op_fields = set()
+        for name, old_field in esf.fields.items():
+            if name in fields:
+                continue
+            op_fields.add(name)
+            fields[name] = old_field
+
+        return EncSchemeFields(fields, op_fields, esf.mask)
+
+
+class EncSchemeFields:
+    '''An object representing some fields in an encoding scheme'''
+    def __init__(self,
+                 fields: Dict[str, EncSchemeField],
+                 op_fields: Set[str],
+                 mask: int) -> None:
+        self.fields = fields
+        self.op_fields = op_fields
+        self.mask = mask
+
+    @staticmethod
+    def empty() -> 'EncSchemeFields':
+        return EncSchemeFields({}, set(), 0)
+
+    @staticmethod
+    def from_yaml(yml: object, name: str) -> 'EncSchemeFields':
+        if not isinstance(yml, dict):
+            raise ValueError('fields for encoding scheme {!r} should be a '
+                             'dict, but we saw a {}.'
+                             .format(name, type(yml).__name__))
+
+        fields = {}
+        op_fields = set()  # type: Set[str]
+        mask = 0
+
+        overlaps = 0
+
+        for key, val in yml.items():
+            if not isinstance(key, str):
+                raise ValueError('{!r} is a bad key for a field name of '
+                                 'encoding scheme {} (should be str, not {}).'
+                                 .format(key, name, type(key).__name__))
+
+            fld_what = 'field {!r} of encoding scheme {}'.format(key, name)
+            field = EncSchemeField.from_yaml(val, fld_what)
+
+            overlaps |= mask & field.bits.mask
+            mask |= field.bits.mask
+
+            fields[key] = field
+            if field.value is None:
+                op_fields.add(key)
+
+        if overlaps:
+            raise ValueError('Direct fields for encoding scheme {} have '
+                             'overlapping ranges (mask: {:#010x})'
+                             .format(name, overlaps))
+
+        return EncSchemeFields(fields, op_fields, mask)
+
+    def merge_in(self, right: 'EncSchemeFields', when: str) -> None:
+        for name, field in right.fields.items():
+            if name in self.fields:
+                raise ValueError('Duplicate field name: {!r} {}.'
+                                 .format(name, when))
+
+            overlap = self.mask & field.bits.mask
+            if overlap:
+                raise ValueError('Overlapping bit ranges '
+                                 '(masks: {:08x} and {:08x} have '
+                                 'intersection {:08x}) {}.'
+                                 .format(self.mask,
+                                         field.bits.mask, overlap, when))
+
+            self.fields[name] = field
+            self.mask |= field.bits.mask
+            if field.value is None:
+                assert name not in self.op_fields
+                self.op_fields.add(name)
+
+
+class EncScheme:
+    def __init__(self, yml: object, name: str) -> None:
+        what = 'encoding scheme {!r}'.format(name)
+        yd = check_keys(yml, what, [], ['parents', 'fields'])
+
+        if not yd:
+            raise ValueError('{} has no parents or fields.'.format(what))
+
+        fields_yml = yd.get('fields')
+        self.direct_fields = (EncSchemeFields.from_yaml(fields_yml, name)
+                              if fields_yml is not None
+                              else EncSchemeFields.empty())
+
+        parents_yml = yd.get('parents')
+        parents_what = 'parents of {}'.format(what)
+        parents = ([EncSchemeImport(y, name)
+                    for y in check_list(parents_yml, parents_what)]
+                   if parents_yml is not None
+                   else [])
+        self.parents = index_list(parents_what,
+                                  parents,
+                                  lambda imp: imp.parent)
+
+
+class EncSchemes:
+    def __init__(self, yml: object) -> None:
+        if not isinstance(yml, dict):
+            raise ValueError("value for encoding-schemes is expected to be "
+                             "a dict, but was actually a {}."
+                             .format(type(yml).__name__))
+
+        self.schemes = {}  # type: Dict[str, EncScheme]
+        self.resolved = {}  # type: Dict[str, EncSchemeFields]
+
+        for key, val in yml.items():
+            if not isinstance(key, str):
+                raise ValueError('{!r} is a bad key for an encoding scheme '
+                                 'name (should be str, not {}).'
+                                 .format(key, type(key).__name__))
+            self.schemes[key] = EncScheme(val, key)
+
+    def _resolve(self,
+                 name: str,
+                 user: str,
+                 stack: List[str]) -> EncSchemeFields:
+        # Have we resolved this before?
+        resolved = self.resolved.get(name)
+        if resolved is not None:
+            return resolved
+
+        # Spot any circular inheritance
+        if name in stack:
+            raise RuntimeError('Circular inheritance of encoding '
+                               'schemes: {}'
+                               .format(' -> '.join(stack + [name])))
+
+        # Does the scheme actually exist?
+        scheme = self.schemes.get(name)
+        if scheme is None:
+            raise ValueError('{} requires undefined encoding scheme {!r}.'
+                             .format(user, name))
+
+        # Recursively try to resolve each parent scheme, applying any import
+        # settings
+        resolved_parents = {}
+        new_stack = stack + [name]
+        what = 'Import list of encoding scheme {!r}'.format(name)
+        for pname, pimport in scheme.parents.items():
+            resolved = self._resolve(pimport.parent, what, new_stack)
+            resolved_parents[pname] = pimport.apply_settings(resolved, what)
+
+        # Now try to merge the resolved imports
+        merged = EncSchemeFields.empty()
+        parent_names_so_far = []  # type: List[str]
+        for pname, pfields in resolved_parents.items():
+            when = ('merging fields of scheme {} into '
+                    'already merged fields of {}'
+                    .format(pname, ', '.join(parent_names_so_far)))
+            merged.merge_in(pfields, when)
+            parent_names_so_far.append(repr(pname))
+
+        # Now merge in any direct fields and memoise the result for later calls
+        when = ('merging direct fields of scheme {} into fields from parents'
+                .format(name))
+        merged.merge_in(scheme.direct_fields, when)
+        self.resolved[name] = merged
+        return merged
+
+    def resolve(self, name: str, mnemonic: str) -> EncSchemeFields:
+        fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), [])
+
+        # Check completeness
+        missing = ((1 << 32) - 1) & ~fields.mask
+        if missing:
+            raise ValueError('Fields for encoding scheme {} miss some bits '
+                             '(mask: {:#010x})'  # 10 = '0x' + 8 hex digits
+                             .format(name, missing))
+
+        return fields
diff --git a/hw/ip/otbn/util/shared/insn_yaml.py b/hw/ip/otbn/util/shared/insn_yaml.py
index 22edb45..a397e6d 100644
--- a/hw/ip/otbn/util/shared/insn_yaml.py
+++ b/hw/ip/otbn/util/shared/insn_yaml.py
@@ -6,118 +6,16 @@
 
 import itertools
 import re
-from typing import (Callable, Dict, List, Optional,
-                    Sequence, Set, Tuple, TypeVar, Union, cast)
+from typing import Dict, List, Optional, Tuple, cast
 
 import yaml
 
-
-T = TypeVar('T')
-
-
-def check_keys(obj: object,
-               what: str,
-               required_keys: List[str],
-               optional_keys: List[str]) -> Dict[str, object]:
-    '''Check that obj is a dict object with the expected keys
-
-    If not, raise a ValueError; the what argument names the object.
-
-    '''
-    if not isinstance(obj, dict):
-        raise ValueError("{} is expected to be a dict, but was actually a {}."
-                         .format(what, type(obj).__name__))
-
-    allowed = set()
-    missing = []
-    for key in required_keys:
-        assert key not in allowed
-        allowed.add(key)
-        if key not in obj:
-            missing.append(key)
-
-    for key in optional_keys:
-        assert key not in allowed
-        allowed.add(key)
-
-    unexpected = []
-    for key in obj:
-        if key not in allowed:
-            unexpected.append(key)
-
-    if missing or unexpected:
-        mstr = ('The following required fields were missing: {}.'
-                .format(', '.join(missing)) if missing else '')
-        ustr = ('The following unexpected fields were found: {}.'
-                .format(', '.join(unexpected)) if unexpected else '')
-        raise ValueError("{} doesn't have the right keys. {}{}{}"
-                         .format(what,
-                                 mstr,
-                                 ' ' if mstr and ustr else '',
-                                 ustr))
-
-    return obj
-
-
-def check_str(obj: object, what: str) -> str:
-    '''Check that the given object is a string
-
-    If not, raise a ValueError; the what argument names the object.
-
-    '''
-    if not isinstance(obj, str):
-        raise ValueError('{} is of type {}, not a string.'
-                         .format(what, type(obj).__name__))
-    return obj
-
-
-def check_optional_str(obj: object, what: str) -> Optional[str]:
-    '''Check that the given object is a string or None
-
-    If not, raise a ValueError; the what argument names the object.
-
-    '''
-    if obj is not None and not isinstance(obj, str):
-        raise ValueError('{} is of type {}, not a string.'
-                         .format(what, type(obj).__name__))
-    return obj
-
-
-def check_bool(obj: object, what: str) -> bool:
-    '''Check that the given object is a bool
-
-    If not, raise a ValueError; the what argument names the object.
-
-    '''
-    if obj is not True and obj is not False:
-        raise ValueError('{} is of type {}, not a string.'
-                         .format(what, type(obj).__name__))
-    return obj
-
-
-def check_list(obj: object, what: str) -> List[object]:
-    '''Check that the given object is a list
-
-    If not, raise a ValueError; the what argument names the object.
-
-    '''
-    if not isinstance(obj, list):
-        raise ValueError('{} is of type {}, not a list.'
-                         .format(what, type(obj).__name__))
-    return obj
-
-
-def index_list(what: str,
-               objs: Sequence[T],
-               get_key: Callable[[T], str]) -> Dict[str, T]:
-    ret = {}
-    for obj in objs:
-        key = get_key(obj)
-        if key in ret:
-            raise ValueError('Duplicate object with key {} in {}.'
-                             .format(key, what))
-        ret[key] = obj
-    return ret
+from .encoding import Encoding
+from .encoding_scheme import EncSchemes
+from .operand import Operand
+from .syntax import InsnSyntax
+from .yaml_parse_helpers import (check_keys, check_str, check_bool,
+                                 check_list, index_list, get_optional_str)
 
 
 class InsnGroup:
@@ -143,1333 +41,6 @@
         return self.groups[0].key
 
 
-class BitRanges:
-    '''Represents the bit ranges used for a field in an encoding scheme'''
-    def __init__(self,
-                 mask: int,
-                 ranges: List[Tuple[int, int]],
-                 width: int) -> None:
-        self.mask = mask
-        self.ranges = ranges
-        self.width = width
-
-    @staticmethod
-    def from_list(ranges: List[Tuple[int, int]]) -> 'BitRanges':
-        mask = 0
-        width = 0
-        for msb, lsb in ranges:
-            assert 0 <= lsb <= msb <= 31
-            rng_mask = (1 << (msb + 1)) - (1 << lsb)
-            assert not (rng_mask & mask)
-            mask |= rng_mask
-            width += msb - lsb + 1
-
-        return BitRanges(mask, ranges, width)
-
-    @staticmethod
-    def from_yaml(as_string: str, what: str) -> 'BitRanges':
-        #   ranges ::= range
-        #            | range ',' ranges
-        #
-        #   range ::= num
-        #           | num ':' num
-        #
-        # Ranges are assumed to be msb:lsb (with msb >= lsb). Bit indices are
-        # at most 31 and ranges are disjoint.
-
-        if not as_string:
-            raise ValueError('Empty string as bits for {}'.format(what))
-
-        overlaps = 0
-
-        mask = 0
-        ranges = []
-        width = 0
-
-        for rng in as_string.split(','):
-            match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng)
-            if match is None:
-                raise ValueError('Range {!r} in bits for {} is malformed.'
-                                 .format(rng, what))
-
-            msb = int(match.group(1))
-            maybe_lsb = match.group(2)
-            lsb = msb if maybe_lsb is None else int(maybe_lsb)
-
-            if msb < lsb:
-                raise ValueError('Range {!r} in bits for {} has msb < lsb.'
-                                 .format(rng, what))
-
-            if msb >= 32:
-                raise ValueError('Range {!r} in bits for {} has msb >= 32.'
-                                 .format(rng, what))
-
-            rng_mask = (1 << (msb + 1)) - (1 << lsb)
-            overlaps |= rng_mask & mask
-            mask |= rng_mask
-
-            ranges.append((msb, lsb))
-            width += msb - lsb + 1
-
-        if overlaps:
-            raise ValueError('Bits for {} have overlapping ranges '
-                             '(mask: {:#08x})'
-                             .format(what, overlaps))
-
-        return BitRanges(mask, ranges, width)
-
-    def __eq__(self, other: object) -> bool:
-        return isinstance(other, BitRanges) and self.ranges == other.ranges
-
-    def encode(self, value: int) -> int:
-        '''Encode the given value as bit fields'''
-        ret = 0
-        bits_taken = 0
-        for msb, lsb in self.ranges:
-            rng_width = msb - lsb + 1
-            value_msb = self.width - 1 - bits_taken
-            value_lsb = value_msb - rng_width + 1
-
-            rng_mask = (1 << rng_width) - 1
-            rng_value = (value >> value_lsb) & rng_mask
-            ret |= rng_value << lsb
-            bits_taken += rng_width
-
-        assert bits_taken == self.width
-        return ret
-
-    def decode(self, raw: int) -> int:
-        '''Extract the bit fields from the given value'''
-        ret = 0
-        for msb, lsb in self.ranges:
-            width = msb - lsb + 1
-            mask = (1 << width) - 1
-
-            ret <<= width
-            ret |= (raw >> lsb) & mask
-        return ret
-
-
-class BoolLiteral:
-    '''Represents a boolean literal, with possible 'x characters
-
-    We represent this as 2 masks: "ones" and "xs". The ones mask is the bits
-    that are marked 1. The xs mask is the bits that are marked x. Then you can
-    test whether a particular value matches the literal by zeroing all bits in
-    the x mask and then comparing with the ones mask.
-
-    '''
-    def __init__(self, ones: int, xs: int, width: int) -> None:
-        assert width > 0
-        assert (ones >> width) == 0
-        assert (xs >> width) == 0
-
-        self.ones = ones
-        self.xs = xs
-        self.width = width
-
-    @staticmethod
-    def from_string(as_string: str, what: str) -> 'BoolLiteral':
-        ones = 0
-        xs = 0
-        width = 0
-
-        # The literal should always start with a 'b'
-        if not as_string.startswith('b'):
-            raise ValueError("Boolean literal for {} doesn't start with a 'b'."
-                             .format(what))
-
-        for char in as_string[1:]:
-            if char == '_':
-                continue
-
-            ones <<= 1
-            xs <<= 1
-            width += 1
-
-            if char == '0':
-                continue
-            elif char == '1':
-                ones |= 1
-            elif char == 'x':
-                xs |= 1
-            else:
-                raise ValueError('Boolean literal for {} has '
-                                 'unsupported character: {!r}.'
-                                 .format(what, char))
-
-        if not width:
-            raise ValueError('Empty boolean literal for {}.'.format(what))
-
-        return BoolLiteral(ones, xs, width)
-
-    def char_for_bit(self, bit: int) -> str:
-        '''Return 0, 1 or x for the bit at the given position'''
-        assert bit < self.width
-        if (self.ones >> bit) & 1:
-            return '1'
-        if (self.xs >> bit) & 1:
-            return 'x'
-        return '0'
-
-
-class EncSchemeField:
-    '''Represents a single field in an encoding scheme'''
-    def __init__(self,
-                 bits: BitRanges,
-                 value: Optional[BoolLiteral],
-                 shift: int) -> None:
-        self.bits = bits
-        self.value = value
-        self.shift = shift
-
-    @staticmethod
-    def from_yaml(yml: object, what: str) -> 'EncSchemeField':
-        # This is either represented as a dict in the YAML or as a bare string.
-        bits_what = 'bits for {}'.format(what)
-        value_what = 'value for {}'.format(what)
-        shift_what = 'shift for {}'.format(what)
-
-        shift = 0
-
-        if isinstance(yml, dict):
-            yd = check_keys(yml, what, ['bits'], ['value', 'shift'])
-
-            bits_yml = yd['bits']
-            if not (isinstance(bits_yml, str) or isinstance(bits_yml, int)):
-                raise ValueError('{} is of type {}, not a string or int.'
-                                 .format(bits_what, type(bits_yml).__name__))
-
-            # We require value to be given as a string because it's supposed to
-            # be in base 2, and PyYAML will parse 111 as one-hundred and
-            # eleven, 011 as 9 and 0x11 as 17. Aargh!
-            raw_value = None
-            val_yml = yd.get('value')
-            if val_yml is not None:
-                if not isinstance(val_yml, str):
-                    raise ValueError("{} is of type {}, but must be a string "
-                                     "(we don't allow automatic conversion "
-                                     "because YAML's int conversion assumes "
-                                     "base 10 and value should be in base 2)."
-                                     .format(value_what,
-                                             type(val_yml).__name__))
-                raw_value = val_yml
-
-            # shift, on the other hand, is written in base 10. Allow an
-            # integer.
-            shift_yml = yd.get('shift')
-            if shift_yml is None:
-                pass
-            elif isinstance(shift_yml, str):
-                if not re.match(r'[0-9]+$', shift_yml):
-                    raise ValueError('{} is {!r} but should be a '
-                                     'non-negative integer.'
-                                     .format(shift_what, shift_yml))
-                shift = int(shift_yml)
-            elif isinstance(shift_yml, int):
-                if shift_yml < 0:
-                    raise ValueError('{} is {!r} but should be a '
-                                     'non-negative integer.'
-                                     .format(shift_what, shift_yml))
-                shift = shift_yml
-            else:
-                raise ValueError("{} is of type {}, but must be a string "
-                                 "or non-negative integer."
-                                 .format(shift_what, type(shift_yml).__name__))
-        elif isinstance(yml, str) or isinstance(yml, int):
-            bits_yml = yml
-            raw_value = None
-        else:
-            raise ValueError('{} is a {}, but should be a '
-                             'dict, string or integer.'
-                             .format(what, type(yml).__name__))
-
-        # The bits field is usually parsed as a string ("10-4", or similar).
-        # But if it's a bare integer then YAML will parse it as an int. That's
-        # fine, but we turn it back into a string to be re-parsed by BitRanges.
-        assert isinstance(bits_yml, str) or isinstance(bits_yml, int)
-
-        bits = BitRanges.from_yaml(str(bits_yml), bits_what)
-        value = None
-        if raw_value is not None:
-            value = BoolLiteral.from_string(raw_value, value_what)
-            if bits.width != value.width:
-                raise ValueError('{} has bits that imply a width of {}, but '
-                                 'a value with width {}.'
-                                 .format(what, bits.width, value.width))
-
-        return EncSchemeField(bits, value, shift)
-
-
-class EncSchemeImport:
-    '''An object representing inheritance of a parent scheme
-
-    When importing a parent scheme, we can set some of its fields with
-    immediate values. These are stored in the settings field.
-
-    '''
-    def __init__(self, yml: object, importer_name: str) -> None:
-        as_str = check_str(yml,
-                           'value for import in encoding scheme {!r}'
-                           .format(importer_name))
-
-        # The supported syntax is
-        #
-        #    - parent0(field0=b111, field1=b10)
-        #    - parent1()
-        #    - parent2
-
-        match = re.match(r'([^ (]+)[ ]*(?:\(([^)]+)\))?$', as_str)
-        if not match:
-            raise ValueError('Malformed encoding scheme '
-                             'inheritance by scheme {!r}: {!r}.'
-                             .format(importer_name, as_str))
-
-        self.parent = match.group(1)
-        self.settings = {}  # type: Dict[str, BoolLiteral]
-
-        when = ('When inheriting from {!r} in encoding scheme {!r}'
-                .format(self.parent, importer_name))
-
-        if match.group(2) is not None:
-            args = match.group(2).split(',')
-            for arg in args:
-                arg = arg.strip()
-                arg_parts = arg.split('=')
-                if len(arg_parts) != 2:
-                    raise ValueError('{}, found an argument with {} '
-                                     'equals signs (should have exactly one).'
-                                     .format(when, len(arg_parts) - 1))
-
-                field_name = arg_parts[0]
-                field_what = ('literal value for field {!r} when inheriting '
-                              'from {!r} in encoding scheme {!r}'
-                              .format(arg_parts[0], self.parent, importer_name))
-                field_value = BoolLiteral.from_string(arg_parts[1], field_what)
-
-                if field_name in self.settings:
-                    raise ValueError('{}, found multiple arguments assigning '
-                                     'values to the field {!r}.'
-                                     .format(when, field_name))
-
-                self.settings[field_name] = field_value
-
-    def apply_settings(self,
-                       esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields':
-        # Copy and set values in anything that has a setting
-        fields = {}
-        for name, literal in self.settings.items():
-            old_field = esf.fields.get(name)
-            if old_field is None:
-                raise ValueError('{} sets unknown field {!r} from {!r}.'
-                                 .format(what, name, self.parent))
-
-            if old_field.bits.width != literal.width:
-                raise ValueError('{} sets field {!r} from {!r} with a literal '
-                                 'of width {}, but the field has width {}.'
-                                 .format(what, name, self.parent,
-                                         literal.width, old_field.bits.width))
-
-            fields[name] = EncSchemeField(old_field.bits,
-                                          literal,
-                                          old_field.shift)
-
-        # Copy anything else
-        op_fields = set()
-        for name, old_field in esf.fields.items():
-            if name in fields:
-                continue
-            op_fields.add(name)
-            fields[name] = old_field
-
-        return EncSchemeFields(fields, op_fields, esf.mask)
-
-
-class EncSchemeFields:
-    '''An object representing some fields in an encoding scheme'''
-    def __init__(self,
-                 fields: Dict[str, EncSchemeField],
-                 op_fields: Set[str],
-                 mask: int) -> None:
-        self.fields = fields
-        self.op_fields = op_fields
-        self.mask = mask
-
-    @staticmethod
-    def empty() -> 'EncSchemeFields':
-        return EncSchemeFields({}, set(), 0)
-
-    @staticmethod
-    def from_yaml(yml: object, name: str) -> 'EncSchemeFields':
-        if not isinstance(yml, dict):
-            raise ValueError('fields for encoding scheme {!r} should be a '
-                             'dict, but we saw a {}.'
-                             .format(name, type(yml).__name__))
-
-        fields = {}
-        op_fields = set()  # type: Set[str]
-        mask = 0
-
-        overlaps = 0
-
-        for key, val in yml.items():
-            if not isinstance(key, str):
-                raise ValueError('{!r} is a bad key for a field name of '
-                                 'encoding scheme {} (should be str, not {}).'
-                                 .format(key, name, type(key).__name__))
-
-            fld_what = 'field {!r} of encoding scheme {}'.format(key, name)
-            field = EncSchemeField.from_yaml(val, fld_what)
-
-            overlaps |= mask & field.bits.mask
-            mask |= field.bits.mask
-
-            fields[key] = field
-            if field.value is None:
-                op_fields.add(key)
-
-        if overlaps:
-            raise ValueError('Direct fields for encoding scheme {} have '
-                             'overlapping ranges (mask: {:#08x})'
-                             .format(name, overlaps))
-
-        return EncSchemeFields(fields, op_fields, mask)
-
-    def merge_in(self, right: 'EncSchemeFields', when: str) -> None:
-        for name, field in right.fields.items():
-            if name in self.fields:
-                raise ValueError('Duplicate field name: {!r} {}.'
-                                 .format(name, when))
-
-            overlap = self.mask & field.bits.mask
-            if overlap:
-                raise ValueError('Overlapping bit ranges '
-                                 '(masks: {:08x} and {:08x} have '
-                                 'intersection {:08x}) {}.'
-                                 .format(self.mask,
-                                         field.bits.mask, overlap, when))
-
-            self.fields[name] = field
-            self.mask |= field.bits.mask
-            if field.value is None:
-                assert name not in self.op_fields
-                self.op_fields.add(name)
-
-
-class EncScheme:
-    def __init__(self, yml: object, name: str) -> None:
-        what = 'encoding scheme {!r}'.format(name)
-        yd = check_keys(yml, what, [], ['parents', 'fields'])
-
-        if not yd:
-            raise ValueError('{} has no parents or fields.'.format(what))
-
-        fields_yml = yd.get('fields')
-        self.direct_fields = (EncSchemeFields.from_yaml(fields_yml, name)
-                              if fields_yml is not None
-                              else EncSchemeFields.empty())
-
-        parents_yml = yd.get('parents')
-        parents_what = 'parents of {}'.format(what)
-        parents = ([EncSchemeImport(y, name)
-                    for y in check_list(parents_yml, parents_what)]
-                   if parents_yml is not None
-                   else [])
-        self.parents = index_list(parents_what,
-                                  parents,
-                                  lambda imp: imp.parent)
-
-
-class EncSchemes:
-    def __init__(self, yml: object) -> None:
-        if not isinstance(yml, dict):
-            raise ValueError("value for encoding-schemes is expected to be "
-                             "a dict, but was actually a {}."
-                             .format(type(yml).__name__))
-
-        self.schemes = {}  # type: Dict[str, EncScheme]
-        self.resolved = {}  # type: Dict[str, EncSchemeFields]
-
-        for key, val in yml.items():
-            if not isinstance(key, str):
-                raise ValueError('{!r} is a bad key for an encoding scheme '
-                                 'name (should be str, not {}).'
-                                 .format(key, type(key).__name__))
-            self.schemes[key] = EncScheme(val, key)
-
-    def _resolve(self,
-                 name: str,
-                 user: str,
-                 stack: List[str]) -> EncSchemeFields:
-        # Have we resolved this before?
-        resolved = self.resolved.get(name)
-        if resolved is not None:
-            return resolved
-
-        # Spot any circular inheritance
-        if name in stack:
-            raise RuntimeError('Circular inheritance of encoding '
-                               'schemes: {}'
-                               .format(' -> '.join(stack + [name])))
-
-        # Does the scheme actually exist?
-        scheme = self.schemes.get(name)
-        if scheme is None:
-            raise ValueError('{} requires undefined encoding scheme {!r}.'
-                             .format(user, name))
-
-        # Recursively try to resolve each parent scheme, applying any import
-        # settings
-        resolved_parents = {}
-        new_stack = stack + [name]
-        what = 'Import list of encoding scheme {!r}'.format(name)
-        for pname, pimport in scheme.parents.items():
-            resolved = self._resolve(pimport.parent, what, new_stack)
-            resolved_parents[pname] = pimport.apply_settings(resolved, what)
-
-        # Now try to merge the resolved imports
-        merged = EncSchemeFields.empty()
-        parent_names_so_far = []  # type: List[str]
-        for pname, pfields in resolved_parents.items():
-            when = ('merging fields of scheme {} into '
-                    'already merged fields of {}'
-                    .format(pname, ', '.join(parent_names_so_far)))
-            merged.merge_in(pfields, when)
-            parent_names_so_far.append(repr(pname))
-
-        # Now try to merge in any direct fields
-        when = ('merging direct fields of scheme {} into fields from parents'
-                .format(name))
-        merged.merge_in(scheme.direct_fields, when)
-
-        return merged
-
-    def resolve(self, name: str, mnemonic: str) -> EncSchemeFields:
-        fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), [])
-
-        # Check completeness
-        missing = ((1 << 32) - 1) & ~fields.mask
-        if missing:
-            raise ValueError('Fields for encoding scheme {} miss some bits '
-                             '(mask: {:#08x})'
-                             .format(name, missing))
-
-        return fields
-
-
-class OperandType:
-    '''The base class for some sort of operand type'''
-    def __init__(self, width: Optional[int]) -> None:
-        assert width is None or width > 0
-        self.width = width
-
-    def markdown_doc(self) -> Optional[str]:
-        '''Generate any (markdown) documentation for this operand type
-
-        The base class returns None, but subclasses might return something
-        useful.
-
-        '''
-        return None
-
-    def syntax_determines_value(self) -> bool:
-        '''Can the value of this operand always be inferred from asm syntax?
-
-        This is true for things like registers (the value "5" only comes from
-        "r5", for example), but false for arbitrary immediates: an immediate
-        operand might have a value that comes from a relocation.
-
-        '''
-        return False
-
-    def read_index(self, as_str: str) -> Optional[int]:
-        '''Try to read the given syntax as an actual integer index
-
-        Raises a ValueError on definite failure ("found cabbage when I expected
-        a register name"). Returns None on a soft failure: "this is a
-        complicated looking expression, but it might be a sensible immediate".
-
-        '''
-        return None
-
-    def render_val(self, value: int) -> str:
-        '''Render the given value as a string.
-
-        The default implementation prints it as a decimal number. Register
-        operands, for example, will want to print 3 as "x3" and so on.
-
-        '''
-        return str(value)
-
-
-class RegOperandType(OperandType):
-    '''A class representing a register operand type'''
-    TYPE_FMTS = {
-        'gpr': (5, 'x'),
-        'wdr': (5, 'w'),
-        'csr': (12, None),
-        'wsr': (8, None)
-    }
-
-    def __init__(self, reg_type: str, is_dest: bool):
-        fmt = RegOperandType.TYPE_FMTS.get(reg_type)
-        assert fmt is not None
-        width, _ = fmt
-        super().__init__(width)
-
-        self.reg_type = reg_type
-        self.is_dest = is_dest
-
-    def syntax_determines_value(self) -> bool:
-        return True
-
-    def read_index(self, as_str: str) -> int:
-        width, pfx = RegOperandType.TYPE_FMTS[self.reg_type]
-
-        re_pfx = '' if pfx is None else re.escape(pfx)
-        match = re.match(re_pfx + '([0-9]+)$', as_str)
-        if match is None:
-            raise ValueError("Expression {!r} can't be parsed as a {}."
-                             .format(as_str, self.reg_type))
-
-        idx = int(match.group(1))
-        assert 0 <= idx
-        if idx >> width:
-            raise ValueError("Invalid register of type {}: {!r}."
-                             .format(self.reg_type, as_str))
-
-        return idx
-
-    def render_val(self, value: int) -> str:
-        fmt = RegOperandType.TYPE_FMTS.get(self.reg_type)
-        assert fmt is not None
-        _, pfx = fmt
-
-        if pfx is None:
-            return super().render_val(value)
-
-        return '{}{}'.format(pfx, value)
-
-
-class ImmOperandType(OperandType):
-    '''A class representing an immediate operand type'''
-    def markdown_doc(self) -> Optional[str]:
-        # Override from OperandType base class
-        if self.width is None:
-            return None
-
-        return 'Valid range: `0..{}`'.format((1 << self.width) - 1)
-
-    def read_index(self, as_str: str) -> Optional[int]:
-        # We only support simple integer literals.
-        try:
-            return int(as_str)
-        except ValueError:
-            return None
-
-
-class EnumOperandType(ImmOperandType):
-    '''A class representing an enum operand type'''
-    def __init__(self, items: List[str]):
-        assert items
-        super().__init__(int.bit_length(len(items) - 1))
-        self.items = items
-
-    def markdown_doc(self) -> Optional[str]:
-        # Override from OperandType base class
-        parts = ['Syntax table:\n\n'
-                 '| Syntax | Value of immediate |\n'
-                 '|--------|--------------------|\n']
-        for idx, item in enumerate(self.items):
-            parts.append('| `{}` | `{}` |\n'
-                         .format(item, idx))
-        return ''.join(parts)
-
-    def syntax_determines_value(self) -> bool:
-        return True
-
-    def read_index(self, as_str: str) -> Optional[int]:
-        for idx, item in enumerate(self.items):
-            if as_str == item:
-                return idx
-
-        known_vals = ', '.join(repr(item) for item in self.items)
-        raise ValueError('Invalid enum value, {!r}. '
-                         'Supported values: {}.'
-                         .format(as_str, known_vals))
-
-    def render_val(self, value: int) -> str:
-        # On a bad value, we have to return *something*. Since this is just
-        # going into disassembly, let's be vaguely helpful and return something
-        # that looks clearly bogus.
-        #
-        # Note that if the number of items in the enum is not a power of 2,
-        # this could happen with a bad binary, despite good tools.
-        if value < 0 or value >= len(self.items):
-            return '???'
-
-        return self.items[value]
-
-
-class OptionOperandType(ImmOperandType):
-    '''A class representing an option operand type'''
-    def __init__(self, option: str):
-        super().__init__(1)
-        self.option = option
-
-    def markdown_doc(self) -> Optional[str]:
-        # Override from OperandType base class
-        return 'To specify, use the literal syntax `{}`\n'.format(self.option)
-
-    def syntax_determines_value(self) -> bool:
-        return True
-
-    def read_index(self, as_str: str) -> Optional[int]:
-        if as_str == self.option:
-            return 1
-
-        raise ValueError('Invalid option value, {!r}. '
-                         'If specified, it should have been {!r}.'
-                         .format(as_str, self.option))
-
-    def render_val(self, value: int) -> str:
-        # Option types are always 1 bit wide, so the value should be 0 or 1.
-        assert value in [0, 1]
-        return self.option if value else ''
-
-
-def parse_operand_type(fmt: str) -> OperandType:
-    '''Make sense of the operand type syntax'''
-    # Registers
-    if fmt == 'grs':
-        return RegOperandType('gpr', False)
-    if fmt == 'grd':
-        return RegOperandType('gpr', True)
-    if fmt == 'wrs':
-        return RegOperandType('wdr', False)
-    if fmt == 'wrd':
-        return RegOperandType('wdr', True)
-    if fmt == 'csr':
-        return RegOperandType('csr', True)
-    if fmt == 'wsr':
-        return RegOperandType('wsr', True)
-
-    # Immediates
-    if fmt == 'imm':
-        return ImmOperandType(None)
-    m = re.match(r'imm([1-9][0-9]*)$', fmt)
-    if m:
-        return ImmOperandType(int(m.group(1)))
-    m = re.match(r'enum\(([^\)]+)\)$', fmt)
-    if m:
-        return EnumOperandType([item.strip()
-                                for item in m.group(1).split(',')])
-    m = re.match(r'option\(([^\)]+)\)$', fmt)
-    if m:
-        return OptionOperandType(m.group(1).strip())
-
-    raise ValueError("Operand type description {!r} "
-                     "didn't match any recognised format."
-                     .format(fmt))
-
-
-def infer_operand_type(name: str) -> OperandType:
-    '''Try to guess an operand's type from its name'''
-
-    if re.match(r'grs[0-9]*$', name):
-        return parse_operand_type('grs')
-    if name in ['grd', 'wrd', 'csr', 'wsr']:
-        return parse_operand_type(name)
-    if re.match(r'wrs[0-9]*$', name):
-        return parse_operand_type('wrs')
-    if re.match(r'imm[0-9]*$', name):
-        return parse_operand_type('imm')
-    if name == 'offset':
-        return parse_operand_type('imm')
-
-    raise ValueError("Operand name {!r} doesn't imply an operand type: "
-                     "you'll have to set the type explicitly."
-                     .format(name))
-
-
-def make_operand_type(yml: object, operand_name: str) -> OperandType:
-    '''Construct a type for an operand
-
-    This is either based on the type, if given, or inferred from the name
-    otherwise.
-
-    '''
-    return (parse_operand_type(check_str(yml,
-                                         'type for {} operand'
-                                         .format(operand_name)))
-            if yml is not None
-            else infer_operand_type(operand_name))
-
-
-def get_optional_str(data: Dict[str, object],
-                     key: str, what: str) -> Optional[str]:
-    return check_optional_str(data.get(key), '{} field for {}'.format(key, what))
-
-
-class Operand:
-    def __init__(self, yml: object, insn_name: str) -> None:
-        # The YAML representation should be a string (a bare operand name) or a
-        # dict.
-        what = 'operand for {!r} instruction'.format(insn_name)
-        if isinstance(yml, str):
-            name = yml
-            op_type = None
-            doc = None
-        elif isinstance(yml, dict):
-            yd = check_keys(yml, what, ['name'], ['type', 'doc'])
-            name = check_str(yd['name'], 'name of ' + what)
-
-            op_what = '{!r} {}'.format(name, what)
-            op_type = get_optional_str(yd, 'type', op_what)
-            doc = get_optional_str(yd, 'doc', op_what)
-
-        op_what = '{!r} {}'.format(name, what)
-        self.name = name
-        self.op_type = make_operand_type(op_type, name)
-        self.doc = doc
-
-
-class SyntaxToken:
-    '''An object representing a single token in an instruction's syntax
-
-    See InsnSyntax for more details. The is_literal attribute is true if this
-    is a literal hunk of text (rather than an operand name). The text attribute
-    either holds the literal syntax or the operand name.
-
-    '''
-    def __init__(self, is_literal: bool, text: str) -> None:
-        assert text
-        self.is_literal = is_literal
-        # Make whitespace canonical for literals
-        self.text = re.sub(r'\s+', ' ', text) if is_literal else text
-
-    def render_doc(self) -> str:
-        '''Return how this syntax token should look in the documentation'''
-        if self.is_literal:
-            return self.text
-        else:
-            return '<{}>'.format(self.text)
-
-    def asm_pattern(self) -> str:
-        '''Return a regex pattern that can be used for matching this token
-
-        If the token represents an operand, the pattern is wrapped in a group
-        (to capture the operand). For more details about the syntax, see
-        InsnSyntax.
-
-        '''
-        if self.is_literal:
-            # A literal that is pure whitespace "requires the whitespace".
-            # Otherwise, replace all internal whitespace with \s+ and allow
-            # optional whitespace afterwards. To do this easily, we split the
-            # literal on whitespace. The result is empty iff it was just
-            # whitespace in the first place.
-            words = self.text.split()
-            if not words:
-                return r'\s+'
-
-            # For non-whitespace literals, we disallow leading space and add
-            # optional trailing space. This convention should avoid lots of
-            # \s*\s* pairs.
-            parts = [re.escape(words[0])]
-            for w in words[1:]:
-                parts.append(r'\s+')
-                parts.append(re.escape(w))
-            parts.append(r'\s*')
-
-            return ''.join(parts)
-
-        # Otherwise, this is an operand. For now, at least, we're very
-        # restrictive for operands. No spaces and no commas (the second rule
-        # avoids silliness like "a, b, c" matching a syntax with only two
-        # operands by setting the second to "b, c").
-        #
-        # We also split out ++ and -- separately, to disambiguate things like
-        # x1++, which must be parsed as x1 followed by ++.
-        #
-        # If we want to do better and allow things like
-        #
-        #    addi x0, x1, 1 + 3
-        #
-        # then we need to use something more serious than just regexes for
-        # parsing.
-        return r'(-?[^ ,+\-]+|[+\-]+)\s*'
-
-    def render_vals(self,
-                    op_vals: Dict[str, int],
-                    operands: Dict[str, Operand]) -> str:
-        '''Return an assembly listing for the given operand fields
-
-        '''
-        if self.is_literal:
-            return self.text
-
-        assert self.text in op_vals
-        assert self.text in operands
-
-        return operands[self.text].op_type.render_val(op_vals[self.text])
-
-
-class SyntaxHunk:
-    '''An object representing a hunk of syntax that might be optional'''
-    def __init__(self,
-                 is_optional: bool,
-                 tokens: List[SyntaxToken],
-                 op_list: List[str],
-                 op_set: Set[str]) -> None:
-        assert tokens
-        self.is_optional = is_optional
-        self.tokens = tokens
-        self.op_list = op_list
-        self.op_set = op_set
-
-    @staticmethod
-    def from_list(operands: List[str]) -> 'SyntaxHunk':
-        '''Smart constructor for a list of operands with "normal" syntax'''
-        assert operands
-        comma = SyntaxToken(True, ', ')
-        tokens = [SyntaxToken(False, operands[0])]
-        for op in operands[1:]:
-            tokens.append(comma)
-            tokens.append(SyntaxToken(False, op))
-
-        op_set = set(operands)
-        assert len(op_set) == len(operands)
-
-        return SyntaxHunk(False, tokens, operands, op_set)
-
-    @staticmethod
-    def from_string(mnemonic: str, optional: bool, raw: str) -> 'SyntaxHunk':
-        '''Smart constructor that parses YAML syntax (see InsnSyntax)'''
-        assert raw
-
-        tokens = []
-        op_list = []
-        op_set = set()
-
-        parts = re.split(r'<([^>]+)>', raw)
-        for idx, part in enumerate(parts):
-            # The matches for the regex appear in positions 1, 3, 5, ...
-            is_literal = not (idx & 1)
-            if ('<' in part or '>' in part) and not is_literal:
-                raise ValueError("Syntax for {!r} has hunk {!r} which doesn't "
-                                 "seem to surround <operand>s properly."
-                                 .format(mnemonic, raw))
-
-            if not is_literal:
-                assert part
-                if part in op_set:
-                    raise ValueError("Syntax for {!r} has hunk {!r} with "
-                                     "more than one occurrence of <{}>."
-                                     .format(mnemonic, raw, part))
-                op_list.append(part)
-                op_set.add(part)
-
-            # Only allow empty parts (and skip their tokens) if at one end or
-            # the other
-            if not part and idx not in [0, len(parts) - 1]:
-                raise ValueError("Syntax for {!r} has two adjacent operand "
-                                 "tokens, with no intervening syntax."
-                                 .format(mnemonic))
-
-            if part:
-                tokens.append(SyntaxToken(is_literal, part))
-
-        return SyntaxHunk(optional, tokens, op_list, op_set)
-
-    def render_doc(self) -> str:
-        '''Return how this hunk should look in the documentation'''
-        parts = []
-        for token in self.tokens:
-            parts.append(token.render_doc())
-
-        body = ''.join(parts)
-        return '[{}]'.format(body) if self.is_optional else body
-
-    def asm_pattern(self) -> str:
-        '''Return a regex pattern that can be used for matching this hunk
-
-        The result will have a group per operand. It allows trailing, but not
-        leading, space within the hunk.
-
-        '''
-        parts = []
-        for token in self.tokens:
-            parts.append(token.asm_pattern())
-        body = ''.join(parts)
-
-        # For an optional hunk, we build it up in the form "(?:foo)?". This
-        # puts a non-capturing group around foo and then applies "?"
-        # (one-or-more) to it.
-        return '(?:{})?'.format(body) if self.is_optional else body
-
-    def render_vals(self,
-                    op_vals: Dict[str, int],
-                    operands: Dict[str, Operand]) -> str:
-        '''Return an assembly listing for the hunk given operand values
-
-        If this hunk is optional and all its operands are zero, the hunk is
-        omitted (so this function returns the empty string).
-
-        '''
-        if self.is_optional:
-            required = False
-            for op_name in self.op_list:
-                if op_vals[op_name] != 0:
-                    required = True
-                    break
-
-            if not required:
-                return ''
-
-        return ''.join(token.render_vals(op_vals, operands)
-                       for token in self.tokens)
-
-
-class InsnSyntax:
-    '''A class representing the syntax of an instruction
-
-    An instruction's syntax is specified in the YAML file by writing it out
-    with operand names surrounded by angle brackets. For example, a simple NOT
-    instruction might have a syntax of
-
-        <dst>, <src>
-
-    which should be interpreted as the following tokens:
-
-        - Operand called 'dst'
-        - A literal ','
-        - Operand called 'src'
-
-    Between the tokens, whitespace is optional (so "x0 , x1" and "x0,x1" both
-    match the syntax above) unless a literal token is just a space, in which
-    case some whitespace is required. For example
-
-        <dst> <src>
-
-    would match "x0 x1" but not "x0x1". Whitespace within literal syntax tokens
-    means that some space is required, matching the regex \\s+. For example,
-    the (rather strange) syntax
-
-       <dst> + - <src>
-
-    would match "x0 + - x1" or "x0+ -x1", but not "x0 +- x1".
-
-    Some operands (and surrounding syntax) might be optional. The optional
-    syntax is surrounded by square brackets. Nesting is not supported. For
-    example:
-
-       <dst>, <src>[, <offset>]
-
-    would match "x0, x1, 123" or "x0, x1".
-
-    Note that a given syntax might be ambiguous. For example,
-
-       <dst>, <src>[, <offset>][, <flavour>]
-
-    With "x0, x1, 123", is 123 an offset or a flavour? (We choose not to embed
-    typing information into the syntax, because that results in very confusing
-    assembler error messages). We break ties in the same way as the underlying
-    regex engine, assigning the operand to the first group, so 123 is an offset
-    in this case. Such syntaxes are rather confusing though, so probably not a
-    good idea.
-
-    The parsed syntax is stored as a list of "hunks". Each hunk contains a flag
-    showing whether the hunk is optional or required and also a list of
-    SyntaxToken objects.
-
-    '''
-    def __init__(self,
-                 hunks: List[SyntaxHunk],
-                 op_list: List[str],
-                 op_set: Set[str]) -> None:
-        self.hunks = hunks
-        self.op_list = op_list
-        self.op_set = op_set
-
-    @staticmethod
-    def from_list(operands: List[str]) -> 'InsnSyntax':
-        '''Smart constructor for a list of operands with "normal" syntax'''
-        if not operands:
-            return InsnSyntax([], [], set())
-
-        hunk = SyntaxHunk.from_list(operands)
-        return InsnSyntax([hunk], hunk.op_list, hunk.op_set)
-
-    @staticmethod
-    def from_yaml(mnemonic: str, raw: str) -> 'InsnSyntax':
-        '''Parse the syntax in the YAML file'''
-
-        # The raw syntax looks something like
-        #
-        #    <op0>, <op1>[(<op2>)]
-        #
-        # to mean that you either have "x0, x1" or "x0, x2(x3)". First, split
-        # out the bracketed parts.
-        by_left = raw.split('[')
-        parts = [(False, by_left[0])]
-        for after_left in by_left[1:]:
-            split = after_left.split(']', 1)
-            if len(split) != 2:
-                raise ValueError('Unbalanced or nested [] in instruction '
-                                 'syntax for {!r}.'
-                                 .format(mnemonic))
-
-            parts += [(True, split[0]), (False, split[1])]
-
-        # Now parts contains a list of pairs (required, txt) where txt is a
-        # hunk of the syntax and req is true if this hunk is required. A part
-        # might be empty. For example, "[a]b c[d]" with both lead and trail
-        # with an empty part. But it shouldn't be empty if it's marked
-        # optional: that would be something like "a[]b", which doesn't make
-        # much sense.
-        hunks = []
-        for optional, raw in parts:
-            if raw:
-                hunks.append(SyntaxHunk.from_string(mnemonic, optional, raw))
-            elif optional:
-                raise ValueError('Empty [] in instruction syntax for {!r}.'
-                                 .format(mnemonic))
-
-        # Collect up operands across the hunks
-        op_list = []
-        op_set = set()
-        for hunk in hunks:
-            op_list += hunk.op_list
-            op_set |= hunk.op_set
-
-        if len(op_list) != len(op_set):
-            raise ValueError('Instruction syntax for {!r} is not '
-                             'linear in its operands.'
-                             .format(mnemonic))
-
-        return InsnSyntax(hunks, op_list, op_set)
-
-    def render_doc(self) -> str:
-        '''Return how this syntax should look in the documentation'''
-        return ''.join(hunk.render_doc() for hunk in self.hunks)
-
-    def asm_pattern(self) -> Tuple[str, Dict[str, int]]:
-        '''Return a regex pattern and a group name map for this syntax'''
-        parts = [r'\s*']
-        for hunk in self.hunks:
-            parts.append(hunk.asm_pattern())
-        parts.append('$')
-        pattern = ''.join(parts)
-
-        op_to_grp = {}
-        for idx, op in enumerate(self.op_list):
-            op_to_grp[op] = 1 + idx
-
-        return (pattern, op_to_grp)
-
-    def render_vals(self,
-                    op_vals: Dict[str, int],
-                    operands: Dict[str, Operand]) -> str:
-        '''Return an assembly listing for the given operand fields'''
-        parts = []
-        for hunk in self.hunks:
-            parts.append(hunk.render_vals(op_vals, operands))
-        return ''.join(parts)
-
-
-class EncodingField:
-    '''A single element of an encoding's mapping'''
-    def __init__(self,
-                 value: Union[BoolLiteral, str],
-                 scheme_field: EncSchemeField) -> None:
-        self.value = value
-        self.scheme_field = scheme_field
-
-    @staticmethod
-    def from_yaml(as_str: str,
-                  scheme_field: EncSchemeField,
-                  name_to_operand: Dict[str, Operand],
-                  what: str) -> 'EncodingField':
-        # The value should either be a boolean literal ("000xx11" or similar)
-        # or should be a name, which is taken as the name of an operand.
-        if not as_str:
-            raise ValueError('Empty string as {}.'.format(what))
-
-        # Set self.value to be either the bool literal or the name of the
-        # operand.
-        value_width = None
-        value = ''  # type: Union[BoolLiteral, str]
-        if re.match(r'b[01x_]+$', as_str):
-            value = BoolLiteral.from_string(as_str, what)
-            value_width = value.width
-            value_type = 'a literal value'
-        else:
-            operand = name_to_operand.get(as_str)
-            if operand is None:
-                raise ValueError('Unknown operand, {!r}, as {}'
-                                 .format(as_str, what))
-            value_width = operand.op_type.width
-            value = as_str
-            value_type = 'an operand'
-
-        # Unless we had an operand of type 'imm' (unknown width), we now have
-        # an expected width. Check it matches the width of the schema field.
-        if value_width is not None:
-            if scheme_field.bits.width != value_width:
-                raise ValueError('{} is mapped to {} with width {}, but the '
-                                 'encoding schema field has width {}.'
-                                 .format(what, value_type, value_width,
-                                         scheme_field.bits.width))
-
-        # Track the scheme field as well (so we don't have to keep track of a
-        # scheme once we've made an encoding object)
-        return EncodingField(value, scheme_field)
-
-
-class Encoding:
-    '''The encoding for an instruction'''
-    def __init__(self,
-                 yml: object,
-                 schemes: EncSchemes,
-                 name_to_operand: Dict[str, Operand],
-                 mnemonic: str):
-        what = 'encoding for instruction {!r}'.format(mnemonic)
-        yd = check_keys(yml, what, ['scheme', 'mapping'], [])
-
-        scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic)
-        scheme_name = check_str(yd['scheme'], scheme_what)
-        scheme_fields = schemes.resolve(scheme_name, mnemonic)
-
-        what = 'encoding mapping for instruction {!r}'.format(mnemonic)
-
-        # Check we've got exactly the right fields for the scheme
-        ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), [])
-
-        # Track the set of operand names that were used in some field
-        operands_used = set()
-
-        self.fields = {}
-        for field_name, scheme_field in scheme_fields.fields.items():
-            if scheme_field.value is not None:
-                field = EncodingField(scheme_field.value, scheme_field)
-            else:
-                field_what = ('value for {} field in encoding for instruction {!r}'
-                              .format(field_name, mnemonic))
-                field = EncodingField.from_yaml(check_str(ydm[field_name], field_what),
-                                                scheme_fields.fields[field_name],
-                                                name_to_operand,
-                                                field_what)
-
-                # If the field's value is an operand rather than a literal, it
-                # will have type str. Track the operands that we've used.
-                if isinstance(field.value, str):
-                    operands_used.add(field.value)
-
-            self.fields[field_name] = field
-
-        # We know that every field in the encoding scheme has a value. But we
-        # still need to check that every operand ended up in some field.
-        assert operands_used <= set(name_to_operand.keys())
-        unused_ops = set(name_to_operand.keys()) - operands_used
-        if unused_ops:
-            raise ValueError('Not all operands used in {} (missing: {}).'
-                             .format(what, ', '.join(list(unused_ops))))
-
-    def get_masks(self) -> Tuple[int, int]:
-        '''Return zeros/ones masks for encoding
-
-        Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a
-        bit is set if there is an bit pattern matching this encoding with that
-        bit zero. m1 is the ones mask: equivalent, but for that bit one.
-
-        '''
-        m0 = 0
-        m1 = 0
-        for field_name, field in self.fields.items():
-            if isinstance(field.value, str):
-                m0 |= field.scheme_field.bits.mask
-                m1 |= field.scheme_field.bits.mask
-            else:
-                # Match up the bits in the value with the ranges in the scheme.
-                assert field.value.width > 0
-                assert field.value.width == field.scheme_field.bits.width
-                bits_seen = 0
-                for msb, lsb in field.scheme_field.bits.ranges:
-                    val_msb = field.scheme_field.bits.width - 1 - bits_seen
-                    val_lsb = val_msb - msb + lsb
-                    bits_seen += msb - lsb + 1
-
-                    for idx in range(0, msb - lsb + 1):
-                        desc = field.value.char_for_bit(val_lsb + idx)
-                        if desc in ['0', 'x']:
-                            m0 |= 1 << (idx + lsb)
-                        if desc in ['1', 'x']:
-                            m1 |= 1 << (idx + lsb)
-
-        all_bits = (1 << 32) - 1
-        assert (m0 | m1) == all_bits
-        return (m0, m1)
-
-    def get_ones_mask(self) -> int:
-        '''Return the mask of fixed bits that are set
-
-        For literal values of x (unused bits in the encoding), we'll prefer
-        '0'.
-
-        '''
-        m0, m1 = self.get_masks()
-        return m1 & ~m0
-
-    def assemble(self, op_to_idx: Dict[str, int]) -> int:
-        '''Assemble an instruction
-
-        op_to_idx should map each operand in the encoding to some integer
-        index, which should be small enough to fit in the width of the
-        operand's type and should be representable after any shift. Will raise
-        a ValueError if not.
-
-        '''
-        val = self.get_ones_mask()
-        for field_name, field in self.fields.items():
-            if not isinstance(field.value, str):
-                # We've done this field already (in get_ones_mask)
-                continue
-
-            # Try to get the operand value for the field. If this is an
-            # optional operand, we might not have one, and just encode zero.
-            field_val = op_to_idx.get(field.value, 0)
-
-            # Are there any low bits that shouldn't be there?
-            shift_mask = (1 << field.scheme_field.shift) - 1
-            if field_val & shift_mask:
-                raise ValueError("operand field {} has a shift of {}, "
-                                 "so can't represent the value {:#x}."
-                                 .format(field.value,
-                                         field.scheme_field.shift,
-                                         field_val))
-
-            shifted = field_val >> field.scheme_field.shift
-
-            # Is the number too big? At the moment, we are assuming immediates
-            # are unsigned (because the OTBN big number instructions all have
-            # unsigned immediates).
-            if shifted >> field.scheme_field.bits.width:
-                shift_msg = ((' (shifted right by {} bits from {:#x})'
-                              .format(field.scheme_field.shift, field_val))
-                             if field.scheme_field.shift
-                             else '')
-                raise ValueError("operand field {} has a width of {}, "
-                                 "so can't represent the value {:#x}{}."
-                                 .format(field.value,
-                                         field.scheme_field.bits.width,
-                                         shifted, shift_msg))
-
-            val |= field.scheme_field.bits.encode(shifted)
-
-        return val
-
-
 class Insn:
     def __init__(self,
                  yml: object,
diff --git a/hw/ip/otbn/util/shared/operand.py b/hw/ip/otbn/util/shared/operand.py
new file mode 100644
index 0000000..e20289c
--- /dev/null
+++ b/hw/ip/otbn/util/shared/operand.py
@@ -0,0 +1,279 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Optional
+from .yaml_parse_helpers import check_keys, check_str, get_optional_str
+
+
+class OperandType:
+    '''Common base class for all operand types; width is in bits, if known'''
+    def __init__(self, width: Optional[int]) -> None:
+        assert not (width is not None and width <= 0)
+        self.width = width
+
+    def markdown_doc(self) -> Optional[str]:
+        '''Return markdown documentation for this operand type, if any
+
+        Nothing can be said in general, so the base class returns None.
+        Subclasses override this when they have something useful to add.
+
+        '''
+        return None
+
+    def syntax_determines_value(self) -> bool:
+        '''Does the assembly syntax always pin down this operand's value?
+
+        Subclasses for things like registers override this to return True.
+        The base class returns False, which suits arbitrary immediates: their
+        value might come from a relocation instead.
+
+        '''
+        return False
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        '''Try to interpret a piece of assembly syntax as an integer index
+
+        Overrides raise ValueError when as_str is definitely wrong (some other
+        text where a register name was expected) and return None when it might
+        still be a sensible expression. The base class always returns None.
+
+        '''
+        return None
+
+    def render_val(self, value: int) -> str:
+        '''Return the text used to show value in an assembly listing
+
+        By default this is the plain decimal number. Subclasses for registers
+        and the like override it to add a prefix, turning 3 into "x3".
+
+        '''
+        return str(value)
+
+
+class RegOperandType(OperandType):
+    '''The operand type for a register (one of the kinds in TYPE_FMTS)'''
+    # Maps each register kind to (width in bits, name prefix or None).
+    TYPE_FMTS = {
+        'gpr': (5, 'x'),
+        'wdr': (5, 'w'),
+        'csr': (12, None),
+        'wsr': (8, None)
+    }
+
+    def __init__(self, reg_type: str, is_dest: bool):
+        entry = RegOperandType.TYPE_FMTS.get(reg_type)
+        assert entry is not None
+        super().__init__(entry[0])
+        self.reg_type = reg_type
+        self.is_dest = is_dest
+
+    def syntax_determines_value(self) -> bool:
+        return True
+
+    def read_index(self, as_str: str) -> int:
+        '''Parse a register name, returning the index of the register
+
+        Raises ValueError if as_str isn't the expected prefix followed by
+        decimal digits, or if the index is too big for this register type.
+
+        '''
+        width, pfx = RegOperandType.TYPE_FMTS[self.reg_type]
+        pattern = ('' if pfx is None else re.escape(pfx)) + '([0-9]+)$'
+        found = re.match(pattern, as_str)
+        if found is None:
+            raise ValueError("Expression {!r} can't be parsed as a {}."
+                             .format(as_str, self.reg_type))
+
+        reg_idx = int(found.group(1))
+        assert 0 <= reg_idx
+        if reg_idx >= (1 << width):
+            raise ValueError("Invalid register of type {}: {!r}."
+                             .format(self.reg_type, as_str))
+        return reg_idx
+
+    def render_val(self, value: int) -> str:
+        '''Render a register index: prefixed as in "x3", or a bare number'''
+        entry = RegOperandType.TYPE_FMTS.get(self.reg_type)
+        assert entry is not None
+        return ('{}{}'.format(entry[1], value) if entry[1] is not None
+                else super().render_val(value))
+
+
+class ImmOperandType(OperandType):
+    '''The operand type for an immediate, whose width might not be known'''
+    def markdown_doc(self) -> Optional[str]:
+        # The valid range can only be documented when the width is known.
+        if self.width is not None:
+            max_val = (1 << self.width) - 1
+            return 'Valid range: `0..{}`'.format(max_val)
+        return None
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        # Anything other than a simple integer literal is a soft failure.
+        try:
+            return int(as_str)
+        except ValueError:
+            return None
+
+
+class EnumOperandType(ImmOperandType):
+    '''An immediate that is written as one of a fixed list of names
+
+    The encoded value of each name is its position in the items list.
+
+    '''
+    def __init__(self, items: List[str]):
+        assert items
+        # Use just enough bits to hold the largest index. A single-item enum
+        # needs no bits at all, but widths must be positive, so it gets 1.
+        super().__init__(max(1, (len(items) - 1).bit_length()))
+        self.items = items
+
+    def markdown_doc(self) -> Optional[str]:
+        header = ('Syntax table:\n\n'
+                  '| Syntax | Value of immediate |\n'
+                  '|--------|--------------------|\n')
+        rows = ['| `{}` | `{}` |\n'.format(item, idx)
+                for idx, item in enumerate(self.items)]
+        return header + ''.join(rows)
+
+    def syntax_determines_value(self) -> bool:
+        return True
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        '''Return the value for as_str, raising ValueError if it's unknown'''
+        if as_str in self.items:
+            return self.items.index(as_str)
+
+        known_vals = ', '.join(repr(item) for item in self.items)
+        raise ValueError('Invalid enum value, {!r}. '
+                         'Supported values: {}.'
+                         .format(as_str, known_vals))
+
+    def render_val(self, value: int) -> str:
+        # An out-of-range value can come from a bad binary when the number
+        # of items isn't a power of 2. This only feeds disassembly, so show
+        # something that is clearly bogus rather than failing.
+        if 0 <= value < len(self.items):
+            return self.items[value]
+        return '???'
+
+
+class OptionOperandType(ImmOperandType):
+    '''A 1-bit immediate that is set by writing a fixed piece of syntax'''
+    def __init__(self, option: str):
+        super().__init__(1)
+        self.option = option
+
+    def markdown_doc(self) -> Optional[str]:
+        # Override from OperandType base class
+        return 'To specify, use the literal syntax `{}`\n'.format(self.option)
+
+    def syntax_determines_value(self) -> bool:
+        return True
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        # The only acceptable syntax is the option text itself.
+        if as_str != self.option:
+            raise ValueError('Invalid option value, {!r}. '
+                             'If specified, it should have been {!r}.'
+                             .format(as_str, self.option))
+        return 1
+
+    def render_val(self, value: int) -> str:
+        # The width is 1, so value is 0 (option absent) or 1 (present).
+        assert value in (0, 1)
+        return '' if not value else self.option
+
+
+def parse_operand_type(fmt: str) -> OperandType:
+    '''Make an OperandType from a type string; ValueError if unrecognised'''
+    # Registers: each name gives a register kind and whether the operand is
+    # treated as a destination.
+    reg_types = {
+        'grs': ('gpr', False),
+        'grd': ('gpr', True),
+        'wrs': ('wdr', False),
+        'wrd': ('wdr', True),
+        'csr': ('csr', True),
+        'wsr': ('wsr', True)
+    }
+    if fmt in reg_types:
+        return RegOperandType(*reg_types[fmt])
+
+    # Immediates: "imm" (unknown width), "immN", "enum(a, b, ...)" and
+    # "option(text)".
+    if fmt == 'imm':
+        return ImmOperandType(None)
+    imm_match = re.match(r'imm([1-9][0-9]*)$', fmt)
+    if imm_match:
+        return ImmOperandType(int(imm_match.group(1)))
+    enum_match = re.match(r'enum\(([^\)]+)\)$', fmt)
+    if enum_match:
+        names = enum_match.group(1).split(',')
+        return EnumOperandType([name.strip() for name in names])
+    opt_match = re.match(r'option\(([^\)]+)\)$', fmt)
+    if opt_match:
+        return OptionOperandType(opt_match.group(1).strip())
+
+    raise ValueError("Operand type description {!r} "
+                     "didn't match any recognised format."
+                     .format(fmt))
+
+
+def infer_operand_type(name: str) -> OperandType:
+    '''Guess an operand's type from its name; ValueError if that fails'''
+    if name in ['grd', 'wrd', 'csr', 'wsr']:
+        # These names are used as the type directly.
+        return parse_operand_type(name)
+    if name == 'offset':
+        return parse_operand_type('imm')
+
+    # Otherwise, look for a type name with optional trailing digits.
+    patterns = [(r'grs[0-9]*$', 'grs'), (r'wrs[0-9]*$', 'wrs'),
+                (r'imm[0-9]*$', 'imm')]
+    for pattern, type_name in patterns:
+        if re.match(pattern, name):
+            return parse_operand_type(type_name)
+    raise ValueError("Operand name {!r} doesn't imply an operand type: "
+                     "you'll have to set the type explicitly."
+                     .format(name))
+
+
+def make_operand_type(yml: object, operand_name: str) -> OperandType:
+    '''Work out the type of the operand called operand_name
+
+    If yml is None, the type is inferred from the name. Otherwise, yml is
+    checked with check_str and then parsed as an explicit type description.
+
+    '''
+    if yml is None:
+        return infer_operand_type(operand_name)
+
+    what = 'type for {} operand'.format(operand_name)
+    return parse_operand_type(check_str(yml, what))
+
+
+class Operand:
+    '''An instruction operand from the YAML, with a name, type and doc'''
+    def __init__(self, yml: object, insn_name: str) -> None:
+        # yml should be a string (a bare operand name) or a dict.
+        what = 'operand for {!r} instruction'.format(insn_name)
+        if isinstance(yml, str):
+            name = yml
+            op_type = None
+            doc = None
+        elif isinstance(yml, dict):
+            yd = check_keys(yml, what, ['name'], ['type', 'doc'])
+            name = check_str(yd['name'], 'name of ' + what)
+            op_what = '{!r} {}'.format(name, what)
+            op_type = get_optional_str(yd, 'type', op_what)
+            doc = get_optional_str(yd, 'doc', op_what)
+        else:
+            raise ValueError('The YAML for an {} should be a string or a '
+                             'dict, not {!r}.'.format(what, yml))
+        self.name = name
+        self.op_type = make_operand_type(op_type, name)
+        self.doc = doc
diff --git a/hw/ip/otbn/util/shared/syntax.py b/hw/ip/otbn/util/shared/syntax.py
new file mode 100644
index 0000000..d715366
--- /dev/null
+++ b/hw/ip/otbn/util/shared/syntax.py
@@ -0,0 +1,354 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code for making sense of instruction syntax as defined in insns.yml'''
+
+import re
+from typing import Dict, List, Set, Tuple
+
+from .operand import Operand
+
+
+class SyntaxToken:
+    '''One token of an instruction's syntax: literal text or an operand
+
+    See InsnSyntax for how tokens fit together. If is_literal is true, text
+    holds a literal piece of syntax (with each run of whitespace squashed to
+    a single space). Otherwise, text holds the name of an operand.
+
+    '''
+    def __init__(self, is_literal: bool, text: str) -> None:
+        assert text
+        if is_literal:
+            # Squash each run of whitespace so that literals are canonical
+            text = re.sub(r'\s+', ' ', text)
+        self.is_literal = is_literal
+        self.text = text
+
+    def render_doc(self) -> str:
+        '''Return this token as it should appear in the documentation
+
+        Literals appear verbatim; an operand called foo appears as <foo>.
+
+        '''
+        return self.text if self.is_literal else '<{}>'.format(self.text)
+
+    def asm_pattern(self) -> str:
+        '''Return a regex pattern that matches this token in assembly code
+
+        The pattern for an operand token is wrapped in a group, which
+        captures the operand. See InsnSyntax for details of the syntax.
+
+        '''
+        if not self.is_literal:
+            # This is an operand and, for now at least, we're very
+            # restrictive about what one can look like: no spaces and no
+            # commas. (The second rule avoids silliness like "a, b, c"
+            # matching a syntax with only two operands by setting the second
+            # to "b, c").
+            #
+            # We also split out ++ and -- separately, to disambiguate things
+            # like x1++, which must be parsed as x1 followed by ++.
+            #
+            # Doing better and allowing things like
+            #
+            #    addi x0, x1, 1 + 3
+            #
+            # would need something more serious than regexes for parsing.
+            return r'(-?[^ ,+\-]+|[+\-]+)\s*'
+
+        # Split the literal on whitespace. We get an empty list iff the
+        # literal was pure whitespace, in which case some whitespace is
+        # required in the input.
+        words = self.text.split()
+        if not words:
+            return r'\s+'
+
+        # Otherwise, disallow leading space, require whitespace wherever the
+        # literal has it internally and allow optional trailing space. This
+        # convention should avoid lots of \s*\s* pairs.
+        return r'\s+'.join(re.escape(word) for word in words) + r'\s*'
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return the assembly text for this token
+
+        A literal token renders as its own text. An operand token looks up
+        its value in op_vals and renders it with the operand's type (found
+        through operands).
+
+        '''
+        if self.is_literal:
+            return self.text
+
+        op_name = self.text
+        assert op_name in op_vals
+        assert op_name in operands
+
+        operand = operands[op_name]
+        return operand.op_type.render_val(op_vals[op_name])
+
+
+class SyntaxHunk:
+    '''A run of syntax tokens that is required or optional as a unit'''
+    def __init__(self,
+                 is_optional: bool,
+                 tokens: List[SyntaxToken],
+                 op_list: List[str],
+                 op_set: Set[str]) -> None:
+        assert tokens
+        self.tokens = tokens
+        self.is_optional = is_optional
+        self.op_set = op_set
+        self.op_list = op_list
+
+    @staticmethod
+    def from_list(operands: List[str]) -> 'SyntaxHunk':
+        '''Make a required hunk from operand names, separated by commas
+
+        There must be at least one operand and the names must be distinct.
+
+        '''
+        assert operands
+        sep = SyntaxToken(True, ', ')
+        tokens = [SyntaxToken(False, operands[0])]
+        for name in operands[1:]:
+            tokens += [sep, SyntaxToken(False, name)]
+
+        names = set(operands)
+        assert len(names) == len(operands)
+
+        return SyntaxHunk(False, tokens, operands, names)
+
+    @staticmethod
+    def from_string(mnemonic: str, optional: bool, raw: str) -> 'SyntaxHunk':
+        '''Parse one hunk of syntax from the YAML file (see InsnSyntax)
+
+        Here, raw is the text of the hunk and mnemonic is just used in error
+        messages. Raises a ValueError if an operand name contains an angle
+        bracket, if an operand appears twice or if two operands are adjacent.
+
+        '''
+        assert raw
+
+        tokens = []
+        op_list = []
+        op_set = set()
+
+        # Because the regex has a group, re.split gives the literal text at
+        # even indices and the operand names that it captured at odd ones.
+        pieces = re.split(r'<([^>]+)>', raw)
+        last_idx = len(pieces) - 1
+        for idx, piece in enumerate(pieces):
+            is_operand = bool(idx & 1)
+            if is_operand and ('<' in piece or '>' in piece):
+                raise ValueError("Syntax for {!r} has hunk {!r} which doesn't "
+                                 "seem to surround <operand>s properly."
+                                 .format(mnemonic, raw))
+
+            if is_operand:
+                assert piece
+                if piece in op_set:
+                    raise ValueError("Syntax for {!r} has hunk {!r} with "
+                                     "more than one occurrence of <{}>."
+                                     .format(mnemonic, raw, piece))
+                op_list.append(piece)
+                op_set.add(piece)
+
+            # An empty piece gives no token; it's only allowed at either end.
+            if not piece and 0 < idx < last_idx:
+                raise ValueError("Syntax for {!r} has two adjacent operand "
+                                 "tokens, with no intervening syntax."
+                                 .format(mnemonic))
+
+            if piece:
+                tokens.append(SyntaxToken(not is_operand, piece))
+
+        return SyntaxHunk(optional, tokens, op_list, op_set)
+
+    def render_doc(self) -> str:
+        '''Return this hunk for the documentation (in [] if it is optional)'''
+        rendered = ''.join(tok.render_doc() for tok in self.tokens)
+        if self.is_optional:
+            return '[{}]'.format(rendered)
+        return rendered
+
+    def asm_pattern(self) -> str:
+        '''Return a regex pattern that matches this hunk in assembly code
+
+        The pattern has one group per operand. Within the hunk, it allows
+        trailing space but not leading space.
+
+        '''
+        body = ''.join(tok.asm_pattern() for tok in self.tokens)
+        if not self.is_optional:
+            return body
+
+        # An optional hunk has the form "(?:foo)?": a non-capturing group
+        # around foo, with "?" (zero-or-one) applied to it.
+        return '(?:{})?'.format(body)
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return the assembly text for this hunk, given operand values
+
+        An optional hunk whose operands are all zero is left out, so the
+        result is then the empty string.
+
+        '''
+        if self.is_optional:
+            if not any(op_vals[name] != 0 for name in self.op_list):
+                return ''
+
+        return ''.join(tok.render_vals(op_vals, operands)
+                       for tok in self.tokens)
+
+
+class InsnSyntax:
+    '''A class representing the syntax of an instruction
+
+    An instruction's syntax is specified in the YAML file by writing it out
+    with operand names surrounded by angle brackets. For example, a simple NOT
+    instruction might have a syntax of
+
+        <dst>, <src>
+
+    which should be interpreted as the following tokens:
+
+        - Operand called 'dst'
+        - A literal ','
+        - Operand called 'src'
+
+    Between the tokens, whitespace is optional (so "x0 , x1" and "x0,x1" both
+    match the syntax above) unless a literal token is just a space, in which
+    case some whitespace is required. For example
+
+        <dst> <src>
+
+    would match "x0 x1" but not "x0x1". Whitespace within literal syntax tokens
+    means that some space is required, matching the regex \\s+. For example,
+    the (rather strange) syntax
+
+       <dst> + - <src>
+
+    would match "x0 + - x1" or "x0+ -x1", but not "x0 +- x1".
+
+    Some operands (and surrounding syntax) might be optional. The optional
+    syntax is surrounded by square brackets. Nesting is not supported. For
+    example:
+
+       <dst>, <src>[, <offset>]
+
+    would match "x0, x1, 123" or "x0, x1".
+
+    Note that a given syntax might be ambiguous. For example,
+
+       <dst>, <src>[, <offset>][, <flavour>]
+
+    With "x0, x1, 123", is 123 an offset or a flavour? (We choose not to embed
+    typing information into the syntax, because that results in very confusing
+    assembler error messages). We break ties in the same way as the underlying
+    regex engine, assigning the operand to the first group, so 123 is an offset
+    in this case. Such syntaxes are rather confusing though, so probably not a
+    good idea.
+
+    The parsed syntax is stored as a list of "hunks". Each hunk contains a flag
+    showing whether the hunk is optional or required and also a list of
+    SyntaxToken objects.
+
+    '''
+    def __init__(self,
+                 hunks: List[SyntaxHunk],
+                 op_list: List[str],
+                 op_set: Set[str]) -> None:
+        self.hunks = hunks
+        self.op_list = op_list
+        self.op_set = op_set
+
+    @staticmethod
+    def from_list(operands: List[str]) -> 'InsnSyntax':
+        '''Smart constructor for a list of operands with "normal" syntax'''
+        if not operands:
+            return InsnSyntax([], [], set())
+
+        hunk = SyntaxHunk.from_list(operands)
+        return InsnSyntax([hunk], hunk.op_list, hunk.op_set)
+
+    @staticmethod
+    def from_yaml(mnemonic: str, raw: str) -> 'InsnSyntax':
+        '''Parse the syntax in the YAML file
+
+        Raises a ValueError if raw is malformed (such as unbalanced, nested or
+        empty square brackets, or an operand that appears more than once).
+
+        '''
+        bad_brackets = ('Unbalanced or nested [] in instruction '
+                        'syntax for {!r}.'
+                        .format(mnemonic))
+
+        # The raw syntax looks something like
+        #
+        #    <op0>, <op1>[(<op2>)]
+        #
+        # to mean that you either have "x0, x1" or "x0, x2(x3)". First, split
+        # out the bracketed parts.
+        by_left = raw.split('[')
+        parts = [(False, by_left[0])]
+        for after_left in by_left[1:]:
+            split = after_left.split(']', 1)
+            if len(split) != 2:
+                raise ValueError(bad_brackets)
+
+            parts += [(True, split[0]), (False, split[1])]
+
+        # Now parts contains a list of pairs (optional, txt) where txt is a
+        # piece of the syntax and optional is true if it was in brackets. A
+        # required piece might be empty: "[a]b c[d]" both starts and ends
+        # with one. An optional piece shouldn't be empty: that would be
+        # something like "a[]b", which doesn't make much sense. A required
+        # piece can't contain "]" either: that bracket would be unbalanced.
+        hunks = []
+        for optional, txt in parts:
+            if not optional and ']' in txt:
+                raise ValueError(bad_brackets)
+            if txt:
+                hunks.append(SyntaxHunk.from_string(mnemonic, optional, txt))
+            elif optional:
+                raise ValueError('Empty [] in instruction syntax for {!r}.'
+                                 .format(mnemonic))
+
+        # Collect up operands across the hunks
+        op_list = []
+        op_set = set()
+        for hunk in hunks:
+            op_list += hunk.op_list
+            op_set |= hunk.op_set
+
+        if len(op_list) != len(op_set):
+            raise ValueError('Instruction syntax for {!r} is not '
+                             'linear in its operands.'
+                             .format(mnemonic))
+
+        return InsnSyntax(hunks, op_list, op_set)
+
+    def render_doc(self) -> str:
+        '''Return how this syntax should look in the documentation'''
+        return ''.join(hunk.render_doc() for hunk in self.hunks)
+
+    def asm_pattern(self) -> Tuple[str, Dict[str, int]]:
+        '''Return a regex pattern and a group name map for this syntax'''
+        hunk_pats = [hunk.asm_pattern() for hunk in self.hunks]
+        pattern = r'\s*' + ''.join(hunk_pats) + '$'
+        # Regex groups count from 1 and appear in the order of op_list
+        op_to_grp = {op: 1 + idx for idx, op in enumerate(self.op_list)}
+        return (pattern, op_to_grp)
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return an assembly listing for the given operand fields'''
+        return ''.join(hunk.render_vals(op_vals, operands)
+                       for hunk in self.hunks)
diff --git a/hw/ip/otbn/util/shared/yaml_parse_helpers.py b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
new file mode 100644
index 0000000..9ef1158
--- /dev/null
+++ b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
@@ -0,0 +1,120 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code to help make typed objects out of parsed YAML'''
+
+from typing import Callable, Dict, List, Optional, Sequence, TypeVar
+
+
+T = TypeVar('T')
+
+
+def check_keys(obj: object,
+               what: str,
+               required_keys: List[str],
+               optional_keys: List[str]) -> Dict[str, object]:
+    '''Check that obj is a dict object with the expected keys
+
+    Every key in required_keys must be present and every key that is present
+    must be in required_keys or optional_keys. If so, return obj. If not,
+    raise a ValueError; the what argument names the object.
+
+    '''
+    if not isinstance(obj, dict):
+        raise ValueError("{} is expected to be a dict, but was actually a {}."
+                         .format(what, type(obj).__name__))
+
+    # Collect the keys that obj is allowed to have
+    allowed = set()
+    for keys in (required_keys, optional_keys):
+        for key in keys:
+            # The caller shouldn't list the same key twice
+            assert key not in allowed
+            allowed.add(key)
+
+    # Required keys that obj lacks, in the order of required_keys
+    missing = [key for key in required_keys if key not in obj]
+
+    # A key in parsed YAML needn't be a string (it could be an int, say), so
+    # convert each one with str before joining them in the error message.
+    unexpected = [str(key) for key in obj if key not in allowed]
+
+    if missing or unexpected:
+        mstr = ('The following required fields were missing: {}.'
+                .format(', '.join(missing)) if missing else '')
+        ustr = ('The following unexpected fields were found: {}.'
+                .format(', '.join(unexpected)) if unexpected else '')
+        raise ValueError("{} doesn't have the right keys. {}{}{}"
+                         .format(what,
+                                 mstr,
+                                 ' ' if mstr and ustr else '',
+                                 ustr))
+
+    return obj
+
+
+def check_str(obj: object, what: str) -> str:
+    '''Return obj if it is a string
+
+    Otherwise, raise a ValueError whose message uses what to name the object.
+
+    '''
+    if isinstance(obj, str):
+        return obj
+    raise ValueError('{} is of type {}, not a string.'
+                     .format(what, type(obj).__name__))
+
+
+def check_optional_str(obj: object, what: str) -> Optional[str]:
+    '''Return obj if it is a string or None
+
+    Otherwise, raise a ValueError whose message uses what to name the object.
+
+    '''
+    if obj is None or isinstance(obj, str):
+        return obj
+    raise ValueError('{} is of type {}, not a string.'
+                     .format(what, type(obj).__name__))
+
+
+def check_bool(obj: object, what: str) -> bool:
+    '''Check that the given object is a bool, returning it if so
+
+    If not, raise a ValueError; the what argument names the object.
+
+    '''
+    if not isinstance(obj, bool):
+        raise ValueError('{} is of type {}, not a bool.'
+                         .format(what, type(obj).__name__))
+    return obj
+
+
+def check_list(obj: object, what: str) -> List[object]:
+    '''Return obj if it is a list
+
+    Otherwise, raise a ValueError whose message uses what to name the object.
+
+    '''
+    if isinstance(obj, list):
+        return obj
+    raise ValueError('{} is of type {}, not a list.'
+                     .format(what, type(obj).__name__))
+
+
+def index_list(what: str, objs: Sequence[T],
+               get_key: Callable[[T], str]) -> Dict[str, T]:
+    '''Index objs by get_key, raising a ValueError if two share a key'''
+    indexed = {}
+    for item in objs:
+        name = get_key(item)
+        if name in indexed:
+            raise ValueError('Duplicate object with key {} in {}.'
+                             .format(name, what))
+        indexed[name] = item
+    return indexed
+
+
+def get_optional_str(data: Dict[str, object], key: str, what: str) -> Optional[str]:
+    '''Return data.get(key), after checking that it is a string or None'''
+    return check_optional_str(data.get(key), '{} field for {}'.format(key, what))
diff --git a/hw/ip/otbn/util/yaml_to_doc.py b/hw/ip/otbn/util/yaml_to_doc.py
index 30d815a..b1d5898 100755
--- a/hw/ip/otbn/util/yaml_to_doc.py
+++ b/hw/ip/otbn/util/yaml_to_doc.py
@@ -9,8 +9,10 @@
 import sys
 from typing import List
 
-from shared.insn_yaml import (BoolLiteral, Encoding, Insn, InsnsFile, Operand,
-                              load_file)
+from shared.bool_literal import BoolLiteral
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
+from shared.operand import Operand
 
 
 def render_operand_row(operand: Operand) -> str: