hw/ip/otbn/util/insn_yaml.py - 3p/lowrisc/opentitan - Git at Google

 # Copyright lowRISC contributors.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0

 '''Support code for reading the instruction database in insns.yml'''

 import itertools
 import re
 from typing import (Callable, Dict, List, Optional,
                     Sequence, Set, Tuple, TypeVar, Union)

 import yaml


 T = TypeVar('T')


 def check_keys(obj: object,
                what: str,
                required_keys: List[str],
                optional_keys: List[str]) -> Dict[str, object]:
     '''Check that obj is a dict object with the expected keys

     Every key in required_keys must be present in obj, and every key of obj
     must appear in either required_keys or optional_keys. If the check
     passes, obj itself is returned.

     If not, raise a ValueError; the what argument names the object.

     '''
     if not isinstance(obj, dict):
         raise ValueError("{} is expected to be a dict, but was actually a {}."
                          .format(what, type(obj).__name__))

     # The key lists come from the calling code rather than from the data
     # being checked, so a key that is listed twice is a programming error.
     allowed = set()
     for key in list(required_keys) + list(optional_keys):
         assert key not in allowed
         allowed.add(key)

     missing = [key for key in required_keys if key not in obj]

     # The keys of a parsed YAML dict needn't be strings (they might be ints,
     # for example). Convert them explicitly so that an unexpected non-string
     # key gives the ValueError below rather than a TypeError from str.join.
     unexpected = [str(key) for key in obj if key not in allowed]

     if missing or unexpected:
         mstr = ('The following required fields were missing: {}.'
                 .format(', '.join(missing)) if missing else '')
         ustr = ('The following unexpected fields were found: {}.'
                 .format(', '.join(unexpected)) if unexpected else '')
         raise ValueError("{} doesn't have the right keys. {}{}{}"
                          .format(what,
                                  mstr,
                                  ' ' if mstr and ustr else '',
                                  ustr))

     return obj


 def check_str(obj: object, what: str) -> str:
     '''Return obj unchanged if it is a string

     Otherwise raise a ValueError, whose message uses the what argument to
     name the object.

     '''
     if isinstance(obj, str):
         return obj

     type_name = type(obj).__name__
     raise ValueError('{} is of type {}, not a string.'
                      .format(what, type_name))


 def check_optional_str(obj: object, what: str) -> Optional[str]:
     '''Return obj unchanged if it is either None or a string

     Otherwise raise a ValueError, whose message uses the what argument to
     name the object.

     '''
     if obj is None or isinstance(obj, str):
         return obj

     type_name = type(obj).__name__
     raise ValueError('{} is of type {}, not a string.'
                      .format(what, type_name))


 def check_bool(obj: object, what: str) -> bool:
     '''Check that the given object is a bool

     Only the values True and False are accepted: other "truthy" or "falsy"
     values such as 1, 0 or None are rejected.

     If not, raise a ValueError; the what argument names the object.

     '''
     if not isinstance(obj, bool):
         # The message must describe the type that was expected here (a bool,
         # not a string as for check_str).
         raise ValueError('{} is of type {}, not a bool.'
                          .format(what, type(obj).__name__))
     return obj


 def check_list(obj: object, what: str) -> List[object]:
     '''Return obj unchanged if it is a list

     Otherwise raise a ValueError, whose message uses the what argument to
     name the object.

     '''
     if isinstance(obj, list):
         return obj

     type_name = type(obj).__name__
     raise ValueError('{} is of type {}, not a list.'
                      .format(what, type_name))


 def index_list(what: str,
                objs: Sequence[T],
                get_key: Callable[[T], str]) -> Dict[str, T]:
     '''Build a dictionary from the objects in objs, keyed by get_key

     Raises a ValueError if two objects have the same key; the what argument
     names the collection in the error message.

     '''
     by_key = {}  # type: Dict[str, T]
     for item in objs:
         item_key = get_key(item)
         if item_key in by_key:
             raise ValueError('Duplicate object with key {} in {}.'
                              .format(item_key, what))
         by_key[item_key] = item
     return by_key


 class InsnGroup:
     '''A single instruction group

     The YAML representation must be a dict with exactly the keys 'key',
     'title' and 'doc', each holding a string. They are stored in attributes
     of the same names.

     '''
     def __init__(self, yml: object) -> None:
         group = check_keys(yml, 'insn-group', ['key', 'title', 'doc'], [])

         key = check_str(group['key'], 'insn-group key')
         title = check_str(group['title'], 'insn-group title')
         doc = check_str(group['doc'], 'insn-group doc')

         self.key = key
         self.title = title
         self.doc = doc


 class InsnGroups:
     '''The (non-empty) list of instruction groups

     The groups are stored in order in the groups attribute and indexed by
     their key in key_to_group. A ValueError is raised if the list is empty
     or if two groups share a key.

     '''
     def __init__(self, yml: object) -> None:
         groups = []
         for group_yml in check_list(yml, 'insn-groups'):
             groups.append(InsnGroup(group_yml))
         self.groups = groups

         if len(groups) == 0:
             raise ValueError('Empty list of instruction groups: '
                              'we need at least one as a base group.')

         self.key_to_group = index_list('insn-groups',
                                        groups,
                                        lambda group: group.key)

     def default_group(self) -> str:
         '''Get the name of the default instruction group'''
         # The constructor guarantees there is at least one group; the first
         # one listed is the default.
         assert self.groups
         first_group = self.groups[0]
         return first_group.key


 class BitRanges:
     '''Represents the bit ranges used for a field in an encoding scheme

     After construction:

       - mask is an integer with a bit set for each bit in some range
       - ranges is a list of (msb, lsb) pairs, in the order they were written
       - width is the total number of bits covered by the ranges

     '''
     def __init__(self, as_string: str, what: str) -> None:
         '''Parse as_string as a list of bit ranges

         Raises a ValueError if the string is empty or malformed, if a range
         has msb < lsb or msb >= 32, or if two ranges overlap. The what
         argument names the field in error messages.

         '''
         #   ranges ::= range
         #            | range ',' ranges
         #
         #   range ::= num
         #           | num '-' num
         #
         # Ranges are assumed to be msb-lsb (with msb >= lsb). Bit indices are
         # at most 31 and ranges are disjoint.

         if not as_string:
             raise ValueError('Empty string as bits for {}'.format(what))

         # A mask of the bits that have appeared in more than one range
         overlaps = 0

         self.mask = 0
         self.ranges = []  # type: List[Tuple[int, int]]
         self.width = 0

         for rng in as_string.split(','):
             match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng)
             if match is None:
                 raise ValueError('Range {!r} in bits for {} is malformed.'
                                  .format(rng, what))

             # A range with no '-' is a single bit, so lsb equals msb.
             msb = int(match.group(1))
             maybe_lsb = match.group(2)
             lsb = msb if maybe_lsb is None else int(maybe_lsb)

             if msb < lsb:
                 raise ValueError('Range {!r} in bits for {} has msb < lsb.'
                                  .format(rng, what))

             if msb >= 32:
                 raise ValueError('Range {!r} in bits for {} has msb >= 32.'
                                  .format(rng, what))

             # Ones in bits lsb..msb (inclusive); zeros elsewhere.
             rng_mask = (1 << (msb + 1)) - (1 << lsb)
             overlaps |= rng_mask & self.mask
             self.mask |= rng_mask

             self.ranges.append((msb, lsb))
             self.width += msb - lsb + 1

         if overlaps:
             # The width in the format spec includes the '0x' prefix, so we
             # need 10 characters to show all 8 hex digits of a 32-bit mask.
             raise ValueError('Bits for {} have overlapping ranges '
                              '(mask: {:#010x})'
                              .format(what, overlaps))


 class BoolLiteral:
     '''A boolean literal whose digits are each 0, 1 or x

     The literal is stored as two masks, ones and xs, together with its width
     in bits. A bit is set in ones if the corresponding digit was 1 and is set
     in xs if the corresponding digit was x. To test whether a value matches
     the literal, clear the bits of the value that are set in xs and compare
     the result with ones.

     '''
     def __init__(self, as_string: str, what: str) -> None:
         '''Parse as_string, which looks something like "b01_x1"

         The string must start with 'b'. Underscores are ignored and there
         must be at least one digit. Raises a ValueError on a malformed
         string; the what argument names the literal in error messages.

         '''
         if not as_string.startswith('b'):
             raise ValueError("Boolean literal for {} doesn't start with a 'b'."
                              .format(what))

         ones = 0
         xs = 0
         width = 0

         # Digits are written most significant first, so each new digit goes
         # in at the bottom of the masks.
         for digit in as_string[1:]:
             if digit == '_':
                 continue

             if digit not in ('0', '1', 'x'):
                 raise ValueError('Boolean literal for {} has '
                                  'unsupported character: {!r}.'
                                  .format(what, digit))

             ones = (ones << 1) | (1 if digit == '1' else 0)
             xs = (xs << 1) | (1 if digit == 'x' else 0)
             width += 1

         if width == 0:
             raise ValueError('Empty boolean literal for {}.'.format(what))

         self.ones = ones
         self.xs = xs
         self.width = width

     def char_for_bit(self, bit: int) -> str:
         '''Return 0, 1 or x for the bit at the given position'''
         assert bit < self.width
         is_one = (self.ones >> bit) & 1
         is_x = (self.xs >> bit) & 1
         if is_one:
             return '1'
         return 'x' if is_x else '0'


 class EncSchemeField:
     '''Represents a single field in an encoding scheme

     bits gives the bit ranges that the field occupies. value is the literal
     that the field is fixed to, or None if the scheme doesn't give one. shift
     is a non-negative integer (zero unless the YAML gives a 'shift' key).

     '''
     def __init__(self,
                  bits: BitRanges,
                  value: Optional[BoolLiteral],
                  shift: int) -> None:
         self.bits = bits
         self.value = value
         self.shift = shift

     @staticmethod
     def from_yaml(yml: object, what: str) -> 'EncSchemeField':
         '''Construct a field from its YAML representation

         The YAML is either a dict (with a 'bits' key and optional 'value' and
         'shift' keys) or just the bits, given as a bare string or integer.
         Raises a ValueError if it is malformed; the what argument names the
         field in error messages.

         '''
         bits_what = 'bits for {}'.format(what)
         value_what = 'value for {}'.format(what)
         shift_what = 'shift for {}'.format(what)

         if not isinstance(yml, (dict, str, int)):
             raise ValueError('{} is a {}, but should be a '
                              'dict, string or integer.'
                              .format(what, type(yml).__name__))

         # Defaults, which apply when we just have a bare string or integer.
         raw_value = None
         shift = 0

         if not isinstance(yml, dict):
             bits_yml = yml
         else:
             yd = check_keys(yml, what, ['bits'], ['value', 'shift'])

             bits_yml = yd['bits']
             if not isinstance(bits_yml, (str, int)):
                 raise ValueError('{} is of type {}, not a string or int.'
                                  .format(bits_what, type(bits_yml).__name__))

             # The value must be given as a string. It is supposed to be
             # written in base 2, but PyYAML would parse 111 as one-hundred
             # and eleven, 011 as 9 and 0x11 as 17.
             raw_value = yd.get('value')
             if raw_value is not None and not isinstance(raw_value, str):
                 raise ValueError("{} is of type {}, but must be a string "
                                  "(we don't allow automatic conversion "
                                  "because YAML's int conversion assumes "
                                  "base 10 and value should be in base 2)."
                                  .format(value_what,
                                          type(raw_value).__name__))

             # The shift is written in base 10, so we can allow either an
             # integer or a string of decimal digits.
             shift_yml = yd.get('shift')
             if isinstance(shift_yml, str):
                 if re.match(r'[0-9]+$', shift_yml) is None:
                     raise ValueError('{} is {!r} but should be a '
                                      'non-negative integer.'
                                      .format(shift_what, shift_yml))
                 shift = int(shift_yml)
             elif isinstance(shift_yml, int):
                 if shift_yml < 0:
                     raise ValueError('{} is {!r} but should be a '
                                      'non-negative integer.'
                                      .format(shift_what, shift_yml))
                 shift = shift_yml
             elif shift_yml is not None:
                 raise ValueError("{} is of type {}, but must be a string "
                                  "or non-negative integer."
                                  .format(shift_what, type(shift_yml).__name__))

         # The bits are usually written as a string ("10-4", or similar), but
         # a bare bit index will have been parsed by YAML as an int. Either
         # way, convert to a string for BitRanges to parse.
         assert isinstance(bits_yml, (str, int))
         bits = BitRanges(str(bits_yml), bits_what)

         if raw_value is None:
             return EncSchemeField(bits, None, shift)

         value = BoolLiteral(raw_value, value_what)
         if bits.width != value.width:
             raise ValueError('{} has bits that imply a width of {}, but '
                              'a value with width {}.'
                              .format(what, bits.width, value.width))

         return EncSchemeField(bits, value, shift)


 class EncSchemeImport:
     '''An object representing inheritance of a parent scheme

     When importing a parent scheme, we can set some of its fields with
     immediate values. These are stored in the settings field (a dictionary
     from field name to literal). The name of the parent scheme is stored in
     the parent field.

     '''
     def __init__(self, yml: object, importer_name: str) -> None:
         '''Parse one import, written in the scheme called importer_name

         Raises a ValueError if yml isn't a string or doesn't have the syntax
         described below.

         '''
         as_str = check_str(yml,
                            'value for import in encoding scheme {!r}'
                            .format(importer_name))

         # The supported syntax is
         #
         #    - parent0(field0=b111, field1=b10)
         #    - parent1()
         #    - parent2
         #
         # The pattern allows nothing between the parentheses so that the
         # "parent1()" form is accepted.
         match = re.match(r'([^ (]+)[ ]*(?:\(([^)]*)\))?$', as_str)
         if not match:
             raise ValueError('Malformed encoding scheme '
                              'inheritance by scheme {!r}: {!r}.'
                              .format(importer_name, as_str))

         self.parent = match.group(1)
         self.settings = {}  # type: Dict[str, BoolLiteral]

         when = ('When inheriting from {!r} in encoding scheme {!r}'
                 .format(self.parent, importer_name))

         # An empty (or blank) argument list means there are no settings, just
         # as if the parentheses had been left out.
         raw_args = match.group(2)
         if raw_args is None or not raw_args.strip():
             return

         for arg in raw_args.split(','):
             arg = arg.strip()
             arg_parts = arg.split('=')
             if len(arg_parts) != 2:
                 raise ValueError('{}, found an argument with {} '
                                  'equals signs (should have exactly one).'
                                  .format(when, len(arg_parts) - 1))

             field_name = arg_parts[0]
             field_what = ('literal value for field {!r} when inheriting '
                           'from {!r} in encoding scheme {!r}'
                           .format(arg_parts[0], self.parent, importer_name))
             field_value = BoolLiteral(arg_parts[1], field_what)

             if field_name in self.settings:
                 raise ValueError('{}, found multiple arguments assigning '
                                  'values to the field {!r}.'
                                  .format(when, field_name))

             self.settings[field_name] = field_value

     def apply_settings(self,
                        esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields':
         '''Return a copy of esf with the fields in self.settings given values

         esf itself is not modified. Raises a ValueError if a setting names a
         field that esf doesn't have or if the literal has a different width
         from the field. The what argument names the importer in error
         messages.

         '''
         # Copy and set values in anything that has a setting
         fields = {}
         for name, literal in self.settings.items():
             old_field = esf.fields.get(name)
             if old_field is None:
                 raise ValueError('{} sets unknown field {!r} from {!r}.'
                                  .format(what, name, self.parent))

             if old_field.bits.width != literal.width:
                 raise ValueError('{} sets field {!r} from {!r} with a literal '
                                  'of width {}, but the field has width {}.'
                                  .format(what, name, self.parent,
                                          literal.width, old_field.bits.width))

             fields[name] = EncSchemeField(old_field.bits,
                                           literal,
                                           old_field.shift)

         # Copy anything else. A copied field only counts as an operand field
         # if it has no value: the parent might have fixed its value already.
         op_fields = set()
         for name, old_field in esf.fields.items():
             if name in fields:
                 continue
             if old_field.value is None:
                 op_fields.add(name)
             fields[name] = old_field

         return EncSchemeFields(fields, op_fields, esf.mask)


 class EncSchemeFields:
     '''An object representing some fields in an encoding scheme

     fields maps field names to fields, op_fields is the set of names of
     fields that have no fixed value and mask has a bit set for each bit that
     is covered by some field.

     '''
     def __init__(self,
                  fields: Dict[str, EncSchemeField],
                  op_fields: Set[str],
                  mask: int) -> None:
         self.fields = fields
         self.op_fields = op_fields
         self.mask = mask

     @staticmethod
     def empty() -> 'EncSchemeFields':
         '''Return a new object with no fields'''
         return EncSchemeFields({}, set(), 0)

     @staticmethod
     def from_yaml(yml: object, name: str) -> 'EncSchemeFields':
         '''Read the fields written directly in the encoding scheme name

         Raises a ValueError if yml is not a dict with string keys, if a field
         is malformed or if the bit ranges of two fields overlap.

         '''
         if not isinstance(yml, dict):
             raise ValueError('fields for encoding scheme {!r} should be a '
                              'dict, but we saw a {}.'
                              .format(name, type(yml).__name__))

         fields = {}
         op_fields = set()  # type: Set[str]
         mask = 0

         # A mask of the bits that appear in more than one field
         overlaps = 0

         for key, val in yml.items():
             if not isinstance(key, str):
                 raise ValueError('{!r} is a bad key for a field name of '
                                  'encoding scheme {} (should be str, not {}).'
                                  .format(key, name, type(key).__name__))

             fld_what = 'field {!r} of encoding scheme {}'.format(key, name)
             field = EncSchemeField.from_yaml(val, fld_what)

             overlaps |= mask & field.bits.mask
             mask |= field.bits.mask

             fields[key] = field
             if field.value is None:
                 op_fields.add(key)

         if overlaps:
             # The width in the format spec includes the '0x' prefix, so we
             # need 10 characters to show all 8 hex digits of a 32-bit mask.
             raise ValueError('Direct fields for encoding scheme {} have '
                              'overlapping ranges (mask: {:#010x})'
                              .format(name, overlaps))

         return EncSchemeFields(fields, op_fields, mask)

     def merge_in(self, right: 'EncSchemeFields', when: str) -> None:
         '''Add the fields from right to this object (modifying it in place)

         Raises a ValueError if a field of right has the same name as a field
         that we already have or if its bits overlap with bits that are
         already in use. The when argument is appended to error messages to
         describe what was being merged.

         '''
         for name, field in right.fields.items():
             if name in self.fields:
                 raise ValueError('Duplicate field name: {!r} {}.'
                                  .format(name, when))

             overlap = self.mask & field.bits.mask
             if overlap:
                 raise ValueError('Overlapping bit ranges '
                                  '(masks: {:08x} and {:08x} have '
                                  'intersection {:08x}) {}.'
                                  .format(self.mask,
                                          field.bits.mask, overlap, when))

             self.fields[name] = field
             self.mask |= field.bits.mask
             # Whether this is an operand field is worked out from the field
             # itself, not from right.op_fields.
             if field.value is None:
                 assert name not in self.op_fields
                 self.op_fields.add(name)


 class EncScheme:
     '''An encoding scheme as written in the YAML (before resolving parents)

     direct_fields holds the fields listed under the optional 'fields' key.
     parents holds the imports listed under the optional 'parents' key,
     indexed by the name of the parent scheme. At least one of the two keys
     must be given.

     '''
     def __init__(self, yml: object, name: str) -> None:
         what = 'encoding scheme {!r}'.format(name)
         yd = check_keys(yml, what, [], ['parents', 'fields'])

         if len(yd) == 0:
             raise ValueError('{} has no parents or fields.'.format(what))

         fields_yml = yd.get('fields')
         if fields_yml is None:
             self.direct_fields = EncSchemeFields.empty()
         else:
             self.direct_fields = EncSchemeFields.from_yaml(fields_yml, name)

         parents_what = 'parents of {}'.format(what)
         parents_yml = yd.get('parents')

         imports = []
         if parents_yml is not None:
             for parent_yml in check_list(parents_yml, parents_what):
                 imports.append(EncSchemeImport(parent_yml, name))

         # Indexing by parent name also checks that no parent is listed twice.
         self.parents = index_list(parents_what,
                                   imports,
                                   lambda imp: imp.parent)


 class EncSchemes:
     '''The collection of encoding schemes, indexed by name

     schemes holds each scheme as written in the YAML. resolved is a cache of
     the fields of schemes whose inheritance has already been resolved.

     '''
     def __init__(self, yml: object) -> None:
         '''Read the schemes from a dict mapping scheme name to scheme

         Raises a ValueError if yml is not a dict with string keys or if one
         of the schemes is malformed.

         '''
         if not isinstance(yml, dict):
             raise ValueError("value for encoding-schemes is expected to be "
                              "a dict, but was actually a {}."
                              .format(type(yml).__name__))

         self.schemes = {}  # type: Dict[str, EncScheme]
         self.resolved = {}  # type: Dict[str, EncSchemeFields]

         for key, val in yml.items():
             if not isinstance(key, str):
                 raise ValueError('{!r} is a bad key for an encoding scheme '
                                  'name (should be str, not {}).'
                                  .format(key, type(key).__name__))
             self.schemes[key] = EncScheme(val, key)

     def _resolve(self,
                  name: str,
                  user: str,
                  stack: List[str]) -> EncSchemeFields:
         '''Get the fields of the scheme called name, including inherited ones

         user describes whatever asked for the scheme (for error messages) and
         stack is the list of names of schemes that we are in the middle of
         resolving, used to spot circular inheritance.

         Raises a RuntimeError on circular inheritance and a ValueError if the
         scheme doesn't exist or if fields can't be merged.

         '''
         # Have we resolved this before?
         resolved = self.resolved.get(name)
         if resolved is not None:
             return resolved

         # Spot any circular inheritance
         if name in stack:
             raise RuntimeError('Circular inheritance of encoding '
                                'schemes: {}'
                                .format(' -> '.join(stack + [name])))

         # Does the scheme actually exist?
         scheme = self.schemes.get(name)
         if scheme is None:
             raise ValueError('{} requires undefined encoding scheme {!r}.'
                              .format(user, name))

         # Recursively try to resolve each parent scheme, applying any import
         # settings
         resolved_parents = {}
         new_stack = stack + [name]
         what = 'Import list of encoding scheme {!r}'.format(name)
         for pname, pimport in scheme.parents.items():
             resolved = self._resolve(pimport.parent, what, new_stack)
             resolved_parents[pname] = pimport.apply_settings(resolved, what)

         # Now try to merge the resolved imports
         merged = EncSchemeFields.empty()
         parent_names_so_far = []  # type: List[str]
         for pname, pfields in resolved_parents.items():
             when = ('merging fields of scheme {} into '
                     'already merged fields of {}'
                     .format(pname, ', '.join(parent_names_so_far)))
             merged.merge_in(pfields, when)
             parent_names_so_far.append(repr(pname))

         # Now try to merge in any direct fields
         when = ('merging direct fields of scheme {} into fields from parents'
                 .format(name))
         merged.merge_in(scheme.direct_fields, when)

         # Remember the result so that the lookup at the top of this function
         # finds it next time. This is safe within this class: apply_settings
         # builds a new object and merge_in only modifies the object it is
         # called on, which is always a freshly made one. Callers of resolve()
         # must treat the shared result as read-only.
         self.resolved[name] = merged

         return merged

     def resolve(self, name: str, mnemonic: str) -> EncSchemeFields:
         '''Get the fields of the scheme called name for an instruction

         As well as resolving inheritance, this checks that the fields cover
         all 32 bits of the encoding, raising a ValueError if not. The
         mnemonic names the instruction in error messages.

         '''
         fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), [])

         # Check completeness
         missing = ((1 << 32) - 1) & ~fields.mask
         if missing:
             # The width in the format spec includes the '0x' prefix, so we
             # need 10 characters to show all 8 hex digits of a 32-bit mask.
             raise ValueError('Fields for encoding scheme {} miss some bits '
                              '(mask: {:#010x})'
                              .format(name, missing))

         return fields


 class OperandType:
     '''The base class for some sort of operand type

     The width attribute is the width of the operand in bits (a positive
     integer), or None if the type doesn't specify a width.

     '''
     def __init__(self, width: Optional[int]) -> None:
         if width is not None:
             assert width > 0
         self.width = width

     def markdown_doc(self) -> Optional[str]:
         '''Generate any (markdown) documentation for this operand type

         There is nothing to say about the base class, so this returns None.
         Subclasses might override it to return something more useful.

         '''
         return None


 class RegOperandType(OperandType):
     '''A class representing a register operand type

     reg_type must be one of the keys of TYPE_WIDTHS, which gives the width
     of the operand for each type of register. is_dest is stored as given.

     '''
     TYPE_WIDTHS = {'gpr': 5, 'wdr': 5, 'csr': 12, 'wsr': 8}

     def __init__(self, reg_type: str, is_dest: bool):
         width = RegOperandType.TYPE_WIDTHS.get(reg_type)
         # An unknown register type is a bug in the calling code.
         assert width is not None
         super().__init__(width)

         self.is_dest = is_dest
         self.reg_type = reg_type


 class ImmOperandType(OperandType):
     '''A class representing an immediate operand type'''
     def markdown_doc(self) -> Optional[str]:
         '''Describe the valid range, or return None if the width is unknown'''
         # Override from OperandType base class
         width = self.width
         if width is not None:
             max_value = (1 << width) - 1
             return 'Valid range: `0..{}`'.format(max_value)

         return None


 class EnumOperandType(ImmOperandType):
     '''A class representing an enum operand type

     items is the (non-empty) list of names for the enum. The value of the
     immediate for a name is its index in the list, and the width of the
     operand is the bit length of the largest index.

     '''
     def __init__(self, items: List[str]):
         assert items
         largest_index = len(items) - 1
         super().__init__(largest_index.bit_length())
         self.items = items

     def markdown_doc(self) -> Optional[str]:
         '''Return a markdown table mapping each name to its value'''
         # Override from OperandType base class
         header = ('Syntax table:\n\n'
                   '| Syntax | Value of immediate |\n'
                   '|--------|--------------------|\n')
         rows = ['| `{}` | `{}` |\n'.format(item, idx)
                 for idx, item in enumerate(self.items)]
         return header + ''.join(rows)


 class OptionOperandType(ImmOperandType):
     '''A class representing an option operand type

     This is an immediate with a width of one bit. The option attribute is
     the literal syntax used to specify it.

     '''
     def __init__(self, option: str):
         super().__init__(1)
         self.option = option

     def markdown_doc(self) -> Optional[str]:
         '''Return a sentence giving the literal syntax for the option'''
         # Override from OperandType base class
         template = 'To specify, use the literal syntax `{}`\n'
         return template.format(self.option)


 def parse_operand_type(fmt: str) -> OperandType:
     '''Make sense of the operand type syntax

     The recognised formats are the register types (grs, grd, wrs, wrd, csr
     and wsr), imm, imm<N> for a positive width N, enum(a, b, ...) and
     option(a). Anything else causes a ValueError.

     '''
     # Registers: a map from format to (register type, is_dest)
     reg_formats = {
         'grs': ('gpr', False),
         'grd': ('gpr', True),
         'wrs': ('wdr', False),
         'wrd': ('wdr', True),
         'csr': ('csr', True),
         'wsr': ('wsr', True)
     }
     reg_args = reg_formats.get(fmt)
     if reg_args is not None:
         reg_type, is_dest = reg_args
         return RegOperandType(reg_type, is_dest)

     # Immediates, first with unknown width and then with an explicit one
     if fmt == 'imm':
         return ImmOperandType(None)

     imm_match = re.match(r'imm([1-9][0-9]*)$', fmt)
     if imm_match:
         return ImmOperandType(int(imm_match.group(1)))

     enum_match = re.match(r'enum\(([^\)]+)\)$', fmt)
     if enum_match:
         names = enum_match.group(1).split(',')
         return EnumOperandType([name.strip() for name in names])

     option_match = re.match(r'option\(([^\)]+)\)$', fmt)
     if option_match:
         return OptionOperandType(option_match.group(1).strip())

     raise ValueError("Operand type description {!r} "
                      "didn't match any recognised format."
                      .format(fmt))


 def infer_operand_type(name: str) -> OperandType:
     '''Try to guess an operand's type from its name

     Raises a ValueError if the name doesn't match any of the known patterns.

     '''
     # Names that are exactly the same as the name of a type
     if name in ('grd', 'wrd', 'csr', 'wsr'):
         return parse_operand_type(name)

     # Names that are a type name, possibly followed by a number
     numbered = [(r'grs[0-9]*$', 'grs'),
                 (r'wrs[0-9]*$', 'wrs'),
                 (r'imm[0-9]*$', 'imm')]
     for pattern, type_name in numbered:
         if re.match(pattern, name):
             return parse_operand_type(type_name)

     if name == 'offset':
         return parse_operand_type('imm')

     raise ValueError("Operand name {!r} doesn't imply an operand type: "
                      "you'll have to set the type explicitly."
                      .format(name))


 def make_operand_type(yml: object, operand_name: str) -> OperandType:
     '''Construct a type for an operand

     If yml is None, the type is inferred from the name of the operand.
     Otherwise, yml must be a string, which is parsed as a description of the
     type.

     '''
     if yml is None:
         return infer_operand_type(operand_name)

     type_what = 'type for {} operand'.format(operand_name)
     return parse_operand_type(check_str(yml, type_what))


 def get_optional_str(data: Dict[str, object],
                      key: str, what: str) -> Optional[str]:
     '''Get the string at data[key], or None if there is no such key

     Raises a ValueError if there is a value but it isn't a string (or None);
     the what argument names the object that data describes.

     '''
     field_what = '{} field for {}'.format(key, what)
     return check_optional_str(data.get(key), field_what)


 class Operand:
     '''An operand of an instruction

     name is the name of the operand, op_type is its type (given explicitly
     in the YAML or inferred from the name) and doc is the documentation
     string for the operand, or None if there wasn't one.

     '''
     def __init__(self, yml: object, insn_name: str) -> None:
         '''Read an operand of the instruction called insn_name

         Raises a ValueError if yml is not a string or dict, or if it is
         otherwise malformed.

         '''
         # The YAML representation should be a string (a bare operand name) or
         # a dict.
         what = 'operand for {!r} instruction'.format(insn_name)

         op_type = None  # type: Optional[str]
         doc = None  # type: Optional[str]

         if isinstance(yml, str):
             name = yml
         elif isinstance(yml, dict):
             yd = check_keys(yml, what, ['name'], ['type', 'doc'])
             name = check_str(yd['name'], 'name of ' + what)

             op_what = '{!r} {}'.format(name, what)
             op_type = get_optional_str(yd, 'type', op_what)
             doc = get_optional_str(yd, 'doc', op_what)
         else:
             # Fail with a proper error message, rather than by using the name
             # variable before it has been assigned.
             raise ValueError('{} is of type {}, but should be a string '
                              'or a dict.'
                              .format(what, type(yml).__name__))

         self.name = name
         self.op_type = make_operand_type(op_type, name)
         self.doc = doc


 class InsnSyntax:
     '''The syntax of an instruction, split up around its operands

     The raw syntax looks something like "<foo> + <bar> (baz <qux>)", where
     each <..> holds an operand name. It is stored as prefix (the string
     before the first operand) and pairs (a list with a pair for each
     operand, giving its name and the string that follows it). The set of
     operand names is stored as operands.

     '''
     def __init__(self, raw: str) -> None:
         # Splitting on a pattern with a capture group gives the text between
         # matches interleaved with the captured operand names. The result
         # always has an odd length, starting with the text before the first
         # operand (empty if raw starts with an operand).
         tokens = re.split(r'<([^>]+)>', raw)
         names = tokens[1::2]
         suffixes = tokens[2::2]

         self.prefix = tokens[0]
         self.pairs = list(zip(names, suffixes))

         assert len(tokens) == 1 + 2 * len(self.pairs)

         # Collect up the operand names, making sure that none is repeated.
         self.operands = set()  # type: Set[str]
         for name in names:
             if name in self.operands:
                 raise ValueError('Instruction syntax ({!r}) has duplicate '
                                  'occurrence of the {!r} operand.'
                                  .format(raw, name))
             self.operands.add(name)

     def raw_string(self) -> str:
         '''Return the raw string of the syntax'''
         rest = ['<{}>{}'.format(name, suffix)
                 for name, suffix in self.pairs]
         return self.prefix + ''.join(rest)


 class EncodingField:
     '''A single element of an encoding's mapping

     value is either a literal or a string, in which case it is the name of
     an operand. scheme_field is the field of the encoding scheme that the
     value is mapped to.

     '''
     def __init__(self,
                  value: Union[BoolLiteral, str],
                  scheme_field: EncSchemeField) -> None:
         self.value = value
         self.scheme_field = scheme_field

     @staticmethod
     def from_yaml(as_str: str,
                   scheme_field: EncSchemeField,
                   name_to_operand: Dict[str, Operand],
                   what: str) -> 'EncodingField':
         '''Construct a field from the string that gives its value

         The string is either a boolean literal ("b000xx11" or similar) or it
         is taken as the name of an operand, which must be a key of
         name_to_operand. Raises a ValueError if it is neither, or if the
         width of the value is known and differs from the width of
         scheme_field. The what argument names the field in error messages.

         '''
         if not as_str:
             raise ValueError('Empty string as {}.'.format(what))

         value = ''  # type: Union[BoolLiteral, str]
         is_literal = re.match(r'b[01x_]+$', as_str) is not None
         if is_literal:
             literal = BoolLiteral(as_str, what)
             value = literal
             value_width = literal.width  # type: Optional[int]
             value_type = 'a literal value'
         else:
             operand = name_to_operand.get(as_str)
             if operand is None:
                 raise ValueError('Unknown operand, {!r}, as {}'
                                  .format(as_str, what))
             value = as_str
             value_width = operand.op_type.width
             value_type = 'an operand'

         # The width is None for an operand whose type doesn't specify one.
         # Otherwise, it has to match the width of the field in the scheme.
         field_width = scheme_field.bits.width
         if value_width is not None and field_width != value_width:
             raise ValueError('{} is mapped to {} with width {}, but the '
                              'encoding schema field has width {}.'
                              .format(what, value_type, value_width,
                                      field_width))

         # Keep hold of the scheme field as well, so that nothing needs to
         # track the scheme once an encoding object has been made.
         return EncodingField(value, scheme_field)


 class Encoding:
     '''The encoding for an instruction

     An Encoding gives a value to every field of an encoding scheme. The
     values are stored in self.fields, a dictionary keyed by field name whose
     values are EncodingField objects. Each field's value is either a literal
     bit pattern or the name of one of the instruction's operands.

     '''
     def __init__(self,
                  yml: object,
                  schemes: EncSchemes,
                  name_to_operand: Dict[str, Operand],
                  mnemonic: str):
         '''Read the encoding for the instruction called mnemonic from yml

         yml should be a dict with exactly the keys 'scheme' and 'mapping'.
         The scheme name is resolved using schemes. The mapping must have
         exactly the keys listed in the resolved scheme's op_fields.
         name_to_operand maps the names of the instruction's operands to the
         operands themselves.

         Raises a ValueError if yml doesn't have the expected shape or if
         some operand of the instruction isn't used by any field.

         '''
         what = 'encoding for instruction {!r}'.format(mnemonic)
         yd = check_keys(yml, what, ['scheme', 'mapping'], [])

         scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic)
         scheme_name = check_str(yd['scheme'], scheme_what)
         scheme_fields = schemes.resolve(scheme_name, mnemonic)

         what = 'encoding mapping for instruction {!r}'.format(mnemonic)

         # Check we've got exactly the right fields for the scheme
         ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), [])

         # Track the set of operand names that were used in some field
         operands_used = set()  # type: Set[str]

         self.fields = {}  # type: Dict[str, EncodingField]
         for field_name, scheme_field in scheme_fields.fields.items():
             if scheme_field.value is not None:
                 # The scheme itself fixes the value of this field.
                 field = EncodingField(scheme_field.value, scheme_field)
             else:
                 field_what = ('value for {} field in encoding for instruction {!r}'
                               .format(field_name, mnemonic))
                 field = EncodingField.from_yaml(check_str(ydm[field_name], field_what),
                                                 scheme_field,
                                                 name_to_operand,
                                                 field_what)

                 # If the field's value is an operand rather than a literal, it
                 # will have type str. Track the operands that we've used.
                 if isinstance(field.value, str):
                     operands_used.add(field.value)

             self.fields[field_name] = field

         # We know that every field in the encoding scheme has a value. But we
         # still need to check that every operand ended up in some field.
         assert operands_used <= set(name_to_operand.keys())
         unused_ops = set(name_to_operand.keys()) - operands_used
         if unused_ops:
             # Sort the names so that the error message doesn't depend on set
             # iteration order (which can differ from one run to the next).
             raise ValueError('Not all operands used in {} (missing: {}).'
                              .format(what, ', '.join(sorted(unused_ops))))

     def get_masks(self) -> Tuple[int, int]:
         '''Return zeros/ones masks for encoding

         Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a
         bit is set if there is a bit pattern matching this encoding with that
         bit zero. m1 is the ones mask: equivalent, but for that bit one.

         A field whose value is an operand sets its bits in both masks. For a
         literal, a '0' sets the bit in m0 only, a '1' sets it in m1 only and
         an 'x' sets it in both.

         '''
         m0 = 0
         m1 = 0
         for field in self.fields.values():
             field_bits = field.scheme_field.bits
             if isinstance(field.value, str):
                 # An operand: each bit of the field might be zero or one.
                 m0 |= field_bits.mask
                 m1 |= field_bits.mask
             else:
                 # Match up the bits in the value with the ranges in the scheme.
                 assert field.value.width > 0
                 assert field.value.width == field_bits.width
                 bits_seen = 0
                 for msb, lsb in field_bits.ranges:
                     # The ranges are consumed starting from the top bit of
                     # the value: val_msb/val_lsb are the indices within the
                     # value that land on bits msb/lsb of the encoding.
                     range_width = msb - lsb + 1
                     val_msb = field_bits.width - 1 - bits_seen
                     val_lsb = val_msb - msb + lsb
                     bits_seen += range_width

                     for idx in range(0, range_width):
                         desc = field.value.char_for_bit(val_lsb + idx)
                         if desc in ['0', 'x']:
                             m0 |= 1 << (idx + lsb)
                         if desc in ['1', 'x']:
                             m1 |= 1 << (idx + lsb)

         # Every one of the 32 bits should be allowed to take at least one
         # value.
         all_bits = (1 << 32) - 1
         assert (m0 | m1) == all_bits
         return (m0, m1)


 class Insn:
     '''A single instruction, read from one entry of the instruction list'''
     def __init__(self,
                  yml: object,
                  groups: InsnGroups,
                  encoding_schemes: EncSchemes) -> None:
         '''Read an instruction from yml

         yml should be a dict with the keys 'mnemonic' and 'operands' and,
         optionally, any of 'group', 'rv32i', 'synopsis', 'syntax', 'doc',
         'note', 'trailing-doc', 'decode', 'operation', 'encoding' and
         'glued-ops'. If no group is given, the default group from groups is
         used. encoding_schemes is used to read the encoding, if there is one.

         Raises a ValueError if the keys are wrong, if the group is unknown
         or if the syntax doesn't use exactly the operands that are listed.

         '''
         required_keys = ['mnemonic', 'operands']
         optional_keys = ['group', 'rv32i', 'synopsis',
                          'syntax', 'doc', 'note', 'trailing-doc',
                          'decode', 'operation', 'encoding', 'glued-ops']
         yd = check_keys(yml, 'instruction', required_keys, optional_keys)

         self.mnemonic = check_str(yd['mnemonic'], 'mnemonic for instruction')

         what = 'instruction with mnemonic {!r}'.format(self.mnemonic)
         operands_what = 'operands for ' + what

         # Read the operands and index them by name.
         operand_ymls = check_list(yd['operands'], operands_what)
         self.operands = [Operand(operand_yml, self.mnemonic)
                          for operand_yml in operand_ymls]
         self.name_to_operand = index_list(operands_what,
                                           self.operands,
                                           lambda operand: operand.name)

         # An instruction with no explicit group goes in the default one.
         raw_group = get_optional_str(yd, 'group', what)
         if raw_group is None:
             self.group = groups.default_group()
         else:
             self.group = raw_group

         if self.group not in groups.key_to_group:
             raise ValueError('Unknown instruction group, {!r}, '
                              'for mnemonic {!r}.'
                              .format(self.group, self.mnemonic))

         # Boolean flags (both default to False)
         self.rv32i = check_bool(yd.get('rv32i', False),
                                 'rv32i flag for ' + what)
         self.glued_ops = check_bool(yd.get('glued-ops', False),
                                     'glued-ops flag for ' + what)

         # Optional free-form text fields
         self.synopsis = get_optional_str(yd, 'synopsis', what)
         self.doc = get_optional_str(yd, 'doc', what)
         self.note = get_optional_str(yd, 'note', what)
         self.trailing_doc = get_optional_str(yd, 'trailing-doc', what)
         self.decode = get_optional_str(yd, 'decode', what)
         self.operation = get_optional_str(yd, 'operation', what)

         raw_syntax = get_optional_str(yd, 'syntax', what)
         self.syntax = None  # type: Optional[InsnSyntax]
         if raw_syntax is not None:
             syntax = InsnSyntax(raw_syntax)
             self.syntax = syntax

             # The syntax must mention exactly the operands that we were
             # given: no more and no fewer.
             listed_names = set(self.name_to_operand)
             if listed_names != syntax.operands:
                 raise ValueError("Operand syntax for {!r} doesn't have the "
                                  "same list of operands as given in the "
                                  "operand list. The syntax uses {}, "
                                  "but the list of operands gives {}."
                                  .format(self.mnemonic,
                                          sorted(syntax.operands),
                                          sorted(self.name_to_operand)))

         # The encoding is optional too. If there is none, self.encoding
         # stays as None.
         self.encoding = None  # type: Optional[Encoding]
         encoding_yml = yd.get('encoding')
         if encoding_yml is not None:
             self.encoding = Encoding(encoding_yml, encoding_schemes,
                                      self.name_to_operand, self.mnemonic)


 def find_ambiguous_encodings(insns: List[Insn]) -> List[Tuple[str, str, int]]:
     '''Look for pairs of instructions whose encodings overlap

     Only instructions that have an encoding are considered. The result is a
     list of triples (mnemonic0, mnemonic1, bits), one for each ambiguous
     pair, where bits is a bit pattern that matches both instructions.

     '''
     # Map each mnemonic to the (zeros mask, ones mask) pair for its encoding.
     masks = {insn.mnemonic: insn.encoding.get_masks()
              for insn in insns
              if insn.encoding is not None}

     all_bits = (1 << 32) - 1

     clashes = []
     for item_a, item_b in itertools.combinations(masks.items(), 2):
         name_a, (zeros_a, ones_a) = item_a
         name_b, (zeros_b, ones_b) = item_b

         # A bit fails to tell the two instructions apart if both of them
         # allow it to be 0 or both of them allow it to be 1. If that is true
         # of every bit, some bit pattern matches both instructions.
         both_zero = zeros_a & zeros_b
         both_one = ones_a & ones_b
         if (both_zero | both_one) != all_bits:
             continue

         # For the example pattern, use 0 wherever both instructions allow
         # it and 1 in the remaining positions.
         clashes.append((name_a, name_b, both_one & ~both_zero))

     return clashes


 class InsnsFile:
     '''The contents of an instruction database: groups, schemes and insns'''
     def __init__(self, yml: object) -> None:
         '''Read the top level of the instruction database from yml

         yml should be a dict with exactly the keys 'insn-groups',
         'encoding-schemes' and 'insns'. Raises a ValueError if the keys are
         wrong or if two instructions have ambiguous encodings.

         '''
         top_level_keys = ['insn-groups', 'encoding-schemes', 'insns']
         yd = check_keys(yml, 'top-level', top_level_keys, [])

         self.groups = InsnGroups(yd['insn-groups'])
         self.encoding_schemes = EncSchemes(yd['encoding-schemes'])

         insn_ymls = check_list(yd['insns'], 'insns')
         self.insns = [Insn(insn_yml, self.groups, self.encoding_schemes)
                       for insn_yml in insn_ymls]
         self.mnemonic_to_insn = index_list('insns', self.insns,
                                            lambda insn: insn.mnemonic)

         # Reject the file if some bit pattern decodes as two instructions.
         clashes = find_ambiguous_encodings(self.insns)
         if clashes:
             clash_msgs = ['{!r} and {!r} '
                           'both match bit pattern {:#010x}'
                           .format(mnem0, mnem1, bits)
                           for mnem0, mnem1, bits in clashes]
             raise ValueError('Ambiguous instruction encodings: ' +
                              ', '.join(clash_msgs))

     def grouped_insns(self) -> List[Tuple[InsnGroup, List[Insn]]]:
         '''Return the instructions, collected by group

         The result has one (group, instructions) pair for each group in
         self.groups.groups, in that order. A group with no instructions gets
         an empty list.

         '''
         by_group = {}  # type: Dict[str, List[Insn]]
         for insn in self.insns:
             if insn.group not in by_group:
                 by_group[insn.group] = []
             by_group[insn.group].append(insn)

         grouped = [(group, by_group.get(group.key, []))
                    for group in self.groups.groups]

         # The Insn constructor checks that each instruction has a valid
         # group, so every instruction should appear in the result. Compare
         # the counts to catch the case where something went wrong.
         num_collected = sum(map(len, by_group.values()))
         num_returned = sum(len(group_insns) for _, group_insns in grouped)
         assert num_returned == num_collected

         return grouped


 def load_file(path: str) -> InsnsFile:
     '''Load the YAML file at path.

     The file is read as UTF-8, whatever the default encoding of the current
     locale. Returns the InsnsFile built from its contents.

     Raises a RuntimeError on syntax or schema error. Errors from opening the
     file (such as the file not existing) are not caught here.

     '''
     try:
         # Give the encoding explicitly. Otherwise open() uses the locale's
         # default, which might not be able to decode the file.
         with open(path, 'r', encoding='utf-8') as handle:
             return InsnsFile(yaml.load(handle, Loader=yaml.SafeLoader))
     except yaml.YAMLError as err:
         raise RuntimeError('Failed to parse YAML file at {!r}: {}'
                            .format(path, err)) from None
     except ValueError as err:
         # Note that UnicodeDecodeError is a subclass of ValueError, so a file
         # that can't be decoded is also reported through this branch.
         raise RuntimeError('Invalid schema in YAML file at {!r}: {}'
                            .format(path, err)) from None