[otbn] Split up insn_yaml.py
This was getting a bit unwieldy: split it into smaller files.
Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/util/Makefile b/hw/ip/otbn/util/Makefile
index c3569d9..ec93281 100644
--- a/hw/ip/otbn/util/Makefile
+++ b/hw/ip/otbn/util/Makefile
@@ -14,7 +14,7 @@
$(build-dir) $(cs-build-dir) $(lint-build-dir):
mkdir -p $@
-pylibs := shared/insn_yaml.py shared/mem_layout.py
+pylibs := $(wildcard shared/*.py)
pyscripts := yaml_to_doc.py otbn-as otbn-ld otbn-objdump
lint-stamps := $(foreach s,$(pyscripts),$(lint-build-dir)/$(s).stamp)
diff --git a/hw/ip/otbn/util/otbn-as b/hw/ip/otbn/util/otbn-as
index 23e8117..6698e26 100755
--- a/hw/ip/otbn/util/otbn-as
+++ b/hw/ip/otbn/util/otbn-as
@@ -24,8 +24,10 @@
import tempfile
from typing import Dict, List, Optional, Set, TextIO, Tuple
-from shared.insn_yaml import (BitRanges, Encoding, Insn, InsnsFile, Operand,
- RegOperandType, load_file)
+from shared.bit_ranges import BitRanges
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
+from shared.operand import RegOperandType, Operand
class RVFmt:
diff --git a/hw/ip/otbn/util/otbn-objdump b/hw/ip/otbn/util/otbn-objdump
index 0cc2ac4..3ffc924 100755
--- a/hw/ip/otbn/util/otbn-objdump
+++ b/hw/ip/otbn/util/otbn-objdump
@@ -11,7 +11,8 @@
import sys
from typing import Dict, List, Optional, Tuple
-from shared.insn_yaml import Encoding, Insn, InsnsFile, load_file
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
def snoop_disasm_flags(argv: List[str]) -> bool:
diff --git a/hw/ip/otbn/util/shared/bit_ranges.py b/hw/ip/otbn/util/shared/bit_ranges.py
new file mode 100644
index 0000000..56c3d69
--- /dev/null
+++ b/hw/ip/otbn/util/shared/bit_ranges.py
@@ -0,0 +1,113 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Tuple
+
+
+class BitRanges:
+ '''Represents the bit ranges used for a field in an encoding scheme'''
+ def __init__(self,
+ mask: int,
+ ranges: List[Tuple[int, int]],
+ width: int) -> None:
+ self.mask = mask
+ self.ranges = ranges
+ self.width = width
+
+ @staticmethod
+ def from_list(ranges: List[Tuple[int, int]]) -> 'BitRanges':
+ mask = 0
+ width = 0
+ for msb, lsb in ranges:
+ assert 0 <= lsb <= msb <= 31
+ rng_mask = (1 << (msb + 1)) - (1 << lsb)
+ assert not (rng_mask & mask)
+ mask |= rng_mask
+ width += msb - lsb + 1
+
+ return BitRanges(mask, ranges, width)
+
+ @staticmethod
+ def from_yaml(as_string: str, what: str) -> 'BitRanges':
+ # ranges ::= range
+ # | range ',' ranges
+ #
+ # range ::= num
+        #          | num '-' num
+        #
+        # Ranges are assumed to be msb-lsb (with msb >= lsb). Bit indices are
+ # at most 31 and ranges are disjoint.
+
+ if not as_string:
+ raise ValueError('Empty string as bits for {}'.format(what))
+
+ overlaps = 0
+
+ mask = 0
+ ranges = []
+ width = 0
+
+ for rng in as_string.split(','):
+ match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng)
+ if match is None:
+ raise ValueError('Range {!r} in bits for {} is malformed.'
+ .format(rng, what))
+
+ msb = int(match.group(1))
+ maybe_lsb = match.group(2)
+ lsb = msb if maybe_lsb is None else int(maybe_lsb)
+
+ if msb < lsb:
+ raise ValueError('Range {!r} in bits for {} has msb < lsb.'
+ .format(rng, what))
+
+ if msb >= 32:
+ raise ValueError('Range {!r} in bits for {} has msb >= 32.'
+ .format(rng, what))
+
+ rng_mask = (1 << (msb + 1)) - (1 << lsb)
+ overlaps |= rng_mask & mask
+ mask |= rng_mask
+
+ ranges.append((msb, lsb))
+ width += msb - lsb + 1
+
+ if overlaps:
+ raise ValueError('Bits for {} have overlapping ranges '
+ '(mask: {:#08x})'
+ .format(what, overlaps))
+
+ return BitRanges(mask, ranges, width)
+
+ def __eq__(self, other: object) -> bool:
+ return isinstance(other, BitRanges) and self.ranges == other.ranges
+
+ def encode(self, value: int) -> int:
+ '''Encode the given value as bit fields'''
+ ret = 0
+ bits_taken = 0
+ for msb, lsb in self.ranges:
+ rng_width = msb - lsb + 1
+ value_msb = self.width - 1 - bits_taken
+ value_lsb = value_msb - rng_width + 1
+
+ rng_mask = (1 << rng_width) - 1
+ rng_value = (value >> value_lsb) & rng_mask
+ ret |= rng_value << lsb
+ bits_taken += rng_width
+
+ assert bits_taken == self.width
+ return ret
+
+ def decode(self, raw: int) -> int:
+ '''Extract the bit fields from the given value'''
+ ret = 0
+ for msb, lsb in self.ranges:
+ width = msb - lsb + 1
+ mask = (1 << width) - 1
+
+ ret <<= width
+ ret |= (raw >> lsb) & mask
+ return ret
diff --git a/hw/ip/otbn/util/shared/bool_literal.py b/hw/ip/otbn/util/shared/bool_literal.py
new file mode 100644
index 0000000..78a8b97
--- /dev/null
+++ b/hw/ip/otbn/util/shared/bool_literal.py
@@ -0,0 +1,66 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+
+class BoolLiteral:
+ '''Represents a boolean literal, with possible 'x characters
+
+ We represent this as 2 masks: "ones" and "xs". The ones mask is the bits
+ that are marked 1. The xs mask is the bits that are marked x. Then you can
+ test whether a particular value matches the literal by zeroing all bits in
+ the x mask and then comparing with the ones mask.
+
+ '''
+ def __init__(self, ones: int, xs: int, width: int) -> None:
+ assert width > 0
+ assert (ones >> width) == 0
+ assert (xs >> width) == 0
+
+ self.ones = ones
+ self.xs = xs
+ self.width = width
+
+ @staticmethod
+ def from_string(as_string: str, what: str) -> 'BoolLiteral':
+ ones = 0
+ xs = 0
+ width = 0
+
+ # The literal should always start with a 'b'
+ if not as_string.startswith('b'):
+ raise ValueError("Boolean literal for {} doesn't start with a 'b'."
+ .format(what))
+
+ for char in as_string[1:]:
+ if char == '_':
+ continue
+
+ ones <<= 1
+ xs <<= 1
+ width += 1
+
+ if char == '0':
+ continue
+ elif char == '1':
+ ones |= 1
+ elif char == 'x':
+ xs |= 1
+ else:
+ raise ValueError('Boolean literal for {} has '
+ 'unsupported character: {!r}.'
+ .format(what, char))
+
+ if not width:
+ raise ValueError('Empty boolean literal for {}.'.format(what))
+
+ return BoolLiteral(ones, xs, width)
+
+ def char_for_bit(self, bit: int) -> str:
+ '''Return 0, 1 or x for the bit at the given position'''
+ assert bit < self.width
+ if (self.ones >> bit) & 1:
+ return '1'
+ if (self.xs >> bit) & 1:
+ return 'x'
+ return '0'
diff --git a/hw/ip/otbn/util/shared/encoding.py b/hw/ip/otbn/util/shared/encoding.py
new file mode 100644
index 0000000..9d2945a
--- /dev/null
+++ b/hw/ip/otbn/util/shared/encoding.py
@@ -0,0 +1,203 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import Dict, Tuple, Union
+
+from .bool_literal import BoolLiteral
+from .encoding_scheme import EncSchemeField, EncSchemes
+from .operand import Operand
+from .yaml_parse_helpers import check_keys, check_str
+
+
+class EncodingField:
+ '''A single element of an encoding's mapping'''
+ def __init__(self,
+ value: Union[BoolLiteral, str],
+ scheme_field: EncSchemeField) -> None:
+ self.value = value
+ self.scheme_field = scheme_field
+
+ @staticmethod
+ def from_yaml(as_str: str,
+ scheme_field: EncSchemeField,
+ name_to_operand: Dict[str, Operand],
+ what: str) -> 'EncodingField':
+ # The value should either be a boolean literal ("000xx11" or similar)
+ # or should be a name, which is taken as the name of an operand.
+ if not as_str:
+ raise ValueError('Empty string as {}.'.format(what))
+
+        # Set value to be either the bool literal or the name of the
+        # operand.
+ value_width = None
+ value = '' # type: Union[BoolLiteral, str]
+ if re.match(r'b[01x_]+$', as_str):
+ value = BoolLiteral.from_string(as_str, what)
+ value_width = value.width
+ value_type = 'a literal value'
+ else:
+ operand = name_to_operand.get(as_str)
+ if operand is None:
+ raise ValueError('Unknown operand, {!r}, as {}'
+ .format(as_str, what))
+ value_width = operand.op_type.width
+ value = as_str
+ value_type = 'an operand'
+
+ # Unless we had an operand of type 'imm' (unknown width), we now have
+ # an expected width. Check it matches the width of the schema field.
+ if value_width is not None:
+ if scheme_field.bits.width != value_width:
+ raise ValueError('{} is mapped to {} with width {}, but the '
+ 'encoding schema field has width {}.'
+ .format(what, value_type, value_width,
+ scheme_field.bits.width))
+
+ # Track the scheme field as well (so we don't have to keep track of a
+ # scheme once we've made an encoding object)
+ return EncodingField(value, scheme_field)
+
+
+class Encoding:
+ '''The encoding for an instruction'''
+ def __init__(self,
+ yml: object,
+ schemes: EncSchemes,
+ name_to_operand: Dict[str, Operand],
+ mnemonic: str):
+ what = 'encoding for instruction {!r}'.format(mnemonic)
+ yd = check_keys(yml, what, ['scheme', 'mapping'], [])
+
+ scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic)
+ scheme_name = check_str(yd['scheme'], scheme_what)
+ scheme_fields = schemes.resolve(scheme_name, mnemonic)
+
+ what = 'encoding mapping for instruction {!r}'.format(mnemonic)
+
+ # Check we've got exactly the right fields for the scheme
+ ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), [])
+
+ # Track the set of operand names that were used in some field
+ operands_used = set()
+
+ self.fields = {}
+ for field_name, scheme_field in scheme_fields.fields.items():
+ if scheme_field.value is not None:
+ field = EncodingField(scheme_field.value, scheme_field)
+ else:
+ field_what = ('value for {} field in encoding for instruction {!r}'
+ .format(field_name, mnemonic))
+ field = EncodingField.from_yaml(check_str(ydm[field_name], field_what),
+ scheme_fields.fields[field_name],
+ name_to_operand,
+ field_what)
+
+ # If the field's value is an operand rather than a literal, it
+ # will have type str. Track the operands that we've used.
+ if isinstance(field.value, str):
+ operands_used.add(field.value)
+
+ self.fields[field_name] = field
+
+ # We know that every field in the encoding scheme has a value. But we
+ # still need to check that every operand ended up in some field.
+ assert operands_used <= set(name_to_operand.keys())
+ unused_ops = set(name_to_operand.keys()) - operands_used
+ if unused_ops:
+ raise ValueError('Not all operands used in {} (missing: {}).'
+ .format(what, ', '.join(list(unused_ops))))
+
+ def get_masks(self) -> Tuple[int, int]:
+ '''Return zeros/ones masks for encoding
+
+ Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a
+        bit is set if there is a bit pattern matching this encoding with that
+ bit zero. m1 is the ones mask: equivalent, but for that bit one.
+
+ '''
+ m0 = 0
+ m1 = 0
+ for field_name, field in self.fields.items():
+ if isinstance(field.value, str):
+ m0 |= field.scheme_field.bits.mask
+ m1 |= field.scheme_field.bits.mask
+ else:
+ # Match up the bits in the value with the ranges in the scheme.
+ assert field.value.width > 0
+ assert field.value.width == field.scheme_field.bits.width
+ bits_seen = 0
+ for msb, lsb in field.scheme_field.bits.ranges:
+ val_msb = field.scheme_field.bits.width - 1 - bits_seen
+ val_lsb = val_msb - msb + lsb
+ bits_seen += msb - lsb + 1
+
+ for idx in range(0, msb - lsb + 1):
+ desc = field.value.char_for_bit(val_lsb + idx)
+ if desc in ['0', 'x']:
+ m0 |= 1 << (idx + lsb)
+ if desc in ['1', 'x']:
+ m1 |= 1 << (idx + lsb)
+
+ all_bits = (1 << 32) - 1
+ assert (m0 | m1) == all_bits
+ return (m0, m1)
+
+ def get_ones_mask(self) -> int:
+ '''Return the mask of fixed bits that are set
+
+ For literal values of x (unused bits in the encoding), we'll prefer
+ '0'.
+
+ '''
+ m0, m1 = self.get_masks()
+ return m1 & ~m0
+
+ def assemble(self, op_to_idx: Dict[str, int]) -> int:
+ '''Assemble an instruction
+
+ op_to_idx should map each operand in the encoding to some integer
+ index, which should be small enough to fit in the width of the
+ operand's type and should be representable after any shift. Will raise
+ a ValueError if not.
+
+ '''
+ val = self.get_ones_mask()
+ for field_name, field in self.fields.items():
+ if not isinstance(field.value, str):
+ # We've done this field already (in get_ones_mask)
+ continue
+
+ # Try to get the operand value for the field. If this is an
+ # optional operand, we might not have one, and just encode zero.
+ field_val = op_to_idx.get(field.value, 0)
+
+ # Are there any low bits that shouldn't be there?
+ shift_mask = (1 << field.scheme_field.shift) - 1
+ if field_val & shift_mask:
+ raise ValueError("operand field {} has a shift of {}, "
+ "so can't represent the value {:#x}."
+ .format(field.value,
+ field.scheme_field.shift,
+ field_val))
+
+ shifted = field_val >> field.scheme_field.shift
+
+ # Is the number too big? At the moment, we are assuming immediates
+ # are unsigned (because the OTBN big number instructions all have
+ # unsigned immediates).
+ if shifted >> field.scheme_field.bits.width:
+ shift_msg = ((' (shifted right by {} bits from {:#x})'
+ .format(field.scheme_field.shift, field_val))
+ if field.scheme_field.shift
+ else '')
+ raise ValueError("operand field {} has a width of {}, "
+ "so can't represent the value {:#x}{}."
+ .format(field.value,
+ field.scheme_field.bits.width,
+ shifted, shift_msg))
+
+ val |= field.scheme_field.bits.encode(shifted)
+
+ return val
diff --git a/hw/ip/otbn/util/shared/encoding_scheme.py b/hw/ip/otbn/util/shared/encoding_scheme.py
new file mode 100644
index 0000000..0e6d4ce
--- /dev/null
+++ b/hw/ip/otbn/util/shared/encoding_scheme.py
@@ -0,0 +1,356 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code for handling instruction encoding schemes'''
+
+import re
+from typing import Dict, List, Optional, Set
+
+from .bit_ranges import BitRanges
+from .bool_literal import BoolLiteral
+from .yaml_parse_helpers import check_keys, check_str, check_list, index_list
+
+
+class EncSchemeField:
+ '''Represents a single field in an encoding scheme'''
+ def __init__(self,
+ bits: BitRanges,
+ value: Optional[BoolLiteral],
+ shift: int) -> None:
+ self.bits = bits
+ self.value = value
+ self.shift = shift
+
+ @staticmethod
+ def from_yaml(yml: object, what: str) -> 'EncSchemeField':
+ # This is either represented as a dict in the YAML or as a bare string.
+ bits_what = 'bits for {}'.format(what)
+ value_what = 'value for {}'.format(what)
+ shift_what = 'shift for {}'.format(what)
+
+ shift = 0
+
+ if isinstance(yml, dict):
+ yd = check_keys(yml, what, ['bits'], ['value', 'shift'])
+
+ bits_yml = yd['bits']
+ if not (isinstance(bits_yml, str) or isinstance(bits_yml, int)):
+ raise ValueError('{} is of type {}, not a string or int.'
+ .format(bits_what, type(bits_yml).__name__))
+
+ # We require value to be given as a string because it's supposed to
+ # be in base 2, and PyYAML will parse 111 as one-hundred and
+ # eleven, 011 as 9 and 0x11 as 17. Aargh!
+ raw_value = None
+ val_yml = yd.get('value')
+ if val_yml is not None:
+ if not isinstance(val_yml, str):
+ raise ValueError("{} is of type {}, but must be a string "
+ "(we don't allow automatic conversion "
+ "because YAML's int conversion assumes "
+ "base 10 and value should be in base 2)."
+ .format(value_what,
+ type(val_yml).__name__))
+ raw_value = val_yml
+
+ # shift, on the other hand, is written in base 10. Allow an
+ # integer.
+ shift_yml = yd.get('shift')
+ if shift_yml is None:
+ pass
+ elif isinstance(shift_yml, str):
+ if not re.match(r'[0-9]+$', shift_yml):
+ raise ValueError('{} is {!r} but should be a '
+ 'non-negative integer.'
+ .format(shift_what, shift_yml))
+ shift = int(shift_yml)
+ elif isinstance(shift_yml, int):
+ if shift_yml < 0:
+ raise ValueError('{} is {!r} but should be a '
+ 'non-negative integer.'
+ .format(shift_what, shift_yml))
+ shift = shift_yml
+ else:
+ raise ValueError("{} is of type {}, but must be a string "
+ "or non-negative integer."
+ .format(shift_what, type(shift_yml).__name__))
+ elif isinstance(yml, str) or isinstance(yml, int):
+ bits_yml = yml
+ raw_value = None
+ else:
+ raise ValueError('{} is a {}, but should be a '
+ 'dict, string or integer.'
+ .format(what, type(yml).__name__))
+
+ # The bits field is usually parsed as a string ("10-4", or similar).
+ # But if it's a bare integer then YAML will parse it as an int. That's
+ # fine, but we turn it back into a string to be re-parsed by BitRanges.
+ assert isinstance(bits_yml, str) or isinstance(bits_yml, int)
+
+ bits = BitRanges.from_yaml(str(bits_yml), bits_what)
+ value = None
+ if raw_value is not None:
+ value = BoolLiteral.from_string(raw_value, value_what)
+ if bits.width != value.width:
+ raise ValueError('{} has bits that imply a width of {}, but '
+ 'a value with width {}.'
+ .format(what, bits.width, value.width))
+
+ return EncSchemeField(bits, value, shift)
+
+
+class EncSchemeImport:
+ '''An object representing inheritance of a parent scheme
+
+ When importing a parent scheme, we can set some of its fields with
+ immediate values. These are stored in the settings field.
+
+ '''
+ def __init__(self, yml: object, importer_name: str) -> None:
+ as_str = check_str(yml,
+ 'value for import in encoding scheme {!r}'
+ .format(importer_name))
+
+ # The supported syntax is
+ #
+ # - parent0(field0=b111, field1=b10)
+ # - parent1()
+ # - parent2
+
+ match = re.match(r'([^ (]+)[ ]*(?:\(([^)]+)\))?$', as_str)
+ if not match:
+ raise ValueError('Malformed encoding scheme '
+ 'inheritance by scheme {!r}: {!r}.'
+ .format(importer_name, as_str))
+
+ self.parent = match.group(1)
+ self.settings = {} # type: Dict[str, BoolLiteral]
+
+ when = ('When inheriting from {!r} in encoding scheme {!r}'
+ .format(self.parent, importer_name))
+
+ if match.group(2) is not None:
+ args = match.group(2).split(',')
+ for arg in args:
+ arg = arg.strip()
+ arg_parts = arg.split('=')
+ if len(arg_parts) != 2:
+ raise ValueError('{}, found an argument with {} '
+ 'equals signs (should have exactly one).'
+ .format(when, len(arg_parts) - 1))
+
+ field_name = arg_parts[0]
+ field_what = ('literal value for field {!r} when inheriting '
+ 'from {!r} in encoding scheme {!r}'
+ .format(arg_parts[0], self.parent, importer_name))
+ field_value = BoolLiteral.from_string(arg_parts[1], field_what)
+
+ if field_name in self.settings:
+ raise ValueError('{}, found multiple arguments assigning '
+ 'values to the field {!r}.'
+ .format(when, field_name))
+
+ self.settings[field_name] = field_value
+
+ def apply_settings(self,
+ esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields':
+ # Copy and set values in anything that has a setting
+ fields = {}
+ for name, literal in self.settings.items():
+ old_field = esf.fields.get(name)
+ if old_field is None:
+ raise ValueError('{} sets unknown field {!r} from {!r}.'
+ .format(what, name, self.parent))
+
+ if old_field.bits.width != literal.width:
+ raise ValueError('{} sets field {!r} from {!r} with a literal '
+ 'of width {}, but the field has width {}.'
+ .format(what, name, self.parent,
+ literal.width, old_field.bits.width))
+
+ fields[name] = EncSchemeField(old_field.bits,
+ literal,
+ old_field.shift)
+
+ # Copy anything else
+ op_fields = set()
+ for name, old_field in esf.fields.items():
+ if name in fields:
+ continue
+ op_fields.add(name)
+ fields[name] = old_field
+
+ return EncSchemeFields(fields, op_fields, esf.mask)
+
+
+class EncSchemeFields:
+ '''An object representing some fields in an encoding scheme'''
+ def __init__(self,
+ fields: Dict[str, EncSchemeField],
+ op_fields: Set[str],
+ mask: int) -> None:
+ self.fields = fields
+ self.op_fields = op_fields
+ self.mask = mask
+
+ @staticmethod
+ def empty() -> 'EncSchemeFields':
+ return EncSchemeFields({}, set(), 0)
+
+ @staticmethod
+ def from_yaml(yml: object, name: str) -> 'EncSchemeFields':
+ if not isinstance(yml, dict):
+ raise ValueError('fields for encoding scheme {!r} should be a '
+ 'dict, but we saw a {}.'
+ .format(name, type(yml).__name__))
+
+ fields = {}
+ op_fields = set() # type: Set[str]
+ mask = 0
+
+ overlaps = 0
+
+ for key, val in yml.items():
+ if not isinstance(key, str):
+ raise ValueError('{!r} is a bad key for a field name of '
+ 'encoding scheme {} (should be str, not {}).'
+ .format(key, name, type(key).__name__))
+
+ fld_what = 'field {!r} of encoding scheme {}'.format(key, name)
+ field = EncSchemeField.from_yaml(val, fld_what)
+
+ overlaps |= mask & field.bits.mask
+ mask |= field.bits.mask
+
+ fields[key] = field
+ if field.value is None:
+ op_fields.add(key)
+
+ if overlaps:
+ raise ValueError('Direct fields for encoding scheme {} have '
+ 'overlapping ranges (mask: {:#08x})'
+ .format(name, overlaps))
+
+ return EncSchemeFields(fields, op_fields, mask)
+
+ def merge_in(self, right: 'EncSchemeFields', when: str) -> None:
+ for name, field in right.fields.items():
+ if name in self.fields:
+ raise ValueError('Duplicate field name: {!r} {}.'
+ .format(name, when))
+
+ overlap = self.mask & field.bits.mask
+ if overlap:
+ raise ValueError('Overlapping bit ranges '
+ '(masks: {:08x} and {:08x} have '
+ 'intersection {:08x}) {}.'
+ .format(self.mask,
+ field.bits.mask, overlap, when))
+
+ self.fields[name] = field
+ self.mask |= field.bits.mask
+ if field.value is None:
+ assert name not in self.op_fields
+ self.op_fields.add(name)
+
+
+class EncScheme:
+ def __init__(self, yml: object, name: str) -> None:
+ what = 'encoding scheme {!r}'.format(name)
+ yd = check_keys(yml, what, [], ['parents', 'fields'])
+
+ if not yd:
+ raise ValueError('{} has no parents or fields.'.format(what))
+
+ fields_yml = yd.get('fields')
+ self.direct_fields = (EncSchemeFields.from_yaml(fields_yml, name)
+ if fields_yml is not None
+ else EncSchemeFields.empty())
+
+ parents_yml = yd.get('parents')
+ parents_what = 'parents of {}'.format(what)
+ parents = ([EncSchemeImport(y, name)
+ for y in check_list(parents_yml, parents_what)]
+ if parents_yml is not None
+ else [])
+ self.parents = index_list(parents_what,
+ parents,
+ lambda imp: imp.parent)
+
+
+class EncSchemes:
+ def __init__(self, yml: object) -> None:
+ if not isinstance(yml, dict):
+ raise ValueError("value for encoding-schemes is expected to be "
+ "a dict, but was actually a {}."
+ .format(type(yml).__name__))
+
+ self.schemes = {} # type: Dict[str, EncScheme]
+ self.resolved = {} # type: Dict[str, EncSchemeFields]
+
+ for key, val in yml.items():
+ if not isinstance(key, str):
+ raise ValueError('{!r} is a bad key for an encoding scheme '
+ 'name (should be str, not {}).'
+ .format(key, type(key).__name__))
+ self.schemes[key] = EncScheme(val, key)
+
+ def _resolve(self,
+ name: str,
+ user: str,
+ stack: List[str]) -> EncSchemeFields:
+ # Have we resolved this before?
+ resolved = self.resolved.get(name)
+ if resolved is not None:
+ return resolved
+
+ # Spot any circular inheritance
+ if name in stack:
+ raise RuntimeError('Circular inheritance of encoding '
+ 'schemes: {}'
+ .format(' -> '.join(stack + [name])))
+
+ # Does the scheme actually exist?
+ scheme = self.schemes.get(name)
+ if scheme is None:
+ raise ValueError('{} requires undefined encoding scheme {!r}.'
+ .format(user, name))
+
+ # Recursively try to resolve each parent scheme, applying any import
+ # settings
+ resolved_parents = {}
+ new_stack = stack + [name]
+ what = 'Import list of encoding scheme {!r}'.format(name)
+ for pname, pimport in scheme.parents.items():
+ resolved = self._resolve(pimport.parent, what, new_stack)
+ resolved_parents[pname] = pimport.apply_settings(resolved, what)
+
+ # Now try to merge the resolved imports
+ merged = EncSchemeFields.empty()
+ parent_names_so_far = [] # type: List[str]
+ for pname, pfields in resolved_parents.items():
+ when = ('merging fields of scheme {} into '
+ 'already merged fields of {}'
+ .format(pname, ', '.join(parent_names_so_far)))
+ merged.merge_in(pfields, when)
+ parent_names_so_far.append(repr(pname))
+
+ # Now try to merge in any direct fields
+ when = ('merging direct fields of scheme {} into fields from parents'
+ .format(name))
+ merged.merge_in(scheme.direct_fields, when)
+
+ return merged
+
+ def resolve(self, name: str, mnemonic: str) -> EncSchemeFields:
+ fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), [])
+
+ # Check completeness
+ missing = ((1 << 32) - 1) & ~fields.mask
+ if missing:
+ raise ValueError('Fields for encoding scheme {} miss some bits '
+ '(mask: {:#08x})'
+ .format(name, missing))
+
+ return fields
diff --git a/hw/ip/otbn/util/shared/insn_yaml.py b/hw/ip/otbn/util/shared/insn_yaml.py
index 22edb45..a397e6d 100644
--- a/hw/ip/otbn/util/shared/insn_yaml.py
+++ b/hw/ip/otbn/util/shared/insn_yaml.py
@@ -6,118 +6,16 @@
import itertools
import re
-from typing import (Callable, Dict, List, Optional,
- Sequence, Set, Tuple, TypeVar, Union, cast)
+from typing import Dict, List, Optional, Tuple, cast
import yaml
-
-T = TypeVar('T')
-
-
-def check_keys(obj: object,
- what: str,
- required_keys: List[str],
- optional_keys: List[str]) -> Dict[str, object]:
- '''Check that obj is a dict object with the expected keys
-
- If not, raise a ValueError; the what argument names the object.
-
- '''
- if not isinstance(obj, dict):
- raise ValueError("{} is expected to be a dict, but was actually a {}."
- .format(what, type(obj).__name__))
-
- allowed = set()
- missing = []
- for key in required_keys:
- assert key not in allowed
- allowed.add(key)
- if key not in obj:
- missing.append(key)
-
- for key in optional_keys:
- assert key not in allowed
- allowed.add(key)
-
- unexpected = []
- for key in obj:
- if key not in allowed:
- unexpected.append(key)
-
- if missing or unexpected:
- mstr = ('The following required fields were missing: {}.'
- .format(', '.join(missing)) if missing else '')
- ustr = ('The following unexpected fields were found: {}.'
- .format(', '.join(unexpected)) if unexpected else '')
- raise ValueError("{} doesn't have the right keys. {}{}{}"
- .format(what,
- mstr,
- ' ' if mstr and ustr else '',
- ustr))
-
- return obj
-
-
-def check_str(obj: object, what: str) -> str:
- '''Check that the given object is a string
-
- If not, raise a ValueError; the what argument names the object.
-
- '''
- if not isinstance(obj, str):
- raise ValueError('{} is of type {}, not a string.'
- .format(what, type(obj).__name__))
- return obj
-
-
-def check_optional_str(obj: object, what: str) -> Optional[str]:
- '''Check that the given object is a string or None
-
- If not, raise a ValueError; the what argument names the object.
-
- '''
- if obj is not None and not isinstance(obj, str):
- raise ValueError('{} is of type {}, not a string.'
- .format(what, type(obj).__name__))
- return obj
-
-
-def check_bool(obj: object, what: str) -> bool:
- '''Check that the given object is a bool
-
- If not, raise a ValueError; the what argument names the object.
-
- '''
- if obj is not True and obj is not False:
- raise ValueError('{} is of type {}, not a string.'
- .format(what, type(obj).__name__))
- return obj
-
-
-def check_list(obj: object, what: str) -> List[object]:
- '''Check that the given object is a list
-
- If not, raise a ValueError; the what argument names the object.
-
- '''
- if not isinstance(obj, list):
- raise ValueError('{} is of type {}, not a list.'
- .format(what, type(obj).__name__))
- return obj
-
-
-def index_list(what: str,
- objs: Sequence[T],
- get_key: Callable[[T], str]) -> Dict[str, T]:
- ret = {}
- for obj in objs:
- key = get_key(obj)
- if key in ret:
- raise ValueError('Duplicate object with key {} in {}.'
- .format(key, what))
- ret[key] = obj
- return ret
+from .encoding import Encoding
+from .encoding_scheme import EncSchemes
+from .operand import Operand
+from .syntax import InsnSyntax
+from .yaml_parse_helpers import (check_keys, check_str, check_bool,
+ check_list, index_list, get_optional_str)
class InsnGroup:
@@ -143,1333 +41,6 @@
return self.groups[0].key
-class BitRanges:
- '''Represents the bit ranges used for a field in an encoding scheme'''
- def __init__(self,
- mask: int,
- ranges: List[Tuple[int, int]],
- width: int) -> None:
- self.mask = mask
- self.ranges = ranges
- self.width = width
-
- @staticmethod
- def from_list(ranges: List[Tuple[int, int]]) -> 'BitRanges':
- mask = 0
- width = 0
- for msb, lsb in ranges:
- assert 0 <= lsb <= msb <= 31
- rng_mask = (1 << (msb + 1)) - (1 << lsb)
- assert not (rng_mask & mask)
- mask |= rng_mask
- width += msb - lsb + 1
-
- return BitRanges(mask, ranges, width)
-
- @staticmethod
- def from_yaml(as_string: str, what: str) -> 'BitRanges':
- # ranges ::= range
- # | range ',' ranges
- #
- # range ::= num
- # | num ':' num
- #
- # Ranges are assumed to be msb:lsb (with msb >= lsb). Bit indices are
- # at most 31 and ranges are disjoint.
-
- if not as_string:
- raise ValueError('Empty string as bits for {}'.format(what))
-
- overlaps = 0
-
- mask = 0
- ranges = []
- width = 0
-
- for rng in as_string.split(','):
- match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng)
- if match is None:
- raise ValueError('Range {!r} in bits for {} is malformed.'
- .format(rng, what))
-
- msb = int(match.group(1))
- maybe_lsb = match.group(2)
- lsb = msb if maybe_lsb is None else int(maybe_lsb)
-
- if msb < lsb:
- raise ValueError('Range {!r} in bits for {} has msb < lsb.'
- .format(rng, what))
-
- if msb >= 32:
- raise ValueError('Range {!r} in bits for {} has msb >= 32.'
- .format(rng, what))
-
- rng_mask = (1 << (msb + 1)) - (1 << lsb)
- overlaps |= rng_mask & mask
- mask |= rng_mask
-
- ranges.append((msb, lsb))
- width += msb - lsb + 1
-
- if overlaps:
- raise ValueError('Bits for {} have overlapping ranges '
- '(mask: {:#08x})'
- .format(what, overlaps))
-
- return BitRanges(mask, ranges, width)
-
- def __eq__(self, other: object) -> bool:
- return isinstance(other, BitRanges) and self.ranges == other.ranges
-
- def encode(self, value: int) -> int:
- '''Encode the given value as bit fields'''
- ret = 0
- bits_taken = 0
- for msb, lsb in self.ranges:
- rng_width = msb - lsb + 1
- value_msb = self.width - 1 - bits_taken
- value_lsb = value_msb - rng_width + 1
-
- rng_mask = (1 << rng_width) - 1
- rng_value = (value >> value_lsb) & rng_mask
- ret |= rng_value << lsb
- bits_taken += rng_width
-
- assert bits_taken == self.width
- return ret
-
- def decode(self, raw: int) -> int:
- '''Extract the bit fields from the given value'''
- ret = 0
- for msb, lsb in self.ranges:
- width = msb - lsb + 1
- mask = (1 << width) - 1
-
- ret <<= width
- ret |= (raw >> lsb) & mask
- return ret
-
-
-class BoolLiteral:
- '''Represents a boolean literal, with possible 'x characters
-
- We represent this as 2 masks: "ones" and "xs". The ones mask is the bits
- that are marked 1. The xs mask is the bits that are marked x. Then you can
- test whether a particular value matches the literal by zeroing all bits in
- the x mask and then comparing with the ones mask.
-
- '''
- def __init__(self, ones: int, xs: int, width: int) -> None:
- assert width > 0
- assert (ones >> width) == 0
- assert (xs >> width) == 0
-
- self.ones = ones
- self.xs = xs
- self.width = width
-
- @staticmethod
- def from_string(as_string: str, what: str) -> 'BoolLiteral':
- ones = 0
- xs = 0
- width = 0
-
- # The literal should always start with a 'b'
- if not as_string.startswith('b'):
- raise ValueError("Boolean literal for {} doesn't start with a 'b'."
- .format(what))
-
- for char in as_string[1:]:
- if char == '_':
- continue
-
- ones <<= 1
- xs <<= 1
- width += 1
-
- if char == '0':
- continue
- elif char == '1':
- ones |= 1
- elif char == 'x':
- xs |= 1
- else:
- raise ValueError('Boolean literal for {} has '
- 'unsupported character: {!r}.'
- .format(what, char))
-
- if not width:
- raise ValueError('Empty boolean literal for {}.'.format(what))
-
- return BoolLiteral(ones, xs, width)
-
- def char_for_bit(self, bit: int) -> str:
- '''Return 0, 1 or x for the bit at the given position'''
- assert bit < self.width
- if (self.ones >> bit) & 1:
- return '1'
- if (self.xs >> bit) & 1:
- return 'x'
- return '0'
-
-
-class EncSchemeField:
- '''Represents a single field in an encoding scheme'''
- def __init__(self,
- bits: BitRanges,
- value: Optional[BoolLiteral],
- shift: int) -> None:
- self.bits = bits
- self.value = value
- self.shift = shift
-
- @staticmethod
- def from_yaml(yml: object, what: str) -> 'EncSchemeField':
- # This is either represented as a dict in the YAML or as a bare string.
- bits_what = 'bits for {}'.format(what)
- value_what = 'value for {}'.format(what)
- shift_what = 'shift for {}'.format(what)
-
- shift = 0
-
- if isinstance(yml, dict):
- yd = check_keys(yml, what, ['bits'], ['value', 'shift'])
-
- bits_yml = yd['bits']
- if not (isinstance(bits_yml, str) or isinstance(bits_yml, int)):
- raise ValueError('{} is of type {}, not a string or int.'
- .format(bits_what, type(bits_yml).__name__))
-
- # We require value to be given as a string because it's supposed to
- # be in base 2, and PyYAML will parse 111 as one-hundred and
- # eleven, 011 as 9 and 0x11 as 17. Aargh!
- raw_value = None
- val_yml = yd.get('value')
- if val_yml is not None:
- if not isinstance(val_yml, str):
- raise ValueError("{} is of type {}, but must be a string "
- "(we don't allow automatic conversion "
- "because YAML's int conversion assumes "
- "base 10 and value should be in base 2)."
- .format(value_what,
- type(val_yml).__name__))
- raw_value = val_yml
-
- # shift, on the other hand, is written in base 10. Allow an
- # integer.
- shift_yml = yd.get('shift')
- if shift_yml is None:
- pass
- elif isinstance(shift_yml, str):
- if not re.match(r'[0-9]+$', shift_yml):
- raise ValueError('{} is {!r} but should be a '
- 'non-negative integer.'
- .format(shift_what, shift_yml))
- shift = int(shift_yml)
- elif isinstance(shift_yml, int):
- if shift_yml < 0:
- raise ValueError('{} is {!r} but should be a '
- 'non-negative integer.'
- .format(shift_what, shift_yml))
- shift = shift_yml
- else:
- raise ValueError("{} is of type {}, but must be a string "
- "or non-negative integer."
- .format(shift_what, type(shift_yml).__name__))
- elif isinstance(yml, str) or isinstance(yml, int):
- bits_yml = yml
- raw_value = None
- else:
- raise ValueError('{} is a {}, but should be a '
- 'dict, string or integer.'
- .format(what, type(yml).__name__))
-
- # The bits field is usually parsed as a string ("10-4", or similar).
- # But if it's a bare integer then YAML will parse it as an int. That's
- # fine, but we turn it back into a string to be re-parsed by BitRanges.
- assert isinstance(bits_yml, str) or isinstance(bits_yml, int)
-
- bits = BitRanges.from_yaml(str(bits_yml), bits_what)
- value = None
- if raw_value is not None:
- value = BoolLiteral.from_string(raw_value, value_what)
- if bits.width != value.width:
- raise ValueError('{} has bits that imply a width of {}, but '
- 'a value with width {}.'
- .format(what, bits.width, value.width))
-
- return EncSchemeField(bits, value, shift)
-
-
-class EncSchemeImport:
- '''An object representing inheritance of a parent scheme
-
- When importing a parent scheme, we can set some of its fields with
- immediate values. These are stored in the settings field.
-
- '''
- def __init__(self, yml: object, importer_name: str) -> None:
- as_str = check_str(yml,
- 'value for import in encoding scheme {!r}'
- .format(importer_name))
-
- # The supported syntax is
- #
- # - parent0(field0=b111, field1=b10)
- # - parent1()
- # - parent2
-
- match = re.match(r'([^ (]+)[ ]*(?:\(([^)]+)\))?$', as_str)
- if not match:
- raise ValueError('Malformed encoding scheme '
- 'inheritance by scheme {!r}: {!r}.'
- .format(importer_name, as_str))
-
- self.parent = match.group(1)
- self.settings = {} # type: Dict[str, BoolLiteral]
-
- when = ('When inheriting from {!r} in encoding scheme {!r}'
- .format(self.parent, importer_name))
-
- if match.group(2) is not None:
- args = match.group(2).split(',')
- for arg in args:
- arg = arg.strip()
- arg_parts = arg.split('=')
- if len(arg_parts) != 2:
- raise ValueError('{}, found an argument with {} '
- 'equals signs (should have exactly one).'
- .format(when, len(arg_parts) - 1))
-
- field_name = arg_parts[0]
- field_what = ('literal value for field {!r} when inheriting '
- 'from {!r} in encoding scheme {!r}'
- .format(arg_parts[0], self.parent, importer_name))
- field_value = BoolLiteral.from_string(arg_parts[1], field_what)
-
- if field_name in self.settings:
- raise ValueError('{}, found multiple arguments assigning '
- 'values to the field {!r}.'
- .format(when, field_name))
-
- self.settings[field_name] = field_value
-
- def apply_settings(self,
- esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields':
- # Copy and set values in anything that has a setting
- fields = {}
- for name, literal in self.settings.items():
- old_field = esf.fields.get(name)
- if old_field is None:
- raise ValueError('{} sets unknown field {!r} from {!r}.'
- .format(what, name, self.parent))
-
- if old_field.bits.width != literal.width:
- raise ValueError('{} sets field {!r} from {!r} with a literal '
- 'of width {}, but the field has width {}.'
- .format(what, name, self.parent,
- literal.width, old_field.bits.width))
-
- fields[name] = EncSchemeField(old_field.bits,
- literal,
- old_field.shift)
-
- # Copy anything else
- op_fields = set()
- for name, old_field in esf.fields.items():
- if name in fields:
- continue
- op_fields.add(name)
- fields[name] = old_field
-
- return EncSchemeFields(fields, op_fields, esf.mask)
-
-
-class EncSchemeFields:
- '''An object representing some fields in an encoding scheme'''
- def __init__(self,
- fields: Dict[str, EncSchemeField],
- op_fields: Set[str],
- mask: int) -> None:
- self.fields = fields
- self.op_fields = op_fields
- self.mask = mask
-
- @staticmethod
- def empty() -> 'EncSchemeFields':
- return EncSchemeFields({}, set(), 0)
-
- @staticmethod
- def from_yaml(yml: object, name: str) -> 'EncSchemeFields':
- if not isinstance(yml, dict):
- raise ValueError('fields for encoding scheme {!r} should be a '
- 'dict, but we saw a {}.'
- .format(name, type(yml).__name__))
-
- fields = {}
- op_fields = set() # type: Set[str]
- mask = 0
-
- overlaps = 0
-
- for key, val in yml.items():
- if not isinstance(key, str):
- raise ValueError('{!r} is a bad key for a field name of '
- 'encoding scheme {} (should be str, not {}).'
- .format(key, name, type(key).__name__))
-
- fld_what = 'field {!r} of encoding scheme {}'.format(key, name)
- field = EncSchemeField.from_yaml(val, fld_what)
-
- overlaps |= mask & field.bits.mask
- mask |= field.bits.mask
-
- fields[key] = field
- if field.value is None:
- op_fields.add(key)
-
- if overlaps:
- raise ValueError('Direct fields for encoding scheme {} have '
- 'overlapping ranges (mask: {:#08x})'
- .format(name, overlaps))
-
- return EncSchemeFields(fields, op_fields, mask)
-
- def merge_in(self, right: 'EncSchemeFields', when: str) -> None:
- for name, field in right.fields.items():
- if name in self.fields:
- raise ValueError('Duplicate field name: {!r} {}.'
- .format(name, when))
-
- overlap = self.mask & field.bits.mask
- if overlap:
- raise ValueError('Overlapping bit ranges '
- '(masks: {:08x} and {:08x} have '
- 'intersection {:08x}) {}.'
- .format(self.mask,
- field.bits.mask, overlap, when))
-
- self.fields[name] = field
- self.mask |= field.bits.mask
- if field.value is None:
- assert name not in self.op_fields
- self.op_fields.add(name)
-
-
-class EncScheme:
- def __init__(self, yml: object, name: str) -> None:
- what = 'encoding scheme {!r}'.format(name)
- yd = check_keys(yml, what, [], ['parents', 'fields'])
-
- if not yd:
- raise ValueError('{} has no parents or fields.'.format(what))
-
- fields_yml = yd.get('fields')
- self.direct_fields = (EncSchemeFields.from_yaml(fields_yml, name)
- if fields_yml is not None
- else EncSchemeFields.empty())
-
- parents_yml = yd.get('parents')
- parents_what = 'parents of {}'.format(what)
- parents = ([EncSchemeImport(y, name)
- for y in check_list(parents_yml, parents_what)]
- if parents_yml is not None
- else [])
- self.parents = index_list(parents_what,
- parents,
- lambda imp: imp.parent)
-
-
-class EncSchemes:
- def __init__(self, yml: object) -> None:
- if not isinstance(yml, dict):
- raise ValueError("value for encoding-schemes is expected to be "
- "a dict, but was actually a {}."
- .format(type(yml).__name__))
-
- self.schemes = {} # type: Dict[str, EncScheme]
- self.resolved = {} # type: Dict[str, EncSchemeFields]
-
- for key, val in yml.items():
- if not isinstance(key, str):
- raise ValueError('{!r} is a bad key for an encoding scheme '
- 'name (should be str, not {}).'
- .format(key, type(key).__name__))
- self.schemes[key] = EncScheme(val, key)
-
- def _resolve(self,
- name: str,
- user: str,
- stack: List[str]) -> EncSchemeFields:
- # Have we resolved this before?
- resolved = self.resolved.get(name)
- if resolved is not None:
- return resolved
-
- # Spot any circular inheritance
- if name in stack:
- raise RuntimeError('Circular inheritance of encoding '
- 'schemes: {}'
- .format(' -> '.join(stack + [name])))
-
- # Does the scheme actually exist?
- scheme = self.schemes.get(name)
- if scheme is None:
- raise ValueError('{} requires undefined encoding scheme {!r}.'
- .format(user, name))
-
- # Recursively try to resolve each parent scheme, applying any import
- # settings
- resolved_parents = {}
- new_stack = stack + [name]
- what = 'Import list of encoding scheme {!r}'.format(name)
- for pname, pimport in scheme.parents.items():
- resolved = self._resolve(pimport.parent, what, new_stack)
- resolved_parents[pname] = pimport.apply_settings(resolved, what)
-
- # Now try to merge the resolved imports
- merged = EncSchemeFields.empty()
- parent_names_so_far = [] # type: List[str]
- for pname, pfields in resolved_parents.items():
- when = ('merging fields of scheme {} into '
- 'already merged fields of {}'
- .format(pname, ', '.join(parent_names_so_far)))
- merged.merge_in(pfields, when)
- parent_names_so_far.append(repr(pname))
-
- # Now try to merge in any direct fields
- when = ('merging direct fields of scheme {} into fields from parents'
- .format(name))
- merged.merge_in(scheme.direct_fields, when)
-
- return merged
-
- def resolve(self, name: str, mnemonic: str) -> EncSchemeFields:
- fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), [])
-
- # Check completeness
- missing = ((1 << 32) - 1) & ~fields.mask
- if missing:
- raise ValueError('Fields for encoding scheme {} miss some bits '
- '(mask: {:#08x})'
- .format(name, missing))
-
- return fields
-
-
-class OperandType:
- '''The base class for some sort of operand type'''
- def __init__(self, width: Optional[int]) -> None:
- assert width is None or width > 0
- self.width = width
-
- def markdown_doc(self) -> Optional[str]:
- '''Generate any (markdown) documentation for this operand type
-
- The base class returns None, but subclasses might return something
- useful.
-
- '''
- return None
-
- def syntax_determines_value(self) -> bool:
- '''Can the value of this operand always be inferred from asm syntax?
-
- This is true for things like registers (the value "5" only comes from
- "r5", for example), but false for arbitrary immediates: an immediate
- operand might have a value that comes from a relocation.
-
- '''
- return False
-
- def read_index(self, as_str: str) -> Optional[int]:
- '''Try to read the given syntax as an actual integer index
-
- Raises a ValueError on definite failure ("found cabbage when I expected
- a register name"). Returns None on a soft failure: "this is a
- complicated looking expression, but it might be a sensible immediate".
-
- '''
- return None
-
- def render_val(self, value: int) -> str:
- '''Render the given value as a string.
-
- The default implementation prints it as a decimal number. Register
- operands, for example, will want to print 3 as "x3" and so on.
-
- '''
- return str(value)
-
-
-class RegOperandType(OperandType):
- '''A class representing a register operand type'''
- TYPE_FMTS = {
- 'gpr': (5, 'x'),
- 'wdr': (5, 'w'),
- 'csr': (12, None),
- 'wsr': (8, None)
- }
-
- def __init__(self, reg_type: str, is_dest: bool):
- fmt = RegOperandType.TYPE_FMTS.get(reg_type)
- assert fmt is not None
- width, _ = fmt
- super().__init__(width)
-
- self.reg_type = reg_type
- self.is_dest = is_dest
-
- def syntax_determines_value(self) -> bool:
- return True
-
- def read_index(self, as_str: str) -> int:
- width, pfx = RegOperandType.TYPE_FMTS[self.reg_type]
-
- re_pfx = '' if pfx is None else re.escape(pfx)
- match = re.match(re_pfx + '([0-9]+)$', as_str)
- if match is None:
- raise ValueError("Expression {!r} can't be parsed as a {}."
- .format(as_str, self.reg_type))
-
- idx = int(match.group(1))
- assert 0 <= idx
- if idx >> width:
- raise ValueError("Invalid register of type {}: {!r}."
- .format(self.reg_type, as_str))
-
- return idx
-
- def render_val(self, value: int) -> str:
- fmt = RegOperandType.TYPE_FMTS.get(self.reg_type)
- assert fmt is not None
- _, pfx = fmt
-
- if pfx is None:
- return super().render_val(value)
-
- return '{}{}'.format(pfx, value)
-
-
-class ImmOperandType(OperandType):
- '''A class representing an immediate operand type'''
- def markdown_doc(self) -> Optional[str]:
- # Override from OperandType base class
- if self.width is None:
- return None
-
- return 'Valid range: `0..{}`'.format((1 << self.width) - 1)
-
- def read_index(self, as_str: str) -> Optional[int]:
- # We only support simple integer literals.
- try:
- return int(as_str)
- except ValueError:
- return None
-
-
-class EnumOperandType(ImmOperandType):
- '''A class representing an enum operand type'''
- def __init__(self, items: List[str]):
- assert items
- super().__init__(int.bit_length(len(items) - 1))
- self.items = items
-
- def markdown_doc(self) -> Optional[str]:
- # Override from OperandType base class
- parts = ['Syntax table:\n\n'
- '| Syntax | Value of immediate |\n'
- '|--------|--------------------|\n']
- for idx, item in enumerate(self.items):
- parts.append('| `{}` | `{}` |\n'
- .format(item, idx))
- return ''.join(parts)
-
- def syntax_determines_value(self) -> bool:
- return True
-
- def read_index(self, as_str: str) -> Optional[int]:
- for idx, item in enumerate(self.items):
- if as_str == item:
- return idx
-
- known_vals = ', '.join(repr(item) for item in self.items)
- raise ValueError('Invalid enum value, {!r}. '
- 'Supported values: {}.'
- .format(as_str, known_vals))
-
- def render_val(self, value: int) -> str:
- # On a bad value, we have to return *something*. Since this is just
- # going into disassembly, let's be vaguely helpful and return something
- # that looks clearly bogus.
- #
- # Note that if the number of items in the enum is not a power of 2,
- # this could happen with a bad binary, despite good tools.
- if value < 0 or value >= len(self.items):
- return '???'
-
- return self.items[value]
-
-
-class OptionOperandType(ImmOperandType):
- '''A class representing an option operand type'''
- def __init__(self, option: str):
- super().__init__(1)
- self.option = option
-
- def markdown_doc(self) -> Optional[str]:
- # Override from OperandType base class
- return 'To specify, use the literal syntax `{}`\n'.format(self.option)
-
- def syntax_determines_value(self) -> bool:
- return True
-
- def read_index(self, as_str: str) -> Optional[int]:
- if as_str == self.option:
- return 1
-
- raise ValueError('Invalid option value, {!r}. '
- 'If specified, it should have been {!r}.'
- .format(as_str, self.option))
-
- def render_val(self, value: int) -> str:
- # Option types are always 1 bit wide, so the value should be 0 or 1.
- assert value in [0, 1]
- return self.option if value else ''
-
-
-def parse_operand_type(fmt: str) -> OperandType:
- '''Make sense of the operand type syntax'''
- # Registers
- if fmt == 'grs':
- return RegOperandType('gpr', False)
- if fmt == 'grd':
- return RegOperandType('gpr', True)
- if fmt == 'wrs':
- return RegOperandType('wdr', False)
- if fmt == 'wrd':
- return RegOperandType('wdr', True)
- if fmt == 'csr':
- return RegOperandType('csr', True)
- if fmt == 'wsr':
- return RegOperandType('wsr', True)
-
- # Immediates
- if fmt == 'imm':
- return ImmOperandType(None)
- m = re.match(r'imm([1-9][0-9]*)$', fmt)
- if m:
- return ImmOperandType(int(m.group(1)))
- m = re.match(r'enum\(([^\)]+)\)$', fmt)
- if m:
- return EnumOperandType([item.strip()
- for item in m.group(1).split(',')])
- m = re.match(r'option\(([^\)]+)\)$', fmt)
- if m:
- return OptionOperandType(m.group(1).strip())
-
- raise ValueError("Operand type description {!r} "
- "didn't match any recognised format."
- .format(fmt))
-
-
-def infer_operand_type(name: str) -> OperandType:
- '''Try to guess an operand's type from its name'''
-
- if re.match(r'grs[0-9]*$', name):
- return parse_operand_type('grs')
- if name in ['grd', 'wrd', 'csr', 'wsr']:
- return parse_operand_type(name)
- if re.match(r'wrs[0-9]*$', name):
- return parse_operand_type('wrs')
- if re.match(r'imm[0-9]*$', name):
- return parse_operand_type('imm')
- if name == 'offset':
- return parse_operand_type('imm')
-
- raise ValueError("Operand name {!r} doesn't imply an operand type: "
- "you'll have to set the type explicitly."
- .format(name))
-
-
-def make_operand_type(yml: object, operand_name: str) -> OperandType:
- '''Construct a type for an operand
-
- This is either based on the type, if given, or inferred from the name
- otherwise.
-
- '''
- return (parse_operand_type(check_str(yml,
- 'type for {} operand'
- .format(operand_name)))
- if yml is not None
- else infer_operand_type(operand_name))
-
-
-def get_optional_str(data: Dict[str, object],
- key: str, what: str) -> Optional[str]:
- return check_optional_str(data.get(key), '{} field for {}'.format(key, what))
-
-
-class Operand:
- def __init__(self, yml: object, insn_name: str) -> None:
- # The YAML representation should be a string (a bare operand name) or a
- # dict.
- what = 'operand for {!r} instruction'.format(insn_name)
- if isinstance(yml, str):
- name = yml
- op_type = None
- doc = None
- elif isinstance(yml, dict):
- yd = check_keys(yml, what, ['name'], ['type', 'doc'])
- name = check_str(yd['name'], 'name of ' + what)
-
- op_what = '{!r} {}'.format(name, what)
- op_type = get_optional_str(yd, 'type', op_what)
- doc = get_optional_str(yd, 'doc', op_what)
-
- op_what = '{!r} {}'.format(name, what)
- self.name = name
- self.op_type = make_operand_type(op_type, name)
- self.doc = doc
-
-
-class SyntaxToken:
- '''An object representing a single token in an instruction's syntax
-
- See InsnSyntax for more details. The is_literal attribute is true if this
- is a literal hunk of text (rather than an operand name). The text attribute
- either holds the literal syntax or the operand name.
-
- '''
- def __init__(self, is_literal: bool, text: str) -> None:
- assert text
- self.is_literal = is_literal
- # Make whitespace canonical for literals
- self.text = re.sub(r'\s+', ' ', text) if is_literal else text
-
- def render_doc(self) -> str:
- '''Return how this syntax token should look in the documentation'''
- if self.is_literal:
- return self.text
- else:
- return '<{}>'.format(self.text)
-
- def asm_pattern(self) -> str:
- '''Return a regex pattern that can be used for matching this token
-
- If the token represents an operand, the pattern is wrapped in a group
- (to capture the operand). For more details about the syntax, see
- InsnSyntax.
-
- '''
- if self.is_literal:
- # A literal that is pure whitespace "requires the whitespace".
- # Otherwise, replace all internal whitespace with \s+ and allow
- # optional whitespace afterwards. To do this easily, we split the
- # literal on whitespace. The result is empty iff it was just
- # whitespace in the first place.
- words = self.text.split()
- if not words:
- return r'\s+'
-
- # For non-whitespace literals, we disallow leading space and add
- # optional trailing space. This convention should avoid lots of
- # \s*\s* pairs.
- parts = [re.escape(words[0])]
- for w in words[1:]:
- parts.append(r'\s+')
- parts.append(re.escape(w))
- parts.append(r'\s*')
-
- return ''.join(parts)
-
- # Otherwise, this is an operand. For now, at least, we're very
- # restrictive for operands. No spaces and no commas (the second rule
- # avoids silliness like "a, b, c" matching a syntax with only two
- # operands by setting the second to "b, c").
- #
- # We also split out ++ and -- separately, to disambiguate things like
- # x1++, which must be parsed as x1 followed by ++.
- #
- # If we want to do better and allow things like
- #
- # addi x0, x1, 1 + 3
- #
- # then we need to use something more serious than just regexes for
- # parsing.
- return r'(-?[^ ,+\-]+|[+\-]+)\s*'
-
- def render_vals(self,
- op_vals: Dict[str, int],
- operands: Dict[str, Operand]) -> str:
- '''Return an assembly listing for the given operand fields
-
- '''
- if self.is_literal:
- return self.text
-
- assert self.text in op_vals
- assert self.text in operands
-
- return operands[self.text].op_type.render_val(op_vals[self.text])
-
-
-class SyntaxHunk:
- '''An object representing a hunk of syntax that might be optional'''
- def __init__(self,
- is_optional: bool,
- tokens: List[SyntaxToken],
- op_list: List[str],
- op_set: Set[str]) -> None:
- assert tokens
- self.is_optional = is_optional
- self.tokens = tokens
- self.op_list = op_list
- self.op_set = op_set
-
- @staticmethod
- def from_list(operands: List[str]) -> 'SyntaxHunk':
- '''Smart constructor for a list of operands with "normal" syntax'''
- assert operands
- comma = SyntaxToken(True, ', ')
- tokens = [SyntaxToken(False, operands[0])]
- for op in operands[1:]:
- tokens.append(comma)
- tokens.append(SyntaxToken(False, op))
-
- op_set = set(operands)
- assert len(op_set) == len(operands)
-
- return SyntaxHunk(False, tokens, operands, op_set)
-
- @staticmethod
- def from_string(mnemonic: str, optional: bool, raw: str) -> 'SyntaxHunk':
- '''Smart constructor that parses YAML syntax (see InsnSyntax)'''
- assert raw
-
- tokens = []
- op_list = []
- op_set = set()
-
- parts = re.split(r'<([^>]+)>', raw)
- for idx, part in enumerate(parts):
- # The matches for the regex appear in positions 1, 3, 5, ...
- is_literal = not (idx & 1)
- if ('<' in part or '>' in part) and not is_literal:
- raise ValueError("Syntax for {!r} has hunk {!r} which doesn't "
- "seem to surround <operand>s properly."
- .format(mnemonic, raw))
-
- if not is_literal:
- assert part
- if part in op_set:
- raise ValueError("Syntax for {!r} has hunk {!r} with "
- "more than one occurrence of <{}>."
- .format(mnemonic, raw, part))
- op_list.append(part)
- op_set.add(part)
-
- # Only allow empty parts (and skip their tokens) if at one end or
- # the other
- if not part and idx not in [0, len(parts) - 1]:
- raise ValueError("Syntax for {!r} has two adjacent operand "
- "tokens, with no intervening syntax."
- .format(mnemonic))
-
- if part:
- tokens.append(SyntaxToken(is_literal, part))
-
- return SyntaxHunk(optional, tokens, op_list, op_set)
-
- def render_doc(self) -> str:
- '''Return how this hunk should look in the documentation'''
- parts = []
- for token in self.tokens:
- parts.append(token.render_doc())
-
- body = ''.join(parts)
- return '[{}]'.format(body) if self.is_optional else body
-
- def asm_pattern(self) -> str:
- '''Return a regex pattern that can be used for matching this hunk
-
- The result will have a group per operand. It allows trailing, but not
- leading, space within the hunk.
-
- '''
- parts = []
- for token in self.tokens:
- parts.append(token.asm_pattern())
- body = ''.join(parts)
-
- # For an optional hunk, we build it up in the form "(?:foo)?". This
- # puts a non-capturing group around foo and then applies "?"
- # (one-or-more) to it.
- return '(?:{})?'.format(body) if self.is_optional else body
-
- def render_vals(self,
- op_vals: Dict[str, int],
- operands: Dict[str, Operand]) -> str:
- '''Return an assembly listing for the hunk given operand values
-
- If this hunk is optional and all its operands are zero, the hunk is
- omitted (so this function returns the empty string).
-
- '''
- if self.is_optional:
- required = False
- for op_name in self.op_list:
- if op_vals[op_name] != 0:
- required = True
- break
-
- if not required:
- return ''
-
- return ''.join(token.render_vals(op_vals, operands)
- for token in self.tokens)
-
-
-class InsnSyntax:
- '''A class representing the syntax of an instruction
-
- An instruction's syntax is specified in the YAML file by writing it out
- with operand names surrounded by angle brackets. For example, a simple NOT
- instruction might have a syntax of
-
- <dst>, <src>
-
- which should be interpreted as the following tokens:
-
- - Operand called 'dst'
- - A literal ','
- - Operand called 'src'
-
- Between the tokens, whitespace is optional (so "x0 , x1" and "x0,x1" both
- match the syntax above) unless a literal token is just a space, in which
- case some whitespace is required. For example
-
- <dst> <src>
-
- would match "x0 x1" but not "x0x1". Whitespace within literal syntax tokens
- means that some space is required, matching the regex \\s+. For example,
- the (rather strange) syntax
-
- <dst> + - <src>
-
- would match "x0 + - x1" or "x0+ -x1", but not "x0 +- x1".
-
- Some operands (and surrounding syntax) might be optional. The optional
- syntax is surrounded by square brackets. Nesting is not supported. For
- example:
-
- <dst>, <src>[, <offset>]
-
- would match "x0, x1, 123" or "x0, x1".
-
- Note that a given syntax might be ambiguous. For example,
-
- <dst>, <src>[, <offset>][, <flavour>]
-
- With "x0, x1, 123", is 123 an offset or a flavour? (We choose not to embed
- typing information into the syntax, because that results in very confusing
- assembler error messages). We break ties in the same way as the underlying
- regex engine, assigning the operand to the first group, so 123 is an offset
- in this case. Such syntaxes are rather confusing though, so probably not a
- good idea.
-
- The parsed syntax is stored as a list of "hunks". Each hunk contains a flag
- showing whether the hunk is optional or required and also a list of
- SyntaxToken objects.
-
- '''
- def __init__(self,
- hunks: List[SyntaxHunk],
- op_list: List[str],
- op_set: Set[str]) -> None:
- self.hunks = hunks
- self.op_list = op_list
- self.op_set = op_set
-
- @staticmethod
- def from_list(operands: List[str]) -> 'InsnSyntax':
- '''Smart constructor for a list of operands with "normal" syntax'''
- if not operands:
- return InsnSyntax([], [], set())
-
- hunk = SyntaxHunk.from_list(operands)
- return InsnSyntax([hunk], hunk.op_list, hunk.op_set)
-
- @staticmethod
- def from_yaml(mnemonic: str, raw: str) -> 'InsnSyntax':
- '''Parse the syntax in the YAML file'''
-
- # The raw syntax looks something like
- #
- # <op0>, <op1>[(<op2>)]
- #
- # to mean that you either have "x0, x1" or "x0, x2(x3)". First, split
- # out the bracketed parts.
- by_left = raw.split('[')
- parts = [(False, by_left[0])]
- for after_left in by_left[1:]:
- split = after_left.split(']', 1)
- if len(split) != 2:
- raise ValueError('Unbalanced or nested [] in instruction '
- 'syntax for {!r}.'
- .format(mnemonic))
-
- parts += [(True, split[0]), (False, split[1])]
-
- # Now parts contains a list of pairs (required, txt) where txt is a
- # hunk of the syntax and req is true if this hunk is required. A part
- # might be empty. For example, "[a]b c[d]" with both lead and trail
- # with an empty part. But it shouldn't be empty if it's marked
- # optional: that would be something like "a[]b", which doesn't make
- # much sense.
- hunks = []
- for optional, raw in parts:
- if raw:
- hunks.append(SyntaxHunk.from_string(mnemonic, optional, raw))
- elif optional:
- raise ValueError('Empty [] in instruction syntax for {!r}.'
- .format(mnemonic))
-
- # Collect up operands across the hunks
- op_list = []
- op_set = set()
- for hunk in hunks:
- op_list += hunk.op_list
- op_set |= hunk.op_set
-
- if len(op_list) != len(op_set):
- raise ValueError('Instruction syntax for {!r} is not '
- 'linear in its operands.'
- .format(mnemonic))
-
- return InsnSyntax(hunks, op_list, op_set)
-
- def render_doc(self) -> str:
- '''Return how this syntax should look in the documentation'''
- return ''.join(hunk.render_doc() for hunk in self.hunks)
-
- def asm_pattern(self) -> Tuple[str, Dict[str, int]]:
- '''Return a regex pattern and a group name map for this syntax'''
- parts = [r'\s*']
- for hunk in self.hunks:
- parts.append(hunk.asm_pattern())
- parts.append('$')
- pattern = ''.join(parts)
-
- op_to_grp = {}
- for idx, op in enumerate(self.op_list):
- op_to_grp[op] = 1 + idx
-
- return (pattern, op_to_grp)
-
- def render_vals(self,
- op_vals: Dict[str, int],
- operands: Dict[str, Operand]) -> str:
- '''Return an assembly listing for the given operand fields'''
- parts = []
- for hunk in self.hunks:
- parts.append(hunk.render_vals(op_vals, operands))
- return ''.join(parts)
-
-
-class EncodingField:
- '''A single element of an encoding's mapping'''
- def __init__(self,
- value: Union[BoolLiteral, str],
- scheme_field: EncSchemeField) -> None:
- self.value = value
- self.scheme_field = scheme_field
-
- @staticmethod
- def from_yaml(as_str: str,
- scheme_field: EncSchemeField,
- name_to_operand: Dict[str, Operand],
- what: str) -> 'EncodingField':
- # The value should either be a boolean literal ("000xx11" or similar)
- # or should be a name, which is taken as the name of an operand.
- if not as_str:
- raise ValueError('Empty string as {}.'.format(what))
-
- # Set self.value to be either the bool literal or the name of the
- # operand.
- value_width = None
- value = '' # type: Union[BoolLiteral, str]
- if re.match(r'b[01x_]+$', as_str):
- value = BoolLiteral.from_string(as_str, what)
- value_width = value.width
- value_type = 'a literal value'
- else:
- operand = name_to_operand.get(as_str)
- if operand is None:
- raise ValueError('Unknown operand, {!r}, as {}'
- .format(as_str, what))
- value_width = operand.op_type.width
- value = as_str
- value_type = 'an operand'
-
- # Unless we had an operand of type 'imm' (unknown width), we now have
- # an expected width. Check it matches the width of the schema field.
- if value_width is not None:
- if scheme_field.bits.width != value_width:
- raise ValueError('{} is mapped to {} with width {}, but the '
- 'encoding schema field has width {}.'
- .format(what, value_type, value_width,
- scheme_field.bits.width))
-
- # Track the scheme field as well (so we don't have to keep track of a
- # scheme once we've made an encoding object)
- return EncodingField(value, scheme_field)
-
-
-class Encoding:
- '''The encoding for an instruction'''
- def __init__(self,
- yml: object,
- schemes: EncSchemes,
- name_to_operand: Dict[str, Operand],
- mnemonic: str):
- what = 'encoding for instruction {!r}'.format(mnemonic)
- yd = check_keys(yml, what, ['scheme', 'mapping'], [])
-
- scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic)
- scheme_name = check_str(yd['scheme'], scheme_what)
- scheme_fields = schemes.resolve(scheme_name, mnemonic)
-
- what = 'encoding mapping for instruction {!r}'.format(mnemonic)
-
- # Check we've got exactly the right fields for the scheme
- ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), [])
-
- # Track the set of operand names that were used in some field
- operands_used = set()
-
- self.fields = {}
- for field_name, scheme_field in scheme_fields.fields.items():
- if scheme_field.value is not None:
- field = EncodingField(scheme_field.value, scheme_field)
- else:
- field_what = ('value for {} field in encoding for instruction {!r}'
- .format(field_name, mnemonic))
- field = EncodingField.from_yaml(check_str(ydm[field_name], field_what),
- scheme_fields.fields[field_name],
- name_to_operand,
- field_what)
-
- # If the field's value is an operand rather than a literal, it
- # will have type str. Track the operands that we've used.
- if isinstance(field.value, str):
- operands_used.add(field.value)
-
- self.fields[field_name] = field
-
- # We know that every field in the encoding scheme has a value. But we
- # still need to check that every operand ended up in some field.
- assert operands_used <= set(name_to_operand.keys())
- unused_ops = set(name_to_operand.keys()) - operands_used
- if unused_ops:
- raise ValueError('Not all operands used in {} (missing: {}).'
- .format(what, ', '.join(list(unused_ops))))
-
- def get_masks(self) -> Tuple[int, int]:
- '''Return zeros/ones masks for encoding
-
- Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a
- bit is set if there is an bit pattern matching this encoding with that
- bit zero. m1 is the ones mask: equivalent, but for that bit one.
-
- '''
- m0 = 0
- m1 = 0
- for field_name, field in self.fields.items():
- if isinstance(field.value, str):
- m0 |= field.scheme_field.bits.mask
- m1 |= field.scheme_field.bits.mask
- else:
- # Match up the bits in the value with the ranges in the scheme.
- assert field.value.width > 0
- assert field.value.width == field.scheme_field.bits.width
- bits_seen = 0
- for msb, lsb in field.scheme_field.bits.ranges:
- val_msb = field.scheme_field.bits.width - 1 - bits_seen
- val_lsb = val_msb - msb + lsb
- bits_seen += msb - lsb + 1
-
- for idx in range(0, msb - lsb + 1):
- desc = field.value.char_for_bit(val_lsb + idx)
- if desc in ['0', 'x']:
- m0 |= 1 << (idx + lsb)
- if desc in ['1', 'x']:
- m1 |= 1 << (idx + lsb)
-
- all_bits = (1 << 32) - 1
- assert (m0 | m1) == all_bits
- return (m0, m1)
-
- def get_ones_mask(self) -> int:
- '''Return the mask of fixed bits that are set
-
- For literal values of x (unused bits in the encoding), we'll prefer
- '0'.
-
- '''
- m0, m1 = self.get_masks()
- return m1 & ~m0
-
- def assemble(self, op_to_idx: Dict[str, int]) -> int:
- '''Assemble an instruction
-
- op_to_idx should map each operand in the encoding to some integer
- index, which should be small enough to fit in the width of the
- operand's type and should be representable after any shift. Will raise
- a ValueError if not.
-
- '''
- val = self.get_ones_mask()
- for field_name, field in self.fields.items():
- if not isinstance(field.value, str):
- # We've done this field already (in get_ones_mask)
- continue
-
- # Try to get the operand value for the field. If this is an
- # optional operand, we might not have one, and just encode zero.
- field_val = op_to_idx.get(field.value, 0)
-
- # Are there any low bits that shouldn't be there?
- shift_mask = (1 << field.scheme_field.shift) - 1
- if field_val & shift_mask:
- raise ValueError("operand field {} has a shift of {}, "
- "so can't represent the value {:#x}."
- .format(field.value,
- field.scheme_field.shift,
- field_val))
-
- shifted = field_val >> field.scheme_field.shift
-
- # Is the number too big? At the moment, we are assuming immediates
- # are unsigned (because the OTBN big number instructions all have
- # unsigned immediates).
- if shifted >> field.scheme_field.bits.width:
- shift_msg = ((' (shifted right by {} bits from {:#x})'
- .format(field.scheme_field.shift, field_val))
- if field.scheme_field.shift
- else '')
- raise ValueError("operand field {} has a width of {}, "
- "so can't represent the value {:#x}{}."
- .format(field.value,
- field.scheme_field.bits.width,
- shifted, shift_msg))
-
- val |= field.scheme_field.bits.encode(shifted)
-
- return val
-
-
class Insn:
def __init__(self,
yml: object,
diff --git a/hw/ip/otbn/util/shared/operand.py b/hw/ip/otbn/util/shared/operand.py
new file mode 100644
index 0000000..e20289c
--- /dev/null
+++ b/hw/ip/otbn/util/shared/operand.py
@@ -0,0 +1,279 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Optional
+from .yaml_parse_helpers import check_keys, check_str, get_optional_str
+
+
+class OperandType:
+    '''Common base class for every kind of operand type
+
+    The width attribute is either a positive integer or None.
+
+    '''
+    def __init__(self, width: Optional[int]) -> None:
+        # A width, if given at all, must be strictly positive
+        if width is not None:
+            assert width > 0
+        self.width = width
+
+    def markdown_doc(self) -> Optional[str]:
+        '''Return (markdown) documentation for this operand type, if any
+
+        There is nothing to say at this level, so the base class returns
+        None. Subclasses can override this to return something useful.
+
+        '''
+        return None
+
+    def syntax_determines_value(self) -> bool:
+        '''Is the operand's value always fixed by its assembly syntax?
+
+        Subclasses return true for things like registers (where the value
+        "5" can only come from "r5", say). The base class returns false,
+        which is the right answer for arbitrary immediates: their value
+        might come from a relocation.
+
+        '''
+        return False
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        '''Try to interpret the given syntax as an actual integer index
+
+        Overrides raise a ValueError on a definite failure ("found cabbage
+        when I expected a register name") and return None on a soft failure
+        ("this is a complicated looking expression, but it might be a
+        sensible immediate"). The base class always reports a soft failure.
+
+        '''
+        return None
+
+    def render_val(self, value: int) -> str:
+        '''Return the given value rendered as a string
+
+        This default implementation gives the decimal representation.
+        Register operands, for example, will want to render 3 as "x3".
+
+        '''
+        return str(value)
+
+
+class RegOperandType(OperandType):
+    '''The operand type for a register operand'''
+
+    # Maps each register type to a pair (width, prefix). The width bounds
+    # the register index (read_index rejects any index with bits set at or
+    # above it). The prefix is what precedes the index in assembly syntax;
+    # None means the index is written bare.
+    TYPE_FMTS = {
+        'gpr': (5, 'x'),
+        'wdr': (5, 'w'),
+        'csr': (12, None),
+        'wsr': (8, None)
+    }
+
+    def __init__(self, reg_type: str, is_dest: bool):
+        fmt = RegOperandType.TYPE_FMTS.get(reg_type)
+        assert fmt is not None
+        super().__init__(fmt[0])
+
+        self.reg_type = reg_type
+        self.is_dest = is_dest
+
+    def syntax_determines_value(self) -> bool:
+        # Override from OperandType base class
+        return True
+
+    def read_index(self, as_str: str) -> int:
+        '''Parse a register name like "x3" and return its index
+
+        Raises a ValueError if as_str doesn't have the right form for this
+        register type or if the index is too big for the type's width.
+
+        '''
+        width, pfx = RegOperandType.TYPE_FMTS[self.reg_type]
+
+        # The (possibly empty) prefix followed by a decimal index
+        pattern = ('' if pfx is None else re.escape(pfx)) + '([0-9]+)$'
+        match = re.match(pattern, as_str)
+        if match is None:
+            raise ValueError("Expression {!r} can't be parsed as a {}."
+                             .format(as_str, self.reg_type))
+
+        idx = int(match.group(1))
+        assert 0 <= idx
+        if idx >> width:
+            raise ValueError("Invalid register of type {}: {!r}."
+                             .format(self.reg_type, as_str))
+
+        return idx
+
+    def render_val(self, value: int) -> str:
+        '''Render value as a register name (adding the prefix, if any)'''
+        fmt = RegOperandType.TYPE_FMTS.get(self.reg_type)
+        assert fmt is not None
+        pfx = fmt[1]
+
+        if pfx is not None:
+            return '{}{}'.format(pfx, value)
+
+        # No prefix: fall back on the plain rendering from the base class
+        return super().render_val(value)
+
+
+class ImmOperandType(OperandType):
+    '''The operand type for an immediate operand'''
+    def markdown_doc(self) -> Optional[str]:
+        # Override from OperandType base class. There's only something to
+        # say if we know the width (and hence the range).
+        if self.width is not None:
+            max_val = (1 << self.width) - 1
+            return 'Valid range: `0..{}`'.format(max_val)
+
+        return None
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        # Simple integer literals are all that we understand. Anything else
+        # is a soft failure (None), not an error.
+        try:
+            value = int(as_str)
+        except ValueError:
+            return None
+        return value
+
+
+class EnumOperandType(ImmOperandType):
+    '''A class representing an enum operand type
+
+    The operand is written in assembly as one of the strings in items and
+    its value is the index of that string in the list.
+
+    '''
+    def __init__(self, items: List[str]):
+        assert items
+        # Indices run from 0 to len(items) - 1, so the largest one needs
+        # bit_length(len(items) - 1) bits. That is zero for an enum with a
+        # single item, but OperandType insists on a positive width, so use
+        # at least one bit.
+        super().__init__(max(1, int.bit_length(len(items) - 1)))
+        self.items = items
+
+    def markdown_doc(self) -> Optional[str]:
+        # Override from OperandType base class: a table with one row per
+        # enum item, giving its syntax and the matching value.
+        parts = ['Syntax table:\n\n'
+                 '| Syntax | Value of immediate |\n'
+                 '|--------|--------------------|\n']
+        for idx, item in enumerate(self.items):
+            parts.append('| `{}` | `{}` |\n'
+                         .format(item, idx))
+        return ''.join(parts)
+
+    def syntax_determines_value(self) -> bool:
+        # Override from OperandType base class
+        return True
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        '''Return the index of the enum item that is spelled as_str
+
+        Raises a ValueError if as_str is not one of the items.
+
+        '''
+        for idx, item in enumerate(self.items):
+            if as_str == item:
+                return idx
+
+        known_vals = ', '.join(repr(item) for item in self.items)
+        raise ValueError('Invalid enum value, {!r}. '
+                         'Supported values: {}.'
+                         .format(as_str, known_vals))
+
+    def render_val(self, value: int) -> str:
+        # On a bad value, we have to return *something*. Since this is just
+        # going into disassembly, let's be vaguely helpful and return
+        # something that looks clearly bogus.
+        #
+        # Note that if the number of items in the enum is not a power of 2,
+        # this could happen with a bad binary, despite good tools.
+        if value < 0 or value >= len(self.items):
+            return '???'
+
+        return self.items[value]
+
+
+class OptionOperandType(ImmOperandType):
+    '''The operand type for an option: a 1-bit flag with literal syntax'''
+    def __init__(self, option: str):
+        super().__init__(1)
+        self.option = option
+
+    def markdown_doc(self) -> Optional[str]:
+        # Override from OperandType base class
+        return 'To specify, use the literal syntax `{}`\n'.format(self.option)
+
+    def syntax_determines_value(self) -> bool:
+        # Override from OperandType base class
+        return True
+
+    def read_index(self, as_str: str) -> Optional[int]:
+        # The only syntax we accept is the option string itself, which
+        # means that the flag is set.
+        if as_str != self.option:
+            raise ValueError('Invalid option value, {!r}. '
+                             'If specified, it should have been {!r}.'
+                             .format(as_str, self.option))
+
+        return 1
+
+    def render_val(self, value: int) -> str:
+        # Option types are always 1 bit wide, so the value should be 0 or
+        # 1. A set flag renders as the option string; a clear one renders
+        # as nothing at all.
+        assert value in [0, 1]
+        if value:
+            return self.option
+        return ''
+
+
+def parse_operand_type(fmt: str) -> OperandType:
+    '''Turn an operand type description into an OperandType
+
+    Raises a ValueError if fmt isn't in any recognised format.
+
+    '''
+    # Registers. Each recognised name maps to the arguments for the
+    # RegOperandType constructor: (reg_type, is_dest).
+    reg_fmts = {
+        'grs': ('gpr', False),
+        'grd': ('gpr', True),
+        'wrs': ('wdr', False),
+        'wrd': ('wdr', True),
+        'csr': ('csr', True),
+        'wsr': ('wsr', True)
+    }
+    reg_args = reg_fmts.get(fmt)
+    if reg_args is not None:
+        reg_type, is_dest = reg_args
+        return RegOperandType(reg_type, is_dest)
+
+    # Immediates: a bare "imm" has no width; "imm<N>" has width N.
+    if fmt == 'imm':
+        return ImmOperandType(None)
+
+    sized_imm = re.match(r'imm([1-9][0-9]*)$', fmt)
+    if sized_imm is not None:
+        return ImmOperandType(int(sized_imm.group(1)))
+
+    # Enums look like "enum(a, b, c)": a comma-separated list of items
+    enum = re.match(r'enum\(([^\)]+)\)$', fmt)
+    if enum is not None:
+        raw_items = enum.group(1).split(',')
+        return EnumOperandType([raw_item.strip() for raw_item in raw_items])
+
+    # Options look like "option(foo)"
+    option = re.match(r'option\(([^\)]+)\)$', fmt)
+    if option is not None:
+        return OptionOperandType(option.group(1).strip())
+
+    raise ValueError("Operand type description {!r} "
+                     "didn't match any recognised format."
+                     .format(fmt))
+
+
+def infer_operand_type(name: str) -> OperandType:
+    '''Guess the type of an operand from its name
+
+    Raises a ValueError if the name doesn't suggest any particular type.
+
+    '''
+    # Names that can carry a numeric suffix (grs1, wrs2, imm0, ...), paired
+    # with the type description that they imply.
+    suffixed = [
+        (r'grs[0-9]*$', 'grs'),
+        (r'wrs[0-9]*$', 'wrs'),
+        (r'imm[0-9]*$', 'imm')
+    ]
+    for pattern, fmt in suffixed:
+        if re.match(pattern, name):
+            return parse_operand_type(fmt)
+
+    # Names that are a type description in their own right
+    if name in ['grd', 'wrd', 'csr', 'wsr']:
+        return parse_operand_type(name)
+
+    # An operand called "offset" is taken to be an immediate
+    if name == 'offset':
+        return parse_operand_type('imm')
+
+    raise ValueError("Operand name {!r} doesn't imply an operand type: "
+                     "you'll have to set the type explicitly."
+                     .format(name))
+
+
+def make_operand_type(yml: object, operand_name: str) -> OperandType:
+    '''Build the type for the operand called operand_name
+
+    If yml is not None, it must be a string describing the type (see
+    parse_operand_type). If it is None, the type is inferred from the
+    operand's name instead.
+
+    '''
+    if yml is None:
+        return infer_operand_type(operand_name)
+
+    what = 'type for {} operand'.format(operand_name)
+    return parse_operand_type(check_str(yml, what))
+
+
+class Operand:
+    '''An operand of an instruction, as described in the YAML file
+
+    The name attribute is the operand's name, op_type is its OperandType
+    and doc is its documentation string (or None if it has none).
+
+    '''
+    def __init__(self, yml: object, insn_name: str) -> None:
+        '''Construct from a parsed YAML object
+
+        yml should be either a string (a bare operand name) or a dict with
+        a 'name' key and optional 'type' and 'doc' keys. insn_name is the
+        name of the instruction that has the operand and is only used in
+        error messages. Raises a ValueError if yml is malformed.
+
+        '''
+        what = 'operand for {!r} instruction'.format(insn_name)
+        if isinstance(yml, str):
+            name = yml
+            op_type = None  # type: Optional[str]
+            doc = None  # type: Optional[str]
+        elif isinstance(yml, dict):
+            yd = check_keys(yml, what, ['name'], ['type', 'doc'])
+            name = check_str(yd['name'], 'name of ' + what)
+
+            op_what = '{!r} {}'.format(name, what)
+            op_type = get_optional_str(yd, 'type', op_what)
+            doc = get_optional_str(yd, 'doc', op_what)
+        else:
+            # Without this check, we would fall through with name unbound
+            # and fail with a confusing UnboundLocalError below.
+            raise ValueError('{} is expected to be a string or a dict, '
+                             'but was actually a {}.'
+                             .format(what, type(yml).__name__))
+
+        self.name = name
+        # With no explicit type, the type gets inferred from the name
+        self.op_type = make_operand_type(op_type, name)
+        self.doc = doc
diff --git a/hw/ip/otbn/util/shared/syntax.py b/hw/ip/otbn/util/shared/syntax.py
new file mode 100644
index 0000000..d715366
--- /dev/null
+++ b/hw/ip/otbn/util/shared/syntax.py
@@ -0,0 +1,354 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code for making sense of instruction syntax as defined in insns.yml'''
+
+import re
+from typing import Dict, List, Set, Tuple
+
+from .operand import Operand
+
+
+class SyntaxToken:
+    '''A single token in the syntax of an instruction
+
+    See InsnSyntax for more details. A token is either a literal hunk of
+    text (is_literal is true and text holds the literal syntax) or an
+    operand (is_literal is false and text holds the operand name).
+
+    '''
+    def __init__(self, is_literal: bool, text: str) -> None:
+        assert text
+        self.is_literal = is_literal
+        if is_literal:
+            # Make whitespace canonical: each run becomes a single space
+            self.text = re.sub(r'\s+', ' ', text)
+        else:
+            self.text = text
+
+    def render_doc(self) -> str:
+        '''Return how this syntax token should look in the documentation'''
+        return self.text if self.is_literal else '<{}>'.format(self.text)
+
+    def asm_pattern(self) -> str:
+        '''Return a regex pattern that matches this token
+
+        The pattern for an operand token is wrapped in a group (which
+        captures the operand). For more details about the syntax, see
+        InsnSyntax.
+
+        '''
+        if not self.is_literal:
+            # This is an operand. For now, at least, we're very restrictive
+            # for operands. No spaces and no commas (the second rule avoids
+            # silliness like "a, b, c" matching a syntax with only two
+            # operands by setting the second to "b, c").
+            #
+            # We also split out ++ and -- separately, to disambiguate
+            # things like x1++, which must be parsed as x1 followed by ++.
+            #
+            # If we want to do better and allow things like
+            #
+            #    addi x0, x1, 1 + 3
+            #
+            # then we need to use something more serious than just regexes
+            # for parsing.
+            return r'(-?[^ ,+\-]+|[+\-]+)\s*'
+
+        # Split the literal on whitespace. We get no words at all iff the
+        # literal was pure whitespace, in which case it "requires the
+        # whitespace".
+        words = self.text.split()
+        if not words:
+            return r'\s+'
+
+        # Otherwise, any internal whitespace becomes \s+. We disallow
+        # leading space but allow optional trailing space: this convention
+        # should avoid lots of \s*\s* pairs.
+        return r'\s+'.join(re.escape(word) for word in words) + r'\s*'
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return the assembly listing for this token
+
+        A literal token is just its text. An operand token is rendered by
+        the operand's type, using the value that op_vals gives for it.
+
+        '''
+        if self.is_literal:
+            return self.text
+
+        op_name = self.text
+        assert op_name in op_vals
+        assert op_name in operands
+
+        op_val = op_vals[op_name]
+        return operands[op_name].op_type.render_val(op_val)
+
+
+class SyntaxHunk:
+    '''A hunk of instruction syntax, which might be optional
+
+    The hunk is a non-empty list of tokens. The op_list and op_set
+    attributes hold the names of the operands that appear in it (in order
+    and as a set, respectively).
+
+    '''
+    def __init__(self,
+                 is_optional: bool,
+                 tokens: List[SyntaxToken],
+                 op_list: List[str],
+                 op_set: Set[str]) -> None:
+        assert tokens
+        self.is_optional = is_optional
+        self.tokens = tokens
+        self.op_list = op_list
+        self.op_set = op_set
+
+    @staticmethod
+    def from_list(operands: List[str]) -> 'SyntaxHunk':
+        '''Smart constructor for a list of operands with "normal" syntax
+
+        The result is a required hunk with the operands in order, separated
+        by commas.
+
+        '''
+        assert operands
+        comma = SyntaxToken(True, ', ')
+        tokens = []
+        for idx, op in enumerate(operands):
+            # A comma goes before every operand except the first
+            if idx:
+                tokens.append(comma)
+            tokens.append(SyntaxToken(False, op))
+
+        op_set = set(operands)
+        assert len(op_set) == len(operands)
+
+        return SyntaxHunk(False, tokens, operands, op_set)
+
+    @staticmethod
+    def from_string(mnemonic: str, optional: bool, raw: str) -> 'SyntaxHunk':
+        '''Smart constructor that parses YAML syntax (see InsnSyntax)
+
+        Raises a ValueError if the syntax is malformed. The mnemonic is
+        only used for error messages.
+
+        '''
+        assert raw
+
+        tokens = []
+        op_list = []
+        op_set = set()
+
+        # Splitting on a regex with a group interleaves the text between
+        # matches (positions 0, 2, 4, ...) with the contents of the group,
+        # which are operand names (positions 1, 3, 5, ...).
+        pieces = re.split(r'<([^>]+)>', raw)
+        last_idx = len(pieces) - 1
+        for idx, piece in enumerate(pieces):
+            is_operand = bool(idx & 1)
+
+            if is_operand:
+                if '<' in piece or '>' in piece:
+                    raise ValueError("Syntax for {!r} has hunk {!r} which "
+                                     "doesn't "
+                                     "seem to surround <operand>s properly."
+                                     .format(mnemonic, raw))
+
+                assert piece
+                if piece in op_set:
+                    raise ValueError("Syntax for {!r} has hunk {!r} with "
+                                     "more than one occurrence of <{}>."
+                                     .format(mnemonic, raw, piece))
+                op_list.append(piece)
+                op_set.add(piece)
+
+            if not piece:
+                # An empty piece is fine at one end or the other (and gets
+                # no token). Anywhere else, it sits between two operands.
+                if 0 < idx < last_idx:
+                    raise ValueError("Syntax for {!r} has two adjacent "
+                                     "operand "
+                                     "tokens, with no intervening syntax."
+                                     .format(mnemonic))
+                continue
+
+            tokens.append(SyntaxToken(not is_operand, piece))
+
+        return SyntaxHunk(optional, tokens, op_list, op_set)
+
+    def render_doc(self) -> str:
+        '''Return how this hunk should look in the documentation'''
+        body = ''.join(token.render_doc() for token in self.tokens)
+        if self.is_optional:
+            return '[{}]'.format(body)
+        return body
+
+    def asm_pattern(self) -> str:
+        '''Return a regex pattern that can be used for matching this hunk
+
+        The result will have a group per operand. It allows trailing, but
+        not leading, space within the hunk.
+
+        '''
+        body = ''.join(token.asm_pattern() for token in self.tokens)
+        if not self.is_optional:
+            return body
+
+        # For an optional hunk, we build the pattern in the form "(?:foo)?".
+        # This puts a non-capturing group around foo and then applies "?"
+        # (zero-or-one) to it.
+        return '(?:{})?'.format(body)
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return an assembly listing for the hunk given operand values
+
+        If this hunk is optional and all its operands are zero, the hunk is
+        omitted (so this function returns the empty string).
+
+        '''
+        if self.is_optional:
+            required = any(op_vals[op_name] != 0 for op_name in self.op_list)
+            if not required:
+                return ''
+
+        parts = [token.render_vals(op_vals, operands)
+                 for token in self.tokens]
+        return ''.join(parts)
+
+
+class InsnSyntax:
+    '''A class representing the syntax of an instruction
+
+    An instruction's syntax is specified in the YAML file by writing it out
+    with operand names surrounded by angle brackets. For example, a simple
+    NOT instruction might have a syntax of
+
+        <dst>, <src>
+
+    which should be interpreted as the following tokens:
+
+        - Operand called 'dst'
+        - A literal ','
+        - Operand called 'src'
+
+    Between the tokens, whitespace is optional (so "x0 , x1" and "x0,x1"
+    both match the syntax above) unless a literal token is just a space, in
+    which case some whitespace is required. For example
+
+        <dst> <src>
+
+    would match "x0 x1" but not "x0x1". Whitespace within literal syntax
+    tokens means that some space is required, matching the regex \\s+. For
+    example, the (rather strange) syntax
+
+        <dst> + - <src>
+
+    would match "x0 + - x1" or "x0+ -x1", but not "x0 +- x1".
+
+    Some operands (and surrounding syntax) might be optional. The optional
+    syntax is surrounded by square brackets. Nesting is not supported. For
+    example:
+
+        <dst>, <src>[, <offset>]
+
+    would match "x0, x1, 123" or "x0, x1".
+
+    Note that a given syntax might be ambiguous. For example,
+
+        <dst>, <src>[, <offset>][, <flavour>]
+
+    With "x0, x1, 123", is 123 an offset or a flavour? (We choose not to
+    embed typing information into the syntax, because that results in very
+    confusing assembler error messages). We break ties in the same way as
+    the underlying regex engine, assigning the operand to the first group,
+    so 123 is an offset in this case. Such syntaxes are rather confusing
+    though, so probably not a good idea.
+
+    The parsed syntax is stored as a list of "hunks". Each hunk contains a
+    flag showing whether the hunk is optional or required and also a list
+    of SyntaxToken objects.
+
+    '''
+    def __init__(self,
+                 hunks: List[SyntaxHunk],
+                 op_list: List[str],
+                 op_set: Set[str]) -> None:
+        self.hunks = hunks
+        self.op_list = op_list
+        self.op_set = op_set
+
+    @staticmethod
+    def from_list(operands: List[str]) -> 'InsnSyntax':
+        '''Smart constructor for a list of operands with "normal" syntax'''
+        if not operands:
+            return InsnSyntax([], [], set())
+
+        hunk = SyntaxHunk.from_list(operands)
+        return InsnSyntax([hunk], hunk.op_list, hunk.op_set)
+
+    @staticmethod
+    def from_yaml(mnemonic: str, raw: str) -> 'InsnSyntax':
+        '''Parse the syntax in the YAML file
+
+        Raises a ValueError if the syntax is malformed. The mnemonic is
+        only used for error messages.
+
+        '''
+        bracket_msg = ('Unbalanced or nested [] in instruction '
+                       'syntax for {!r}.'
+                       .format(mnemonic))
+
+        # The raw syntax looks something like
+        #
+        #    <op0>, <op1>[(<op2>)]
+        #
+        # to mean that you either have "x0, x1" or "x0, x2(x3)". First,
+        # split out the bracketed parts.
+        by_left = raw.split('[')
+
+        # Anything before the first '[' is outside brackets, so a ']' there
+        # has nothing to close.
+        if ']' in by_left[0]:
+            raise ValueError(bracket_msg)
+
+        parts = [(False, by_left[0])]
+        for after_left in by_left[1:]:
+            split = after_left.split(']', 1)
+            # We need exactly one ']' to close the '[' that we just split
+            # on. If there's none, the brackets are unclosed or nested. If
+            # there's a second, it doesn't match any '['.
+            if len(split) != 2 or ']' in split[1]:
+                raise ValueError(bracket_msg)
+
+            parts += [(True, split[0]), (False, split[1])]
+
+        # Now parts contains a list of pairs (optional, txt) where txt is a
+        # hunk of the syntax and optional is true if this hunk was in
+        # square brackets. A part might be empty. For example, "[a]b c[d]"
+        # will both lead and trail with an empty part. But it shouldn't be
+        # empty if it's marked optional: that would be something like
+        # "a[]b", which doesn't make much sense.
+        hunks = []
+        for optional, txt in parts:
+            if txt:
+                hunks.append(SyntaxHunk.from_string(mnemonic, optional, txt))
+            elif optional:
+                raise ValueError('Empty [] in instruction syntax for {!r}.'
+                                 .format(mnemonic))
+
+        # Collect up operands across the hunks
+        op_list = []  # type: List[str]
+        op_set = set()  # type: Set[str]
+        for hunk in hunks:
+            op_list += hunk.op_list
+            op_set |= hunk.op_set
+
+        # Each hunk has checked that it has no repeated operand. If the
+        # list is longer than the set, an operand appears in two hunks.
+        if len(op_list) != len(op_set):
+            raise ValueError('Instruction syntax for {!r} is not '
+                             'linear in its operands.'
+                             .format(mnemonic))
+
+        return InsnSyntax(hunks, op_list, op_set)
+
+    def render_doc(self) -> str:
+        '''Return how this syntax should look in the documentation'''
+        return ''.join(hunk.render_doc() for hunk in self.hunks)
+
+    def asm_pattern(self) -> Tuple[str, Dict[str, int]]:
+        '''Return a regex pattern and a group name map for this syntax
+
+        The pattern allows leading whitespace and is anchored at the end of
+        the string. The map takes each operand name to the index of the
+        regex group that captures it.
+
+        '''
+        hunk_patterns = [hunk.asm_pattern() for hunk in self.hunks]
+        pattern = r'\s*' + ''.join(hunk_patterns) + '$'
+
+        # Each operand has one group and they come in the same order as
+        # op_list. Regex groups are numbered from 1.
+        op_to_grp = {op: 1 + idx for idx, op in enumerate(self.op_list)}
+
+        return (pattern, op_to_grp)
+
+    def render_vals(self,
+                    op_vals: Dict[str, int],
+                    operands: Dict[str, Operand]) -> str:
+        '''Return an assembly listing for the given operand fields'''
+        return ''.join(hunk.render_vals(op_vals, operands)
+                       for hunk in self.hunks)
diff --git a/hw/ip/otbn/util/shared/yaml_parse_helpers.py b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
new file mode 100644
index 0000000..9ef1158
--- /dev/null
+++ b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
@@ -0,0 +1,120 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''Code to help make typed objects out of parsed YAML'''
+
+from typing import Callable, Dict, List, Optional, Sequence, TypeVar
+
+
+T = TypeVar('T')
+
+
+def check_keys(obj: object,
+               what: str,
+               required_keys: List[str],
+               optional_keys: List[str]) -> Dict[str, object]:
+    '''Check that obj is a dict object with the expected keys
+
+    Every key in required_keys must be present. Keys from optional_keys may
+    be present and no other key is allowed. On success, return obj itself.
+    If not, raise a ValueError; the what argument names the object.
+
+    '''
+    if not isinstance(obj, dict):
+        raise ValueError("{} is expected to be a dict, but was actually a {}."
+                         .format(what, type(obj).__name__))
+
+    allowed = set()
+    missing = []
+    for key in required_keys:
+        # A key listed twice is a bug in the caller, not in the YAML
+        assert key not in allowed
+        allowed.add(key)
+        if key not in obj:
+            missing.append(key)
+
+    for key in optional_keys:
+        assert key not in allowed
+        allowed.add(key)
+
+    # Keys in parsed YAML needn't be strings (an integer key is possible),
+    # so convert to str here: otherwise join() would raise a TypeError
+    # when building the error message below.
+    unexpected = [str(key) for key in obj if key not in allowed]
+
+    if missing or unexpected:
+        mstr = ('The following required fields were missing: {}.'
+                .format(', '.join(missing)) if missing else '')
+        ustr = ('The following unexpected fields were found: {}.'
+                .format(', '.join(unexpected)) if unexpected else '')
+        raise ValueError("{} doesn't have the right keys. {}{}{}"
+                         .format(what,
+                                 mstr,
+                                 ' ' if mstr and ustr else '',
+                                 ustr))
+
+    return obj
+
+
+def check_str(obj: object, what: str) -> str:
+    '''Return obj, having checked that it is a string
+
+    Raises a ValueError otherwise. The what argument names the object in
+    the error message.
+
+    '''
+    if isinstance(obj, str):
+        return obj
+
+    raise ValueError('{} is of type {}, not a string.'
+                     .format(what, type(obj).__name__))
+
+
+def check_optional_str(obj: object, what: str) -> Optional[str]:
+    '''Return obj, having checked that it is a string or None
+
+    Raises a ValueError otherwise. The what argument names the object in
+    the error message.
+
+    '''
+    if obj is None or isinstance(obj, str):
+        return obj
+
+    raise ValueError('{} is of type {}, not a string.'
+                     .format(what, type(obj).__name__))
+
+
+def check_bool(obj: object, what: str) -> bool:
+    '''Check that the given object is a bool
+
+    On success, return obj. If not, raise a ValueError; the what argument
+    names the object.
+
+    '''
+    # This checks for the True and False objects themselves, so values that
+    # merely compare equal to them (like 0 or 1) are rejected.
+    if obj is not True and obj is not False:
+        raise ValueError('{} is of type {}, not a bool.'
+                         .format(what, type(obj).__name__))
+    return obj
+
+
+def check_list(obj: object, what: str) -> List[object]:
+    '''Return obj, having checked that it is a list
+
+    Raises a ValueError otherwise. The what argument names the object in
+    the error message.
+
+    '''
+    if isinstance(obj, list):
+        return obj
+
+    raise ValueError('{} is of type {}, not a list.'
+                     .format(what, type(obj).__name__))
+
+
+def index_list(what: str,
+               objs: Sequence[T],
+               get_key: Callable[[T], str]) -> Dict[str, T]:
+    '''Return a dict that maps get_key(obj) to obj for each obj in objs
+
+    Raises a ValueError if two objects have the same key. The what argument
+    names the collection in the error message.
+
+    '''
+    by_key = {}
+    for item in objs:
+        item_key = get_key(item)
+        if item_key not in by_key:
+            by_key[item_key] = item
+            continue
+
+        raise ValueError('Duplicate object with key {} in {}.'
+                         .format(item_key, what))
+    return by_key
+
+
+def get_optional_str(data: Dict[str, object],
+                     key: str, what: str) -> Optional[str]:
+    '''Return data[key], which must be a string if it is there at all
+
+    Returns None if data has no such key (or holds None for it). Raises a
+    ValueError if the value is anything other than a string or None. The
+    what argument names the object that data describes.
+
+    '''
+    field_what = '{} field for {}'.format(key, what)
+    return check_optional_str(data.get(key), field_what)
diff --git a/hw/ip/otbn/util/yaml_to_doc.py b/hw/ip/otbn/util/yaml_to_doc.py
index 30d815a..b1d5898 100755
--- a/hw/ip/otbn/util/yaml_to_doc.py
+++ b/hw/ip/otbn/util/yaml_to_doc.py
@@ -9,8 +9,10 @@
import sys
from typing import List
-from shared.insn_yaml import (BoolLiteral, Encoding, Insn, InsnsFile, Operand,
- load_file)
+from shared.bool_literal import BoolLiteral
+from shared.encoding import Encoding
+from shared.insn_yaml import Insn, InsnsFile, load_file
+from shared.operand import Operand
def render_operand_row(operand: Operand) -> str: