| # Copyright lowRISC contributors. |
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| '''Support code for reading the instruction database in insns.yml''' |
| |
| import itertools |
| import re |
| from typing import (Callable, Dict, List, Optional, |
| Sequence, Set, Tuple, TypeVar, Union) |
| |
| import yaml |
| |
| |
| T = TypeVar('T') |
| |
| |
| def check_keys(obj: object, |
| what: str, |
| required_keys: List[str], |
| optional_keys: List[str]) -> Dict[str, object]: |
| '''Check that obj is a dict object with the expected keys |
| |
| If not, raise a ValueError; the what argument names the object. |
| |
| ''' |
| if not isinstance(obj, dict): |
| raise ValueError("{} is expected to be a dict, but was actually a {}." |
| .format(what, type(obj).__name__)) |
| |
| allowed = set() |
| missing = [] |
| for key in required_keys: |
| assert key not in allowed |
| allowed.add(key) |
| if key not in obj: |
| missing.append(key) |
| |
| for key in optional_keys: |
| assert key not in allowed |
| allowed.add(key) |
| |
| unexpected = [] |
| for key in obj: |
| if key not in allowed: |
| unexpected.append(key) |
| |
| if missing or unexpected: |
| mstr = ('The following required fields were missing: {}.' |
| .format(', '.join(missing)) if missing else '') |
| ustr = ('The following unexpected fields were found: {}.' |
| .format(', '.join(unexpected)) if unexpected else '') |
| raise ValueError("{} doesn't have the right keys. {}{}{}" |
| .format(what, |
| mstr, |
| ' ' if mstr and ustr else '', |
| ustr)) |
| |
| return obj |
| |
| |
| def check_str(obj: object, what: str) -> str: |
| '''Check that the given object is a string |
| |
| If not, raise a ValueError; the what argument names the object. |
| |
| ''' |
| if not isinstance(obj, str): |
| raise ValueError('{} is of type {}, not a string.' |
| .format(what, type(obj).__name__)) |
| return obj |
| |
| |
| def check_optional_str(obj: object, what: str) -> Optional[str]: |
| '''Check that the given object is a string or None |
| |
| If not, raise a ValueError; the what argument names the object. |
| |
| ''' |
| if obj is not None and not isinstance(obj, str): |
| raise ValueError('{} is of type {}, not a string.' |
| .format(what, type(obj).__name__)) |
| return obj |
| |
| |
| def check_bool(obj: object, what: str) -> bool: |
| '''Check that the given object is a bool |
| |
| If not, raise a ValueError; the what argument names the object. |
| |
| ''' |
| if obj is not True and obj is not False: |
| raise ValueError('{} is of type {}, not a string.' |
| .format(what, type(obj).__name__)) |
| return obj |
| |
| |
| def check_list(obj: object, what: str) -> List[object]: |
| '''Check that the given object is a list |
| |
| If not, raise a ValueError; the what argument names the object. |
| |
| ''' |
| if not isinstance(obj, list): |
| raise ValueError('{} is of type {}, not a list.' |
| .format(what, type(obj).__name__)) |
| return obj |
| |
| |
| def index_list(what: str, |
| objs: Sequence[T], |
| get_key: Callable[[T], str]) -> Dict[str, T]: |
| ret = {} |
| for obj in objs: |
| key = get_key(obj) |
| if key in ret: |
| raise ValueError('Duplicate object with key {} in {}.' |
| .format(key, what)) |
| ret[key] = obj |
| return ret |
| |
| |
| class InsnGroup: |
| def __init__(self, yml: object) -> None: |
| yd = check_keys(yml, 'insn-group', ['key', 'title', 'doc'], []) |
| self.key = check_str(yd['key'], 'insn-group key') |
| self.title = check_str(yd['title'], 'insn-group title') |
| self.doc = check_str(yd['doc'], 'insn-group doc') |
| |
| |
| class InsnGroups: |
| def __init__(self, yml: object) -> None: |
| self.groups = [InsnGroup(y) for y in check_list(yml, 'insn-groups')] |
| if not self.groups: |
| raise ValueError('Empty list of instruction groups: ' |
| 'we need at least one as a base group.') |
| self.key_to_group = index_list('insn-groups', |
| self.groups, lambda ig: ig.key) |
| |
| def default_group(self) -> str: |
| '''Get the name of the default instruction group''' |
| assert self.groups |
| return self.groups[0].key |
| |
| |
| class BitRanges: |
| '''Represents the bit ranges used for a field in an encoding scheme''' |
| def __init__(self, |
| mask: int, |
| ranges: List[Tuple[int, int]], |
| width: int) -> None: |
| self.mask = mask |
| self.ranges = ranges |
| self.width = width |
| |
| @staticmethod |
| def from_list(ranges: List[Tuple[int, int]]) -> 'BitRanges': |
| mask = 0 |
| width = 0 |
| for msb, lsb in ranges: |
| assert 0 <= lsb <= msb <= 31 |
| rng_mask = (1 << (msb + 1)) - (1 << lsb) |
| assert not (rng_mask & mask) |
| mask |= rng_mask |
| width += msb - lsb + 1 |
| |
| return BitRanges(mask, ranges, width) |
| |
| @staticmethod |
| def from_yaml(as_string: str, what: str) -> 'BitRanges': |
| # ranges ::= range |
| # | range ',' ranges |
| # |
| # range ::= num |
| # | num ':' num |
| # |
| # Ranges are assumed to be msb:lsb (with msb >= lsb). Bit indices are |
| # at most 31 and ranges are disjoint. |
| |
| if not as_string: |
| raise ValueError('Empty string as bits for {}'.format(what)) |
| |
| overlaps = 0 |
| |
| mask = 0 |
| ranges = [] |
| width = 0 |
| |
| for rng in as_string.split(','): |
| match = re.match(r'([0-9]+)(?:-([0-9]+))?$', rng) |
| if match is None: |
| raise ValueError('Range {!r} in bits for {} is malformed.' |
| .format(rng, what)) |
| |
| msb = int(match.group(1)) |
| maybe_lsb = match.group(2) |
| lsb = msb if maybe_lsb is None else int(maybe_lsb) |
| |
| if msb < lsb: |
| raise ValueError('Range {!r} in bits for {} has msb < lsb.' |
| .format(rng, what)) |
| |
| if msb >= 32: |
| raise ValueError('Range {!r} in bits for {} has msb >= 32.' |
| .format(rng, what)) |
| |
| rng_mask = (1 << (msb + 1)) - (1 << lsb) |
| overlaps |= rng_mask & mask |
| mask |= rng_mask |
| |
| ranges.append((msb, lsb)) |
| width += msb - lsb + 1 |
| |
| if overlaps: |
| raise ValueError('Bits for {} have overlapping ranges ' |
| '(mask: {:#08x})' |
| .format(what, overlaps)) |
| |
| return BitRanges(mask, ranges, width) |
| |
| def __eq__(self, other: object) -> bool: |
| return isinstance(other, BitRanges) and self.ranges == other.ranges |
| |
| def encode(self, value: int) -> int: |
| '''Encode the given value as bit fields''' |
| ret = 0 |
| bits_taken = 0 |
| for msb, lsb in self.ranges: |
| rng_width = msb - lsb + 1 |
| value_msb = self.width - 1 - bits_taken |
| value_lsb = value_msb - rng_width + 1 |
| |
| rng_mask = (1 << rng_width) - 1 |
| rng_value = (value >> value_lsb) & rng_mask |
| ret |= rng_value << lsb |
| bits_taken += rng_width |
| |
| assert bits_taken == self.width |
| return ret |
| |
| def decode(self, raw: int) -> int: |
| '''Extract the bit fields from the given value''' |
| ret = 0 |
| for msb, lsb in self.ranges: |
| width = msb - lsb + 1 |
| mask = (1 << width) - 1 |
| |
| ret <<= width |
| ret |= (raw >> lsb) & mask |
| return ret |
| |
| |
| class BoolLiteral: |
| '''Represents a boolean literal, with possible 'x characters |
| |
| We represent this as 2 masks: "ones" and "xs". The ones mask is the bits |
| that are marked 1. The xs mask is the bits that are marked x. Then you can |
| test whether a particular value matches the literal by zeroing all bits in |
| the x mask and then comparing with the ones mask. |
| |
| ''' |
| def __init__(self, ones: int, xs: int, width: int) -> None: |
| assert width > 0 |
| assert (ones >> width) == 0 |
| assert (xs >> width) == 0 |
| |
| self.ones = ones |
| self.xs = xs |
| self.width = width |
| |
| @staticmethod |
| def from_string(as_string: str, what: str) -> 'BoolLiteral': |
| ones = 0 |
| xs = 0 |
| width = 0 |
| |
| # The literal should always start with a 'b' |
| if not as_string.startswith('b'): |
| raise ValueError("Boolean literal for {} doesn't start with a 'b'." |
| .format(what)) |
| |
| for char in as_string[1:]: |
| if char == '_': |
| continue |
| |
| ones <<= 1 |
| xs <<= 1 |
| width += 1 |
| |
| if char == '0': |
| continue |
| elif char == '1': |
| ones |= 1 |
| elif char == 'x': |
| xs |= 1 |
| else: |
| raise ValueError('Boolean literal for {} has ' |
| 'unsupported character: {!r}.' |
| .format(what, char)) |
| |
| if not width: |
| raise ValueError('Empty boolean literal for {}.'.format(what)) |
| |
| return BoolLiteral(ones, xs, width) |
| |
| def char_for_bit(self, bit: int) -> str: |
| '''Return 0, 1 or x for the bit at the given position''' |
| assert bit < self.width |
| if (self.ones >> bit) & 1: |
| return '1' |
| if (self.xs >> bit) & 1: |
| return 'x' |
| return '0' |
| |
| |
| class EncSchemeField: |
| '''Represents a single field in an encoding scheme''' |
| def __init__(self, |
| bits: BitRanges, |
| value: Optional[BoolLiteral], |
| shift: int) -> None: |
| self.bits = bits |
| self.value = value |
| self.shift = shift |
| |
| @staticmethod |
| def from_yaml(yml: object, what: str) -> 'EncSchemeField': |
| # This is either represented as a dict in the YAML or as a bare string. |
| bits_what = 'bits for {}'.format(what) |
| value_what = 'value for {}'.format(what) |
| shift_what = 'shift for {}'.format(what) |
| |
| shift = 0 |
| |
| if isinstance(yml, dict): |
| yd = check_keys(yml, what, ['bits'], ['value', 'shift']) |
| |
| bits_yml = yd['bits'] |
| if not (isinstance(bits_yml, str) or isinstance(bits_yml, int)): |
| raise ValueError('{} is of type {}, not a string or int.' |
| .format(bits_what, type(bits_yml).__name__)) |
| |
| # We require value to be given as a string because it's supposed to |
| # be in base 2, and PyYAML will parse 111 as one-hundred and |
| # eleven, 011 as 9 and 0x11 as 17. Aargh! |
| raw_value = None |
| val_yml = yd.get('value') |
| if val_yml is not None: |
| if not isinstance(val_yml, str): |
| raise ValueError("{} is of type {}, but must be a string " |
| "(we don't allow automatic conversion " |
| "because YAML's int conversion assumes " |
| "base 10 and value should be in base 2)." |
| .format(value_what, |
| type(val_yml).__name__)) |
| raw_value = val_yml |
| |
| # shift, on the other hand, is written in base 10. Allow an |
| # integer. |
| shift_yml = yd.get('shift') |
| if shift_yml is None: |
| pass |
| elif isinstance(shift_yml, str): |
| if not re.match(r'[0-9]+$', shift_yml): |
| raise ValueError('{} is {!r} but should be a ' |
| 'non-negative integer.' |
| .format(shift_what, shift_yml)) |
| shift = int(shift_yml) |
| elif isinstance(shift_yml, int): |
| if shift_yml < 0: |
| raise ValueError('{} is {!r} but should be a ' |
| 'non-negative integer.' |
| .format(shift_what, shift_yml)) |
| shift = shift_yml |
| else: |
| raise ValueError("{} is of type {}, but must be a string " |
| "or non-negative integer." |
| .format(shift_what, type(shift_yml).__name__)) |
| elif isinstance(yml, str) or isinstance(yml, int): |
| bits_yml = yml |
| raw_value = None |
| else: |
| raise ValueError('{} is a {}, but should be a ' |
| 'dict, string or integer.' |
| .format(what, type(yml).__name__)) |
| |
| # The bits field is usually parsed as a string ("10-4", or similar). |
| # But if it's a bare integer then YAML will parse it as an int. That's |
| # fine, but we turn it back into a string to be re-parsed by BitRanges. |
| assert isinstance(bits_yml, str) or isinstance(bits_yml, int) |
| |
| bits = BitRanges.from_yaml(str(bits_yml), bits_what) |
| value = None |
| if raw_value is not None: |
| value = BoolLiteral.from_string(raw_value, value_what) |
| if bits.width != value.width: |
| raise ValueError('{} has bits that imply a width of {}, but ' |
| 'a value with width {}.' |
| .format(what, bits.width, value.width)) |
| |
| return EncSchemeField(bits, value, shift) |
| |
| |
| class EncSchemeImport: |
| '''An object representing inheritance of a parent scheme |
| |
| When importing a parent scheme, we can set some of its fields with |
| immediate values. These are stored in the settings field. |
| |
| ''' |
| def __init__(self, yml: object, importer_name: str) -> None: |
| as_str = check_str(yml, |
| 'value for import in encoding scheme {!r}' |
| .format(importer_name)) |
| |
| # The supported syntax is |
| # |
| # - parent0(field0=b111, field1=b10) |
| # - parent1() |
| # - parent2 |
| |
| match = re.match(r'([^ (]+)[ ]*(?:\(([^)]+)\))?$', as_str) |
| if not match: |
| raise ValueError('Malformed encoding scheme ' |
| 'inheritance by scheme {!r}: {!r}.' |
| .format(importer_name, as_str)) |
| |
| self.parent = match.group(1) |
| self.settings = {} # type: Dict[str, BoolLiteral] |
| |
| when = ('When inheriting from {!r} in encoding scheme {!r}' |
| .format(self.parent, importer_name)) |
| |
| if match.group(2) is not None: |
| args = match.group(2).split(',') |
| for arg in args: |
| arg = arg.strip() |
| arg_parts = arg.split('=') |
| if len(arg_parts) != 2: |
| raise ValueError('{}, found an argument with {} ' |
| 'equals signs (should have exactly one).' |
| .format(when, len(arg_parts) - 1)) |
| |
| field_name = arg_parts[0] |
| field_what = ('literal value for field {!r} when inheriting ' |
| 'from {!r} in encoding scheme {!r}' |
| .format(arg_parts[0], self.parent, importer_name)) |
| field_value = BoolLiteral.from_string(arg_parts[1], field_what) |
| |
| if field_name in self.settings: |
| raise ValueError('{}, found multiple arguments assigning ' |
| 'values to the field {!r}.' |
| .format(when, field_name)) |
| |
| self.settings[field_name] = field_value |
| |
| def apply_settings(self, |
| esf: 'EncSchemeFields', what: str) -> 'EncSchemeFields': |
| # Copy and set values in anything that has a setting |
| fields = {} |
| for name, literal in self.settings.items(): |
| old_field = esf.fields.get(name) |
| if old_field is None: |
| raise ValueError('{} sets unknown field {!r} from {!r}.' |
| .format(what, name, self.parent)) |
| |
| if old_field.bits.width != literal.width: |
| raise ValueError('{} sets field {!r} from {!r} with a literal ' |
| 'of width {}, but the field has width {}.' |
| .format(what, name, self.parent, |
| literal.width, old_field.bits.width)) |
| |
| fields[name] = EncSchemeField(old_field.bits, |
| literal, |
| old_field.shift) |
| |
| # Copy anything else |
| op_fields = set() |
| for name, old_field in esf.fields.items(): |
| if name in fields: |
| continue |
| op_fields.add(name) |
| fields[name] = old_field |
| |
| return EncSchemeFields(fields, op_fields, esf.mask) |
| |
| |
| class EncSchemeFields: |
| '''An object representing some fields in an encoding scheme''' |
| def __init__(self, |
| fields: Dict[str, EncSchemeField], |
| op_fields: Set[str], |
| mask: int) -> None: |
| self.fields = fields |
| self.op_fields = op_fields |
| self.mask = mask |
| |
| @staticmethod |
| def empty() -> 'EncSchemeFields': |
| return EncSchemeFields({}, set(), 0) |
| |
| @staticmethod |
| def from_yaml(yml: object, name: str) -> 'EncSchemeFields': |
| if not isinstance(yml, dict): |
| raise ValueError('fields for encoding scheme {!r} should be a ' |
| 'dict, but we saw a {}.' |
| .format(name, type(yml).__name__)) |
| |
| fields = {} |
| op_fields = set() # type: Set[str] |
| mask = 0 |
| |
| overlaps = 0 |
| |
| for key, val in yml.items(): |
| if not isinstance(key, str): |
| raise ValueError('{!r} is a bad key for a field name of ' |
| 'encoding scheme {} (should be str, not {}).' |
| .format(key, name, type(key).__name__)) |
| |
| fld_what = 'field {!r} of encoding scheme {}'.format(key, name) |
| field = EncSchemeField.from_yaml(val, fld_what) |
| |
| overlaps |= mask & field.bits.mask |
| mask |= field.bits.mask |
| |
| fields[key] = field |
| if field.value is None: |
| op_fields.add(key) |
| |
| if overlaps: |
| raise ValueError('Direct fields for encoding scheme {} have ' |
| 'overlapping ranges (mask: {:#08x})' |
| .format(name, overlaps)) |
| |
| return EncSchemeFields(fields, op_fields, mask) |
| |
| def merge_in(self, right: 'EncSchemeFields', when: str) -> None: |
| for name, field in right.fields.items(): |
| if name in self.fields: |
| raise ValueError('Duplicate field name: {!r} {}.' |
| .format(name, when)) |
| |
| overlap = self.mask & field.bits.mask |
| if overlap: |
| raise ValueError('Overlapping bit ranges ' |
| '(masks: {:08x} and {:08x} have ' |
| 'intersection {:08x}) {}.' |
| .format(self.mask, |
| field.bits.mask, overlap, when)) |
| |
| self.fields[name] = field |
| self.mask |= field.bits.mask |
| if field.value is None: |
| assert name not in self.op_fields |
| self.op_fields.add(name) |
| |
| |
| class EncScheme: |
| def __init__(self, yml: object, name: str) -> None: |
| what = 'encoding scheme {!r}'.format(name) |
| yd = check_keys(yml, what, [], ['parents', 'fields']) |
| |
| if not yd: |
| raise ValueError('{} has no parents or fields.'.format(what)) |
| |
| fields_yml = yd.get('fields') |
| self.direct_fields = (EncSchemeFields.from_yaml(fields_yml, name) |
| if fields_yml is not None |
| else EncSchemeFields.empty()) |
| |
| parents_yml = yd.get('parents') |
| parents_what = 'parents of {}'.format(what) |
| parents = ([EncSchemeImport(y, name) |
| for y in check_list(parents_yml, parents_what)] |
| if parents_yml is not None |
| else []) |
| self.parents = index_list(parents_what, |
| parents, |
| lambda imp: imp.parent) |
| |
| |
| class EncSchemes: |
| def __init__(self, yml: object) -> None: |
| if not isinstance(yml, dict): |
| raise ValueError("value for encoding-schemes is expected to be " |
| "a dict, but was actually a {}." |
| .format(type(yml).__name__)) |
| |
| self.schemes = {} # type: Dict[str, EncScheme] |
| self.resolved = {} # type: Dict[str, EncSchemeFields] |
| |
| for key, val in yml.items(): |
| if not isinstance(key, str): |
| raise ValueError('{!r} is a bad key for an encoding scheme ' |
| 'name (should be str, not {}).' |
| .format(key, type(key).__name__)) |
| self.schemes[key] = EncScheme(val, key) |
| |
| def _resolve(self, |
| name: str, |
| user: str, |
| stack: List[str]) -> EncSchemeFields: |
| # Have we resolved this before? |
| resolved = self.resolved.get(name) |
| if resolved is not None: |
| return resolved |
| |
| # Spot any circular inheritance |
| if name in stack: |
| raise RuntimeError('Circular inheritance of encoding ' |
| 'schemes: {}' |
| .format(' -> '.join(stack + [name]))) |
| |
| # Does the scheme actually exist? |
| scheme = self.schemes.get(name) |
| if scheme is None: |
| raise ValueError('{} requires undefined encoding scheme {!r}.' |
| .format(user, name)) |
| |
| # Recursively try to resolve each parent scheme, applying any import |
| # settings |
| resolved_parents = {} |
| new_stack = stack + [name] |
| what = 'Import list of encoding scheme {!r}'.format(name) |
| for pname, pimport in scheme.parents.items(): |
| resolved = self._resolve(pimport.parent, what, new_stack) |
| resolved_parents[pname] = pimport.apply_settings(resolved, what) |
| |
| # Now try to merge the resolved imports |
| merged = EncSchemeFields.empty() |
| parent_names_so_far = [] # type: List[str] |
| for pname, pfields in resolved_parents.items(): |
| when = ('merging fields of scheme {} into ' |
| 'already merged fields of {}' |
| .format(pname, ', '.join(parent_names_so_far))) |
| merged.merge_in(pfields, when) |
| parent_names_so_far.append(repr(pname)) |
| |
| # Now try to merge in any direct fields |
| when = ('merging direct fields of scheme {} into fields from parents' |
| .format(name)) |
| merged.merge_in(scheme.direct_fields, when) |
| |
| return merged |
| |
| def resolve(self, name: str, mnemonic: str) -> EncSchemeFields: |
| fields = self._resolve(name, 'Instruction {!r}'.format(mnemonic), []) |
| |
| # Check completeness |
| missing = ((1 << 32) - 1) & ~fields.mask |
| if missing: |
| raise ValueError('Fields for encoding scheme {} miss some bits ' |
| '(mask: {:#08x})' |
| .format(name, missing)) |
| |
| return fields |
| |
| |
| class OperandType: |
| '''The base class for some sort of operand type''' |
| def __init__(self, width: Optional[int]) -> None: |
| assert width is None or width > 0 |
| self.width = width |
| |
| def markdown_doc(self) -> Optional[str]: |
| '''Generate any (markdown) documentation for this operand type |
| |
| The base class returns None, but subclasses might return something |
| useful. |
| |
| ''' |
| return None |
| |
| def syntax_determines_value(self) -> bool: |
| '''Can the value of this operand always be inferred from asm syntax? |
| |
| This is true for things like registers (the value "5" only comes from |
| "r5", for example), but false for arbitrary immediates: an immediate |
| operand might have a value that comes from a relocation. |
| |
| ''' |
| return False |
| |
| def read_index(self, as_str: str) -> Optional[int]: |
| '''Try to read the given syntax as an actual integer index |
| |
| Raises a ValueError on definite failure ("found cabbage when I expected |
| a register name"). Returns None on a soft failure: "this is a |
| complicated looking expression, but it might be a sensible immediate". |
| |
| ''' |
| return None |
| |
| def render_val(self, value: int) -> str: |
| '''Render the given value as a string. |
| |
| The default implementation prints it as a decimal number. Register |
| operands, for example, will want to print 3 as "x3" and so on. |
| |
| ''' |
| return str(value) |
| |
| |
| class RegOperandType(OperandType): |
| '''A class representing a register operand type''' |
| TYPE_FMTS = { |
| 'gpr': (5, 'x'), |
| 'wdr': (5, 'w'), |
| 'csr': (12, None), |
| 'wsr': (8, None) |
| } |
| |
| def __init__(self, reg_type: str, is_dest: bool): |
| fmt = RegOperandType.TYPE_FMTS.get(reg_type) |
| assert fmt is not None |
| width, _ = fmt |
| super().__init__(width) |
| |
| self.reg_type = reg_type |
| self.is_dest = is_dest |
| |
| def syntax_determines_value(self) -> bool: |
| return True |
| |
| def read_index(self, as_str: str) -> int: |
| width, pfx = RegOperandType.TYPE_FMTS[self.reg_type] |
| |
| re_pfx = '' if pfx is None else re.escape(pfx) |
| match = re.match(re_pfx + '([0-9]+)$', as_str) |
| if match is None: |
| raise ValueError("Expression {!r} can't be parsed as a {}." |
| .format(as_str, self.reg_type)) |
| |
| idx = int(match.group(1)) |
| assert 0 <= idx |
| if idx >> width: |
| raise ValueError("Invalid register of type {}: {!r}." |
| .format(self.reg_type, as_str)) |
| |
| return idx |
| |
| def render_val(self, value: int) -> str: |
| fmt = RegOperandType.TYPE_FMTS.get(self.reg_type) |
| assert fmt is not None |
| _, pfx = fmt |
| |
| if pfx is None: |
| return super().render_val(value) |
| |
| return '{}{}'.format(pfx, value) |
| |
| |
| class ImmOperandType(OperandType): |
| '''A class representing an immediate operand type''' |
| def markdown_doc(self) -> Optional[str]: |
| # Override from OperandType base class |
| if self.width is None: |
| return None |
| |
| return 'Valid range: `0..{}`'.format((1 << self.width) - 1) |
| |
| def read_index(self, as_str: str) -> Optional[int]: |
| # We only support simple integer literals. |
| try: |
| return int(as_str) |
| except ValueError: |
| return None |
| |
| |
| class EnumOperandType(ImmOperandType): |
| '''A class representing an enum operand type''' |
| def __init__(self, items: List[str]): |
| assert items |
| super().__init__(int.bit_length(len(items) - 1)) |
| self.items = items |
| |
| def markdown_doc(self) -> Optional[str]: |
| # Override from OperandType base class |
| parts = ['Syntax table:\n\n' |
| '| Syntax | Value of immediate |\n' |
| '|--------|--------------------|\n'] |
| for idx, item in enumerate(self.items): |
| parts.append('| `{}` | `{}` |\n' |
| .format(item, idx)) |
| return ''.join(parts) |
| |
| def syntax_determines_value(self) -> bool: |
| return True |
| |
| def read_index(self, as_str: str) -> Optional[int]: |
| for idx, item in enumerate(self.items): |
| if as_str == item: |
| return idx |
| |
| known_vals = ', '.join(repr(item) for item in self.items) |
| raise ValueError('Invalid enum value, {!r}. ' |
| 'Supported values: {}.' |
| .format(as_str, known_vals)) |
| |
| def render_val(self, value: int) -> str: |
| # On a bad value, we have to return *something*. Since this is just |
| # going into disassembly, let's be vaguely helpful and return something |
| # that looks clearly bogus. |
| # |
| # Note that if the number of items in the enum is not a power of 2, |
| # this could happen with a bad binary, despite good tools. |
| if value < 0 or value >= len(self.items): |
| return '???' |
| |
| return self.items[value] |
| |
| |
| class OptionOperandType(ImmOperandType): |
| '''A class representing an option operand type''' |
| def __init__(self, option: str): |
| super().__init__(1) |
| self.option = option |
| |
| def markdown_doc(self) -> Optional[str]: |
| # Override from OperandType base class |
| return 'To specify, use the literal syntax `{}`\n'.format(self.option) |
| |
| def syntax_determines_value(self) -> bool: |
| return True |
| |
| def read_index(self, as_str: str) -> Optional[int]: |
| if as_str == self.option: |
| return 1 |
| |
| raise ValueError('Invalid option value, {!r}. ' |
| 'If specified, it should have been {!r}.' |
| .format(as_str, self.option)) |
| |
| def render_val(self, value: int) -> str: |
| # Option types are always 1 bit wide, so the value should be 0 or 1. |
| assert value in [0, 1] |
| return self.option if value else '' |
| |
| |
| def parse_operand_type(fmt: str) -> OperandType: |
| '''Make sense of the operand type syntax''' |
| # Registers |
| if fmt == 'grs': |
| return RegOperandType('gpr', False) |
| if fmt == 'grd': |
| return RegOperandType('gpr', True) |
| if fmt == 'wrs': |
| return RegOperandType('wdr', False) |
| if fmt == 'wrd': |
| return RegOperandType('wdr', True) |
| if fmt == 'csr': |
| return RegOperandType('csr', True) |
| if fmt == 'wsr': |
| return RegOperandType('wsr', True) |
| |
| # Immediates |
| if fmt == 'imm': |
| return ImmOperandType(None) |
| m = re.match(r'imm([1-9][0-9]*)$', fmt) |
| if m: |
| return ImmOperandType(int(m.group(1))) |
| m = re.match(r'enum\(([^\)]+)\)$', fmt) |
| if m: |
| return EnumOperandType([item.strip() |
| for item in m.group(1).split(',')]) |
| m = re.match(r'option\(([^\)]+)\)$', fmt) |
| if m: |
| return OptionOperandType(m.group(1).strip()) |
| |
| raise ValueError("Operand type description {!r} " |
| "didn't match any recognised format." |
| .format(fmt)) |
| |
| |
| def infer_operand_type(name: str) -> OperandType: |
| '''Try to guess an operand's type from its name''' |
| |
| if re.match(r'grs[0-9]*$', name): |
| return parse_operand_type('grs') |
| if name in ['grd', 'wrd', 'csr', 'wsr']: |
| return parse_operand_type(name) |
| if re.match(r'wrs[0-9]*$', name): |
| return parse_operand_type('wrs') |
| if re.match(r'imm[0-9]*$', name): |
| return parse_operand_type('imm') |
| if name == 'offset': |
| return parse_operand_type('imm') |
| |
| raise ValueError("Operand name {!r} doesn't imply an operand type: " |
| "you'll have to set the type explicitly." |
| .format(name)) |
| |
| |
| def make_operand_type(yml: object, operand_name: str) -> OperandType: |
| '''Construct a type for an operand |
| |
| This is either based on the type, if given, or inferred from the name |
| otherwise. |
| |
| ''' |
| return (parse_operand_type(check_str(yml, |
| 'type for {} operand' |
| .format(operand_name))) |
| if yml is not None |
| else infer_operand_type(operand_name)) |
| |
| |
| def get_optional_str(data: Dict[str, object], |
| key: str, what: str) -> Optional[str]: |
| return check_optional_str(data.get(key), '{} field for {}'.format(key, what)) |
| |
| |
| class Operand: |
| def __init__(self, yml: object, insn_name: str) -> None: |
| # The YAML representation should be a string (a bare operand name) or a |
| # dict. |
| what = 'operand for {!r} instruction'.format(insn_name) |
| if isinstance(yml, str): |
| name = yml |
| op_type = None |
| doc = None |
| elif isinstance(yml, dict): |
| yd = check_keys(yml, what, ['name'], ['type', 'doc']) |
| name = check_str(yd['name'], 'name of ' + what) |
| |
| op_what = '{!r} {}'.format(name, what) |
| op_type = get_optional_str(yd, 'type', op_what) |
| doc = get_optional_str(yd, 'doc', op_what) |
| |
| op_what = '{!r} {}'.format(name, what) |
| self.name = name |
| self.op_type = make_operand_type(op_type, name) |
| self.doc = doc |
| |
| |
| class SyntaxToken: |
| '''An object representing a single token in an instruction's syntax |
| |
| See InsnSyntax for more details. The is_literal attribute is true if this |
| is a literal hunk of text (rather than an operand name). The text attribute |
| either holds the literal syntax or the operand name. |
| |
| ''' |
| def __init__(self, is_literal: bool, text: str) -> None: |
| assert text |
| self.is_literal = is_literal |
| # Make whitespace canonical for literals |
| self.text = re.sub(r'\s+', ' ', text) if is_literal else text |
| |
| def render_doc(self) -> str: |
| '''Return how this syntax token should look in the documentation''' |
| if self.is_literal: |
| return self.text |
| else: |
| return '<{}>'.format(self.text) |
| |
| def asm_pattern(self) -> str: |
| '''Return a regex pattern that can be used for matching this token |
| |
| If the token represents an operand, the pattern is wrapped in a group |
| (to capture the operand). For more details about the syntax, see |
| InsnSyntax. |
| |
| ''' |
| if self.is_literal: |
| # A literal that is pure whitespace "requires the whitespace". |
| # Otherwise, replace all internal whitespace with \s+ and allow |
| # optional whitespace afterwards. To do this easily, we split the |
| # literal on whitespace. The result is empty iff it was just |
| # whitespace in the first place. |
| words = self.text.split() |
| if not words: |
| return r'\s+' |
| |
| # For non-whitespace literals, we disallow leading space and add |
| # optional trailing space. This convention should avoid lots of |
| # \s*\s* pairs. |
| parts = [re.escape(words[0])] |
| for w in words[1:]: |
| parts.append(r'\s+') |
| parts.append(re.escape(w)) |
| parts.append(r'\s*') |
| |
| return ''.join(parts) |
| |
| # Otherwise, this is an operand. For now, at least, we're very |
| # restrictive for operands. No spaces and no commas (the second rule |
| # avoids silliness like "a, b, c" matching a syntax with only two |
| # operands by setting the second to "b, c"). |
| # |
| # We also split out ++ and -- separately, to disambiguate things like |
| # x1++, which must be parsed as x1 followed by ++. |
| # |
| # If we want to do better and allow things like |
| # |
| # addi x0, x1, 1 + 3 |
| # |
| # then we need to use something more serious than just regexes for |
| # parsing. |
| return r'([^ ,+\-]+|[+\-]+)\s*' |
| |
| def render_vals(self, |
| op_vals: Dict[str, int], |
| operands: Dict[str, Operand]) -> str: |
| '''Return an assembly listing for the given operand fields |
| |
| ''' |
| if self.is_literal: |
| return self.text |
| |
| assert self.text in op_vals |
| assert self.text in operands |
| |
| return operands[self.text].op_type.render_val(op_vals[self.text]) |
| |
| |
| class SyntaxHunk: |
| '''An object representing a hunk of syntax that might be optional''' |
| def __init__(self, |
| is_optional: bool, |
| tokens: List[SyntaxToken], |
| op_list: List[str], |
| op_set: Set[str]) -> None: |
| assert tokens |
| self.is_optional = is_optional |
| self.tokens = tokens |
| self.op_list = op_list |
| self.op_set = op_set |
| |
| @staticmethod |
| def from_list(operands: List[str]) -> 'SyntaxHunk': |
| '''Smart constructor for a list of operands with "normal" syntax''' |
| assert operands |
| comma = SyntaxToken(True, ', ') |
| tokens = [SyntaxToken(False, operands[0])] |
| for op in operands[1:]: |
| tokens.append(comma) |
| tokens.append(SyntaxToken(False, op)) |
| |
| op_set = set(operands) |
| assert len(op_set) == len(operands) |
| |
| return SyntaxHunk(False, tokens, operands, op_set) |
| |
| @staticmethod |
| def from_string(mnemonic: str, optional: bool, raw: str) -> 'SyntaxHunk': |
| '''Smart constructor that parses YAML syntax (see InsnSyntax)''' |
| assert raw |
| |
| tokens = [] |
| op_list = [] |
| op_set = set() |
| |
| parts = re.split(r'<([^>]+)>', raw) |
| for idx, part in enumerate(parts): |
| # The matches for the regex appear in positions 1, 3, 5, ... |
| is_literal = not (idx & 1) |
| if ('<' in part or '>' in part) and not is_literal: |
| raise ValueError("Syntax for {!r} has hunk {!r} which doesn't " |
| "seem to surround <operand>s properly." |
| .format(mnemonic, raw)) |
| |
| if not is_literal: |
| assert part |
| if part in op_set: |
| raise ValueError("Syntax for {!r} has hunk {!r} with " |
| "more than one occurrence of <{}>." |
| .format(mnemonic, raw, part)) |
| op_list.append(part) |
| op_set.add(part) |
| |
| # Only allow empty parts (and skip their tokens) if at one end or |
| # the other |
| if not part and idx not in [0, len(parts) - 1]: |
| raise ValueError("Syntax for {!r} has two adjacent operand " |
| "tokens, with no intervening syntax." |
| .format(mnemonic)) |
| |
| if part: |
| tokens.append(SyntaxToken(is_literal, part)) |
| |
| return SyntaxHunk(optional, tokens, op_list, op_set) |
| |
| def render_doc(self) -> str: |
| '''Return how this hunk should look in the documentation''' |
| parts = [] |
| for token in self.tokens: |
| parts.append(token.render_doc()) |
| |
| body = ''.join(parts) |
| return '[{}]'.format(body) if self.is_optional else body |
| |
| def asm_pattern(self) -> str: |
| '''Return a regex pattern that can be used for matching this hunk |
| |
| The result will have a group per operand. It allows trailing, but not |
| leading, space within the hunk. |
| |
| ''' |
| parts = [] |
| for token in self.tokens: |
| parts.append(token.asm_pattern()) |
| body = ''.join(parts) |
| |
| # For an optional hunk, we build it up in the form "(?:foo)?". This |
| # puts a non-capturing group around foo and then applies "?" |
| # (one-or-more) to it. |
| return '(?:{})?'.format(body) if self.is_optional else body |
| |
| def render_vals(self, |
| op_vals: Dict[str, int], |
| operands: Dict[str, Operand]) -> str: |
| '''Return an assembly listing for the hunk given operand values |
| |
| If this hunk is optional and all its operands are zero, the hunk is |
| omitted (so this function returns the empty string). |
| |
| ''' |
| if self.is_optional: |
| required = False |
| for op_name in self.op_list: |
| if op_vals[op_name] != 0: |
| required = True |
| break |
| |
| if not required: |
| return '' |
| |
| return ''.join(token.render_vals(op_vals, operands) |
| for token in self.tokens) |
| |
| |
| class InsnSyntax: |
| '''A class representing the syntax of an instruction |
| |
| An instruction's syntax is specified in the YAML file by writing it out |
| with operand names surrounded by angle brackets. For example, a simple NOT |
| instruction might have a syntax of |
| |
| <dst>, <src> |
| |
| which should be interpreted as the following tokens: |
| |
| - Operand called 'dst' |
| - A literal ',' |
| - Operand called 'src' |
| |
| Between the tokens, whitespace is optional (so "x0 , x1" and "x0,x1" both |
| match the syntax above) unless a literal token is just a space, in which |
| case some whitespace is required. For example |
| |
| <dst> <src> |
| |
| would match "x0 x1" but not "x0x1". Whitespace within literal syntax tokens |
| means that some space is required, matching the regex \\s+. For example, |
| the (rather strange) syntax |
| |
| <dst> + - <src> |
| |
| would match "x0 + - x1" or "x0+ -x1", but not "x0 +- x1". |
| |
| Some operands (and surrounding syntax) might be optional. The optional |
| syntax is surrounded by square brackets. Nesting is not supported. For |
| example: |
| |
| <dst>, <src>[, <offset>] |
| |
| would match "x0, x1, 123" or "x0, x1". |
| |
| Note that a given syntax might be ambiguous. For example, |
| |
| <dst>, <src>[, <offset>][, <flavour>] |
| |
| With "x0, x1, 123", is 123 an offset or a flavour? (We choose not to embed |
| typing information into the syntax, because that results in very confusing |
| assembler error messages). We break ties in the same way as the underlying |
| regex engine, assigning the operand to the first group, so 123 is an offset |
| in this case. Such syntaxes are rather confusing though, so probably not a |
| good idea. |
| |
| The parsed syntax is stored as a list of "hunks". Each hunk contains a flag |
| showing whether the hunk is optional or required and also a list of |
| SyntaxToken objects. |
| |
| ''' |
| def __init__(self, |
| hunks: List[SyntaxHunk], |
| op_list: List[str], |
| op_set: Set[str]) -> None: |
| self.hunks = hunks |
| self.op_list = op_list |
| self.op_set = op_set |
| |
| @staticmethod |
| def from_list(operands: List[str]) -> 'InsnSyntax': |
| '''Smart constructor for a list of operands with "normal" syntax''' |
| if not operands: |
| return InsnSyntax([], [], set()) |
| |
| hunk = SyntaxHunk.from_list(operands) |
| return InsnSyntax([hunk], hunk.op_list, hunk.op_set) |
| |
| @staticmethod |
| def from_yaml(mnemonic: str, raw: str) -> 'InsnSyntax': |
| '''Parse the syntax in the YAML file''' |
| |
| # The raw syntax looks something like |
| # |
| # <op0>, <op1>[(<op2>)] |
| # |
| # to mean that you either have "x0, x1" or "x0, x2(x3)". First, split |
| # out the bracketed parts. |
| by_left = raw.split('[') |
| parts = [(False, by_left[0])] |
| for after_left in by_left[1:]: |
| split = after_left.split(']', 1) |
| if len(split) != 2: |
| raise ValueError('Unbalanced or nested [] in instruction ' |
| 'syntax for {!r}.' |
| .format(mnemonic)) |
| |
| parts += [(True, split[0]), (False, split[1])] |
| |
| # Now parts contains a list of pairs (required, txt) where txt is a |
| # hunk of the syntax and req is true if this hunk is required. A part |
| # might be empty. For example, "[a]b c[d]" with both lead and trail |
| # with an empty part. But it shouldn't be empty if it's marked |
| # optional: that would be something like "a[]b", which doesn't make |
| # much sense. |
| hunks = [] |
| for optional, raw in parts: |
| if raw: |
| hunks.append(SyntaxHunk.from_string(mnemonic, optional, raw)) |
| elif optional: |
| raise ValueError('Empty [] in instruction syntax for {!r}.' |
| .format(mnemonic)) |
| |
| # Collect up operands across the hunks |
| op_list = [] |
| op_set = set() |
| for hunk in hunks: |
| op_list += hunk.op_list |
| op_set |= hunk.op_set |
| |
| if len(op_list) != len(op_set): |
| raise ValueError('Instruction syntax for {!r} is not ' |
| 'linear in its operands.' |
| .format(mnemonic)) |
| |
| return InsnSyntax(hunks, op_list, op_set) |
| |
| def render_doc(self) -> str: |
| '''Return how this syntax should look in the documentation''' |
| return ''.join(hunk.render_doc() for hunk in self.hunks) |
| |
| def asm_pattern(self) -> Tuple[str, Dict[str, int]]: |
| '''Return a regex pattern and a group name map for this syntax''' |
| parts = [r'\s*'] |
| for hunk in self.hunks: |
| parts.append(hunk.asm_pattern()) |
| parts.append('$') |
| pattern = ''.join(parts) |
| |
| op_to_grp = {} |
| for idx, op in enumerate(self.op_list): |
| op_to_grp[op] = 1 + idx |
| |
| return (pattern, op_to_grp) |
| |
| def render_vals(self, |
| op_vals: Dict[str, int], |
| operands: Dict[str, Operand]) -> str: |
| '''Return an assembly listing for the given operand fields''' |
| parts = [] |
| for hunk in self.hunks: |
| parts.append(hunk.render_vals(op_vals, operands)) |
| return ''.join(parts) |
| |
| |
| class EncodingField: |
| '''A single element of an encoding's mapping''' |
| def __init__(self, |
| value: Union[BoolLiteral, str], |
| scheme_field: EncSchemeField) -> None: |
| self.value = value |
| self.scheme_field = scheme_field |
| |
| @staticmethod |
| def from_yaml(as_str: str, |
| scheme_field: EncSchemeField, |
| name_to_operand: Dict[str, Operand], |
| what: str) -> 'EncodingField': |
| # The value should either be a boolean literal ("000xx11" or similar) |
| # or should be a name, which is taken as the name of an operand. |
| if not as_str: |
| raise ValueError('Empty string as {}.'.format(what)) |
| |
| # Set self.value to be either the bool literal or the name of the |
| # operand. |
| value_width = None |
| value = '' # type: Union[BoolLiteral, str] |
| if re.match(r'b[01x_]+$', as_str): |
| value = BoolLiteral.from_string(as_str, what) |
| value_width = value.width |
| value_type = 'a literal value' |
| else: |
| operand = name_to_operand.get(as_str) |
| if operand is None: |
| raise ValueError('Unknown operand, {!r}, as {}' |
| .format(as_str, what)) |
| value_width = operand.op_type.width |
| value = as_str |
| value_type = 'an operand' |
| |
| # Unless we had an operand of type 'imm' (unknown width), we now have |
| # an expected width. Check it matches the width of the schema field. |
| if value_width is not None: |
| if scheme_field.bits.width != value_width: |
| raise ValueError('{} is mapped to {} with width {}, but the ' |
| 'encoding schema field has width {}.' |
| .format(what, value_type, value_width, |
| scheme_field.bits.width)) |
| |
| # Track the scheme field as well (so we don't have to keep track of a |
| # scheme once we've made an encoding object) |
| return EncodingField(value, scheme_field) |
| |
| |
| class Encoding: |
| '''The encoding for an instruction''' |
| def __init__(self, |
| yml: object, |
| schemes: EncSchemes, |
| name_to_operand: Dict[str, Operand], |
| mnemonic: str): |
| what = 'encoding for instruction {!r}'.format(mnemonic) |
| yd = check_keys(yml, what, ['scheme', 'mapping'], []) |
| |
| scheme_what = 'encoding scheme for instruction {!r}'.format(mnemonic) |
| scheme_name = check_str(yd['scheme'], scheme_what) |
| scheme_fields = schemes.resolve(scheme_name, mnemonic) |
| |
| what = 'encoding mapping for instruction {!r}'.format(mnemonic) |
| |
| # Check we've got exactly the right fields for the scheme |
| ydm = check_keys(yd['mapping'], what, list(scheme_fields.op_fields), []) |
| |
| # Track the set of operand names that were used in some field |
| operands_used = set() |
| |
| self.fields = {} |
| for field_name, scheme_field in scheme_fields.fields.items(): |
| if scheme_field.value is not None: |
| field = EncodingField(scheme_field.value, scheme_field) |
| else: |
| field_what = ('value for {} field in encoding for instruction {!r}' |
| .format(field_name, mnemonic)) |
| field = EncodingField.from_yaml(check_str(ydm[field_name], field_what), |
| scheme_fields.fields[field_name], |
| name_to_operand, |
| field_what) |
| |
| # If the field's value is an operand rather than a literal, it |
| # will have type str. Track the operands that we've used. |
| if isinstance(field.value, str): |
| operands_used.add(field.value) |
| |
| self.fields[field_name] = field |
| |
| # We know that every field in the encoding scheme has a value. But we |
| # still need to check that every operand ended up in some field. |
| assert operands_used <= set(name_to_operand.keys()) |
| unused_ops = set(name_to_operand.keys()) - operands_used |
| if unused_ops: |
| raise ValueError('Not all operands used in {} (missing: {}).' |
| .format(what, ', '.join(list(unused_ops)))) |
| |
| def get_masks(self) -> Tuple[int, int]: |
| '''Return zeros/ones masks for encoding |
| |
| Returns a pair (m0, m1) where m0 is the "zeros mask": a mask where a |
| bit is set if there is an bit pattern matching this encoding with that |
| bit zero. m1 is the ones mask: equivalent, but for that bit one. |
| |
| ''' |
| m0 = 0 |
| m1 = 0 |
| for field_name, field in self.fields.items(): |
| if isinstance(field.value, str): |
| m0 |= field.scheme_field.bits.mask |
| m1 |= field.scheme_field.bits.mask |
| else: |
| # Match up the bits in the value with the ranges in the scheme. |
| assert field.value.width > 0 |
| assert field.value.width == field.scheme_field.bits.width |
| bits_seen = 0 |
| for msb, lsb in field.scheme_field.bits.ranges: |
| val_msb = field.scheme_field.bits.width - 1 - bits_seen |
| val_lsb = val_msb - msb + lsb |
| bits_seen += msb - lsb + 1 |
| |
| for idx in range(0, msb - lsb + 1): |
| desc = field.value.char_for_bit(val_lsb + idx) |
| if desc in ['0', 'x']: |
| m0 |= 1 << (idx + lsb) |
| if desc in ['1', 'x']: |
| m1 |= 1 << (idx + lsb) |
| |
| all_bits = (1 << 32) - 1 |
| assert (m0 | m1) == all_bits |
| return (m0, m1) |
| |
| def get_ones_mask(self) -> int: |
| '''Return the mask of fixed bits that are set |
| |
| For literal values of x (unused bits in the encoding), we'll prefer |
| '0'. |
| |
| ''' |
| m0, m1 = self.get_masks() |
| return m1 & ~m0 |
| |
| def assemble(self, op_to_idx: Dict[str, int]) -> int: |
| '''Assemble an instruction |
| |
| op_to_idx should map each operand in the encoding to some integer |
| index, which should be small enough to fit in the width of the |
| operand's type and should be representable after any shift. Will raise |
| a ValueError if not. |
| |
| ''' |
| val = self.get_ones_mask() |
| for field_name, field in self.fields.items(): |
| if not isinstance(field.value, str): |
| # We've done this field already (in get_ones_mask) |
| continue |
| |
| # Try to get the operand value for the field. If this is an |
| # optional operand, we might not have one, and just encode zero. |
| field_val = op_to_idx.get(field.value, 0) |
| |
| # Are there any low bits that shouldn't be there? |
| shift_mask = (1 << field.scheme_field.shift) - 1 |
| if field_val & shift_mask: |
| raise ValueError("operand field {} has a shift of {}, " |
| "so can't represent the value {:#x}." |
| .format(field.value, |
| field.scheme_field.shift, |
| field_val)) |
| |
| shifted = field_val >> field.scheme_field.shift |
| |
| # Is the number too big? At the moment, we are assuming immediates |
| # are unsigned (because the OTBN big number instructions all have |
| # unsigned immediates). |
| if shifted >> field.scheme_field.bits.width: |
| shift_msg = ((' (shifted right by {} bits from {:#x})' |
| .format(field.scheme_field.shift, field_val)) |
| if field.scheme_field.shift |
| else '') |
| raise ValueError("operand field {} has a width of {}, " |
| "so can't represent the value {:#x}{}." |
| .format(field.value, |
| field.scheme_field.bits.width, |
| shifted, shift_msg)) |
| |
| val |= field.scheme_field.bits.encode(shifted) |
| |
| return val |
| |
| |
| class Insn: |
| def __init__(self, |
| yml: object, |
| groups: InsnGroups, |
| encoding_schemes: EncSchemes) -> None: |
| yd = check_keys(yml, 'instruction', |
| ['mnemonic', 'operands'], |
| ['group', 'rv32i', 'synopsis', |
| 'syntax', 'doc', 'note', 'trailing-doc', |
| 'decode', 'operation', 'encoding', 'glued-ops']) |
| |
| self.mnemonic = check_str(yd['mnemonic'], 'mnemonic for instruction') |
| |
| what = 'instruction with mnemonic {!r}'.format(self.mnemonic) |
| self.operands = [Operand(y, self.mnemonic) |
| for y in check_list(yd['operands'], |
| 'operands for ' + what)] |
| self.name_to_operand = index_list('operands for ' + what, |
| self.operands, |
| lambda op: op.name) |
| |
| raw_group = get_optional_str(yd, 'group', what) |
| self.group = groups.default_group() if raw_group is None else raw_group |
| |
| if self.group not in groups.key_to_group: |
| raise ValueError('Unknown instruction group, {!r}, ' |
| 'for mnemonic {!r}.' |
| .format(self.group, self.mnemonic)) |
| |
| self.rv32i = check_bool(yd.get('rv32i', False), |
| 'rv32i flag for ' + what) |
| self.glued_ops = check_bool(yd.get('glued-ops', False), |
| 'glued-ops flag for ' + what) |
| self.synopsis = get_optional_str(yd, 'synopsis', what) |
| self.doc = get_optional_str(yd, 'doc', what) |
| self.note = get_optional_str(yd, 'note', what) |
| self.trailing_doc = get_optional_str(yd, 'trailing-doc', what) |
| self.decode = get_optional_str(yd, 'decode', what) |
| self.operation = get_optional_str(yd, 'operation', what) |
| |
| raw_syntax = get_optional_str(yd, 'syntax', what) |
| if raw_syntax is not None: |
| self.syntax = InsnSyntax.from_yaml(self.mnemonic, |
| raw_syntax.strip()) |
| else: |
| self.syntax = InsnSyntax.from_list([op.name |
| for op in self.operands]) |
| |
| pattern, op_to_grp = self.syntax.asm_pattern() |
| self.asm_pattern = re.compile(pattern) |
| self.pattern_op_to_grp = op_to_grp |
| |
| # Make sure we have exactly the operands we expect. |
| if set(self.name_to_operand.keys()) != self.syntax.op_set: |
| raise ValueError("Operand syntax for {!r} doesn't have the " |
| "same list of operands as given in the " |
| "operand list. The syntax uses {}, " |
| "but the list of operands gives {}." |
| .format(self.mnemonic, |
| list(sorted(self.syntax.op_set)), |
| list(sorted(self.name_to_operand)))) |
| |
| encoding_yml = yd.get('encoding') |
| self.encoding = None |
| if encoding_yml is not None: |
| self.encoding = Encoding(encoding_yml, encoding_schemes, |
| self.name_to_operand, self.mnemonic) |
| |
| |
| def find_ambiguous_encodings(insns: List[Insn]) -> List[Tuple[str, str, int]]: |
| '''Check for ambiguous instruction encodings |
| |
| Returns a list of ambiguous pairs (mnemonic0, mnemonic1, bits) where |
| bits is a bit pattern that would match either instruction. |
| |
| ''' |
| masks = {} |
| for insn in insns: |
| if insn.encoding is not None: |
| masks[insn.mnemonic] = insn.encoding.get_masks() |
| |
| ret = [] |
| for mnem0, mnem1 in itertools.combinations(masks.keys(), 2): |
| m00, m01 = masks[mnem0] |
| m10, m11 = masks[mnem1] |
| |
| # The pair of instructions is ambiguous if a bit pattern might be |
| # either instruction. That happens if each bit index is either |
| # allowed to be a 0 in both or allowed to be a 1 in both. |
| # ambiguous_mask is the set of bits that don't distinguish the |
| # instructions from each other. |
| m0 = m00 & m10 |
| m1 = m01 & m11 |
| |
| ambiguous_mask = m0 | m1 |
| if ambiguous_mask == (1 << 32) - 1: |
| ret.append((mnem0, mnem1, m1 & ~m0)) |
| |
| return ret |
| |
| |
| class InsnsFile: |
| def __init__(self, yml: object) -> None: |
| yd = check_keys(yml, 'top-level', |
| ['insn-groups', 'encoding-schemes', 'insns'], |
| []) |
| |
| self.groups = InsnGroups(yd['insn-groups']) |
| self.encoding_schemes = EncSchemes(yd['encoding-schemes']) |
| self.insns = [Insn(i, self.groups, self.encoding_schemes) |
| for i in check_list(yd['insns'], 'insns')] |
| self.mnemonic_to_insn = index_list('insns', self.insns, |
| lambda insn: insn.mnemonic.lower()) |
| |
| ambiguous_encodings = find_ambiguous_encodings(self.insns) |
| if ambiguous_encodings: |
| ambiguity_msgs = [] |
| for mnem0, mnem1, bits in ambiguous_encodings: |
| ambiguity_msgs.append('{!r} and {!r} ' |
| 'both match bit pattern {:#010x}' |
| .format(mnem0, mnem1, bits)) |
| raise ValueError('Ambiguous instruction encodings: ' + |
| ', '.join(ambiguity_msgs)) |
| |
| def grouped_insns(self) -> List[Tuple[InsnGroup, List[Insn]]]: |
| '''Return the instructions in groups''' |
| grp_to_insns = {} # type: Dict[str, List[Insn]] |
| for insn in self.insns: |
| grp_to_insns.setdefault(insn.group, []).append(insn) |
| |
| ret = [] |
| for grp in self.groups.groups: |
| ret.append((grp, grp_to_insns.get(grp.key, []))) |
| |
| # We should have picked up all the instructions, because we checked |
| # that each instruction has a valid group in the Insn constructor. Just |
| # in case something went wrong, check that the counts match. |
| gti_count = sum(len(insns) for insns in grp_to_insns.values()) |
| ret_count = sum(len(insns) for _, insns in ret) |
| assert ret_count == gti_count |
| |
| return ret |
| |
| |
| def load_file(path: str) -> InsnsFile: |
| '''Load the YAML file at path. |
| |
| Raises a RuntimeError on syntax or schema error. |
| |
| ''' |
| try: |
| with open(path, 'r') as handle: |
| return InsnsFile(yaml.load(handle, Loader=yaml.SafeLoader)) |
| except FileNotFoundError: |
| raise RuntimeError('Cannot find YAML file at {!r}.' |
| .format(path)) from None |
| except yaml.YAMLError as err: |
| raise RuntimeError('Failed to parse YAML file at {!r}: {}' |
| .format(path, err)) from None |
| except ValueError as err: |
| raise RuntimeError('Invalid schema in YAML file at {!r}: {}' |
| .format(path, err)) from None |