# blob: a846ec9fb297a079dc462c335d3488f856777cb3 [file] [log] [blame]
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
'''Support code for reading the instruction database in insns.yml'''
import itertools
import os
import re
from typing import Dict, List, Optional, Tuple, cast
from .encoding import Encoding
from .encoding_scheme import EncSchemes
from .lsu_desc import LSUDesc
from .operand import Operand
from .syntax import InsnSyntax
from .yaml_parse_helpers import (check_keys, check_str, check_bool,
check_list, index_list, get_optional_str,
load_yaml)
class Insn:
    '''A single instruction, built from its entry in an instruction YAML file.

    The constructor validates the entry and stores the parsed pieces as
    attributes: mnemonic, encoding (None if the entry has no encoding),
    operands, name_to_operand, rv32i, glued_ops, synopsis, doc, note,
    trailing_doc, syntax, asm_pattern, pattern_op_to_grp, python_pseudo_op,
    literal_pseudo_op (None if not given), lsu (None if not given) and
    straight_line.

    '''
    def __init__(self,
                 yml: object,
                 encoding_schemes: Optional[EncSchemes]) -> None:
        '''Parse and validate the YAML description of one instruction.

        yml is the parsed YAML object for the instruction. encoding_schemes
        is passed on to the Encoding constructor if the instruction has an
        'encoding' field; it may be None if the file has no encoding schemes,
        in which case an instruction with an encoding is an error.

        Raises a ValueError for each inconsistency checked explicitly below
        (the check_* helpers presumably also raise ValueError on malformed
        input; callers in this file catch ValueError).

        '''
        yd = check_keys(yml, 'instruction',
                        ['mnemonic', 'operands'],
                        ['group', 'rv32i', 'synopsis',
                         'syntax', 'doc', 'note', 'trailing-doc',
                         'encoding', 'glued-ops',
                         'literal-pseudo-op', 'python-pseudo-op', 'lsu',
                         'straight-line'])

        self.mnemonic = check_str(yd['mnemonic'], 'mnemonic for instruction')

        # A description of this instruction, used in error messages.
        what = 'instruction with mnemonic {!r}'.format(self.mnemonic)

        encoding_yml = yd.get('encoding')
        self.encoding = None
        if encoding_yml is not None:
            if encoding_schemes is None:
                raise ValueError('{} specifies an encoding, but the file '
                                 'didn\'t specify any encoding schemes.'
                                 .format(what))

            self.encoding = Encoding(encoding_yml,
                                     encoding_schemes, self.mnemonic)

        self.operands = [Operand(y, self.mnemonic, self.encoding)
                         for y in check_list(yd['operands'],
                                             'operands for ' + what)]
        self.name_to_operand = index_list('operands for ' + what,
                                          self.operands,
                                          lambda op: op.name)

        # The call to index_list has checked that operand names are distinct.
        # We also need to check that no operand abbreviation clashes with
        # anything else.
        operand_names = set(self.name_to_operand.keys())
        for op in self.operands:
            if op.abbrev is not None:
                if op.abbrev in operand_names:
                    raise ValueError('The name {!r} appears as an operand or '
                                     'abbreviation more than once for '
                                     'instruction {!r}.'
                                     .format(op.abbrev, self.mnemonic))
                operand_names.add(op.abbrev)

        if self.encoding is not None:
            # If we have an encoding, we passed it to the Operand constructors
            # above. This ensured that each operand has a field. However, it's
            # possible that there are some operand names the encoding mentions
            # that don't actually have an operand. Check for that here.
            missing_ops = (set(self.encoding.op_to_field_name.keys()) -
                           set(self.name_to_operand.keys()))
            if missing_ops:
                # Sort the names so that the error message is deterministic
                # (iteration order over a set of strings is not).
                raise ValueError('Encoding scheme for {} specifies '
                                 'some non-existent operands: {}.'
                                 .format(what, ', '.join(sorted(missing_ops))))

        self.rv32i = check_bool(yd.get('rv32i', False),
                                'rv32i flag for ' + what)
        self.glued_ops = check_bool(yd.get('glued-ops', False),
                                    'glued-ops flag for ' + what)
        self.synopsis = get_optional_str(yd, 'synopsis', what)
        self.doc = get_optional_str(yd, 'doc', what)
        self.note = get_optional_str(yd, 'note', what)
        self.trailing_doc = get_optional_str(yd, 'trailing-doc', what)

        # If no explicit syntax is given, build one from the operand names in
        # the order they were listed.
        raw_syntax = get_optional_str(yd, 'syntax', what)
        if raw_syntax is not None:
            self.syntax = InsnSyntax.from_yaml(self.mnemonic,
                                               raw_syntax.strip())
        else:
            self.syntax = InsnSyntax.from_list([op.name
                                                for op in self.operands])

        pattern, op_to_grp = self.syntax.asm_pattern()
        self.asm_pattern = re.compile(pattern)
        self.pattern_op_to_grp = op_to_grp

        # Make sure we have exactly the operands we expect.
        if set(self.name_to_operand.keys()) != self.syntax.op_set:
            raise ValueError("Operand syntax for {!r} doesn't have the "
                             "same list of operands as given in the "
                             "operand list. The syntax uses {}, "
                             "but the list of operands gives {}."
                             .format(self.mnemonic,
                                     sorted(self.syntax.op_set),
                                     sorted(self.name_to_operand)))

        self.python_pseudo_op = check_bool(yd.get('python-pseudo-op', False),
                                           'python-pseudo-op flag for ' + what)
        if self.python_pseudo_op and self.encoding is not None:
            raise ValueError('{} specifies an encoding and also sets '
                             'python-pseudo-op.'.format(what))

        lpo = yd.get('literal-pseudo-op')
        if lpo is None:
            self.literal_pseudo_op = None
        else:
            lpo_lst = check_list(lpo, 'literal-pseudo-op flag for ' + what)
            for idx, item in enumerate(lpo_lst):
                if not isinstance(item, str):
                    raise ValueError('Item {} of literal-pseudo-op list for '
                                     '{} is {!r}, which is not a string.'
                                     .format(idx, what, item))
            # The loop above has checked that every item is a string.
            self.literal_pseudo_op = cast(Optional[List[str]], lpo_lst)

            # A literal pseudo-op can't also be a python pseudo-op or have an
            # encoding of its own.
            if self.python_pseudo_op:
                raise ValueError('{} specifies both python-pseudo-op and '
                                 'literal-pseudo-op.'
                                 .format(what))
            if self.encoding is not None:
                raise ValueError('{} specifies both an encoding and '
                                 'literal-pseudo-op.'
                                 .format(what))

        lsu_yaml = yd.get('lsu', None)
        if lsu_yaml is None:
            self.lsu = None
        else:
            self.lsu = LSUDesc.from_yaml(lsu_yaml,
                                         'lsu field for {}'.format(what))
            # Every name in the LSU target must be one of our operands.
            for idx, op_name in enumerate(self.lsu.target):
                if op_name not in self.name_to_operand:
                    raise ValueError('element {} of the target for the lsu '
                                     'field for {} is {!r}, which is not a '
                                     'operand name of the instruction.'
                                     .format(idx, what, op_name))

        # Validate this flag in the same way as the other boolean flags,
        # rather than storing whatever object the YAML happened to contain.
        self.straight_line = check_bool(yd.get('straight-line', True),
                                        'straight-line flag for ' + what)

    def enc_vals_to_op_vals(self,
                            cur_pc: int,
                            enc_vals: Dict[str, int]) -> Dict[str, int]:
        '''Convert values extracted from an encoding to their logical values

        This converts between "encoded values" and "operand values" (as defined
        in the OperandType class).

        The enc_vals dictionary should be keyed by the instruction's operand
        names (guaranteed by Encoding.extract_operands). This function should
        only be called when every operand has a width (which will definitely be
        the case if we just decoded these values from an instruction word).

        Returns a dictionary with the same keys as enc_vals, mapping each
        operand name to its operand value.

        '''
        op_vals = {}
        for op_name, enc_val in enc_vals.items():
            op_type = self.name_to_operand[op_name].op_type
            op_val = op_type.enc_val_to_op_val(enc_val, cur_pc)

            # This assertion should hold because OperandType.enc_val_to_op_val
            # doesn't return None if the operand type has a width and the
            # function is given a PC.
            assert op_val is not None
            op_vals[op_name] = op_val

        return op_vals

    def disassemble(self,
                    cur_pc: int,
                    op_vals: Dict[str, int]) -> str:
        '''Return disassembly for this instruction

        op_vals should be a dictionary mapping operand names to operand values
        (not encoded values). cur_pc is passed through to the syntax when
        rendering the operands.

        The mnemonic (followed by a space) is padded with spaces to at least
        15 characters, and the rendered operands follow it.

        '''
        hunks = self.syntax.render(cur_pc, op_vals, self.name_to_operand)
        mnem = self.mnemonic
        if hunks and self.glued_ops:
            # With glued operands, the first hunk is attached directly to the
            # mnemonic with no space in between.
            mnem += hunks[0] + ' '
            hunks = hunks[1:]
        else:
            mnem += ' '

        # Pad so that operands line up (a longer mnemonic is left alone).
        mnem = mnem.ljust(15)

        # The lstrip here deals with a tricky corner case for instructions like
        # bn.mulqacc if the .z option isn't supplied. In that case, the syntax
        # for the operands starts with a space (following the optional .z that
        # isn't there) and would mess up our alignment.
        return mnem + ''.join(hunks).lstrip()
class DummyInsn(Insn):
    '''A placeholder instruction that is never the result of decoding.

    It is not meant to be stored in an InsnGroup or InsnsFile. Its use is for
    objects that wrap an instruction and want something to hold when the
    encoding they were given is bogus.

    '''
    def __init__(self) -> None:
        # The smallest valid description: a mnemonic and no operands. No
        # encoding schemes are supplied.
        super().__init__({'mnemonic': 'dummy-insn', 'operands': []}, None)
class InsnGroup:
    '''A group of instructions, loaded from the YAML file the group names.

    Attributes: key, title and doc (strings taken from the group's YAML) and
    insns (the list of Insn objects parsed from the group's instruction
    file).

    '''
    def __init__(self,
                 path: str,
                 encoding_schemes: Optional[EncSchemes],
                 yml: object) -> None:
        '''Parse the group described by yml.

        path is the path of the file that contains the group description; the
        group's 'insns' field is resolved relative to its directory.
        encoding_schemes is passed to each Insn that gets constructed.

        A ValueError raised while building the instructions is converted to a
        RuntimeError that names the instruction file.

        '''
        yd = check_keys(yml, 'insn-group',
                        ['key', 'title', 'doc', 'insns'], [])
        self.key = check_str(yd['key'], 'insn-group key')
        self.title = check_str(yd['title'], 'insn-group title')
        self.doc = check_str(yd['doc'], 'insn-group doc')

        insns_what = 'insns field for {!r} instruction group'.format(self.key)
        insns_rel_path = check_str(yd['insns'], insns_what)

        # The instruction file lives next to the file that names it.
        base_dir = os.path.dirname(path)
        insns_path = os.path.normpath(os.path.join(base_dir, insns_rel_path))
        insns_yaml = load_yaml(insns_path, insns_what)

        try:
            parsed = []  # type: List[Insn]
            for entry in check_list(insns_yaml, insns_what):
                parsed.append(Insn(entry, encoding_schemes))
        except ValueError as err:
            message = ('Invalid schema in YAML file at {!r}: {}'
                       .format(insns_path, err))
            raise RuntimeError(message) from None

        self.insns = parsed
class InsnGroups:
    '''The non-empty list of instruction groups from an instructions file.

    Attributes: groups (the InsnGroup objects, in file order) and
    key_to_group (the result of indexing the groups by their key).

    '''
    def __init__(self,
                 path: str,
                 encoding_schemes: Optional[EncSchemes],
                 yml: object) -> None:
        '''Build one InsnGroup per item of yml.

        path and encoding_schemes are passed through to each InsnGroup.
        Raises a ValueError if the list of groups is empty.

        '''
        groups = []  # type: List[InsnGroup]
        for group_yml in check_list(yml, 'insn-groups'):
            groups.append(InsnGroup(path, encoding_schemes, group_yml))
        self.groups = groups

        if not self.groups:
            raise ValueError('Empty list of instruction groups: '
                             'we need at least one as a base group.')

        self.key_to_group = index_list('insn-groups',
                                       self.groups,
                                       lambda group: group.key)
class InsnsFile:
    '''The contents of a top-level instructions YAML file.

    Attributes: encoding_schemes (None if the file names no schemes file),
    groups (an InsnGroups), insns (every instruction from every group, in
    order) and mnemonic_to_insn (instructions indexed by lower-cased
    mnemonic).

    '''
    def __init__(self, path: str, yml: object) -> None:
        '''Parse the top-level YAML object yml, which was loaded from path.

        Relative paths in the file are resolved against the directory of
        path. Raises a ValueError if two instruction encodings are ambiguous
        and a RuntimeError if the encoding schemes file has a bad schema.

        '''
        yd = check_keys(yml, 'top-level',
                        ['insn-groups'],
                        ['encoding-schemes'])

        schemes = None  # type: Optional[EncSchemes]
        enc_scheme_path = get_optional_str(yd, 'encoding-schemes', 'top-level')
        if enc_scheme_path is not None:
            es_path = os.path.normpath(os.path.join(os.path.dirname(path),
                                                    enc_scheme_path))
            es_yaml = load_yaml(es_path, 'encoding schemes')
            try:
                schemes = EncSchemes(es_yaml)
            except ValueError as err:
                message = ('Invalid schema in YAML file at {!r}: {}'
                           .format(es_path, err))
                raise RuntimeError(message) from None
        self.encoding_schemes = schemes

        self.groups = InsnGroups(path,
                                 self.encoding_schemes,
                                 yd['insn-groups'])

        # self.groups keeps the instructions split up by group. Most users
        # just want a flat list of instructions, so build that as well.
        self.insns = list(itertools.chain.from_iterable(
            group.insns for group in self.groups.groups))

        self.mnemonic_to_insn = index_list('insns', self.insns,
                                           lambda insn: insn.mnemonic.lower())

        exclusive_masks, problems = self._get_masks()
        if problems:
            raise ValueError('Ambiguous instruction encodings: ' +
                             ', '.join(problems))

        self._masks = exclusive_masks

    def grouped_insns(self) -> List[Tuple[InsnGroup, List[Insn]]]:
        '''Return a list of (group, instructions in that group) pairs'''
        pairs = []
        for group in self.groups.groups:
            pairs.append((group, group.insns))
        return pairs

    def _get_masks(self) -> Tuple[Dict[str, Tuple[int, int]], List[str]]:
        '''Compute zeros/ones masks for each encoding and look for clashes

        Returns a pair (masks, ambiguities). masks is keyed by instruction
        mnemonic and only has entries for instructions with an encoding. Each
        value is a pair (m0, m1), where m0 has the bits that are always zero
        in the instruction's encoding and m1 has the bits that are always one.
        A bit that can take either value is set in neither.

        ambiguities is a list of error messages, one for each pair of
        instructions whose encodings can't be told apart. It is empty unless
        something has gone wrong.

        '''
        inclusive = {}
        exclusive = {}
        for insn in self.insns:
            if insn.encoding is None:
                continue

            zeros, ones = insn.encoding.get_masks()
            inclusive[insn.mnemonic] = (zeros, ones)
            # Drop the bits that may take either value.
            exclusive[insn.mnemonic] = (zeros & ~ones, ones & ~zeros)

        all_bits = (1 << 32) - 1
        problems = []
        for left, right in itertools.combinations(inclusive, 2):
            left_zeros, left_ones = inclusive[left]
            right_zeros, right_ones = inclusive[right]

            # A bit fails to distinguish the two instructions if both allow
            # it to be zero or both allow it to be one. If that is true of
            # every bit, some word matches both instructions.
            shared_zeros = left_zeros & right_zeros
            shared_ones = left_ones & right_ones
            if (shared_zeros | shared_ones) != all_bits:
                continue

            problems.append('{!r} and {!r} both match bit pattern {:#010x}'
                            .format(left, right,
                                    shared_ones & ~shared_zeros))

        return (exclusive, problems)

    def mnem_for_word(self, word: int) -> Optional[str]:
        '''Return the mnemonic of the instruction that word might encode

        Returns None if no instruction matches.

        '''
        found = None
        for mnem, (zeros, ones) in self._masks.items():
            # The word matches if no must-be-zero bit is set and no
            # must-be-one bit is clear.
            matches = not (word & zeros) and not (~word & ones)
            if matches:
                # Belt-and-braces ambiguity check
                assert found is None
                found = mnem

        return found
def load_file(path: str) -> InsnsFile:
    '''Load the instructions YAML file at path and return it as an InsnsFile.

    A ValueError raised while loading or validating the file is converted to
    a RuntimeError that names the file.

    '''
    try:
        yml = load_yaml(path, None)
        insns_file = InsnsFile(path, yml)
    except ValueError as err:
        message = 'Invalid schema in YAML file at {!r}: {}'.format(path, err)
        raise RuntimeError(message) from None

    return insns_file
# Cache used by load_insns_yaml: None until the default insns.yml has been
# loaded successfully, then the resulting InsnsFile.
_DEFAULT_INSNS_FILE = None # type: Optional[InsnsFile]
def load_insns_yaml() -> InsnsFile:
    '''Return the InsnsFile for insns.yml at its default location.

    The file is found at ../../data/insns.yml relative to the directory of
    this module. It is loaded with load_file on the first call and the result
    is cached in _DEFAULT_INSNS_FILE for later calls. Any error raised by
    load_file propagates and leaves the cache empty.

    '''
    global _DEFAULT_INSNS_FILE

    if _DEFAULT_INSNS_FILE is not None:
        return _DEFAULT_INSNS_FILE

    here = os.path.dirname(__file__)
    default_path = os.path.normpath(
        os.path.join(here, '..', '..', 'data', 'insns.yml'))

    _DEFAULT_INSNS_FILE = load_file(default_path)
    return _DEFAULT_INSNS_FILE