| #!/usr/bin/env python3 |
| # Copyright lowRISC contributors. |
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| # SPDX-License-Identifier: Apache-2.0 |
| '''A wrapper around riscv32-unknown-elf-as for OTBN |
| |
| Partial support: |
| |
| - This doesn't currently support .include directives fully (the included file |
| will not be transformed, so OTBN instructions won't work there). |
| |
| - .file support assumes we're not using DWARF2 file numbers. |
| |
| - Operands may not have embedded spaces or commas. Complicated immediate |
| expressions are not currently supported. |
| |
| ''' |
| |
| import os |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| from typing import Dict, List, Optional, Set, TextIO, Tuple |
| |
| from shared.bit_ranges import BitRanges |
| from shared.encoding import Encoding |
| from shared.insn_yaml import Insn, InsnsFile, load_insns_yaml |
| from shared.operand import ImmOperandType, Operand, RegOperandType |
| from shared.toolchain import find_tool |
| |
| |
class RVFmt:
    '''One of the instruction formats understood by the .insn directive

    Attributes:

      name:      The name of the format (as written after ".insn").

      operands:  The operand names, in the order they appear in the .insn
                 syntax.

      op_data:   A dictionary keyed by operand name. Each value is a tuple
                 (fmt, shift, ranges).

      bit_to_op: A list of length 32, indexed by bit. Each item is a tuple
                 (msb, lsb, op_name, shift), meaning "the range of bits
                 {msb,...lsb} appears in the operand called op_name and its
                 LSB there is shift". The assembler uses this to look up
                 fields by bit.

    A format is written down as a list of bit fields followed by the .insn
    syntax. This is a sort of "inside out" version of the BitRanges format,
    but it matches the RISC-V documentation more closely, so should be easier
    to write by hand.

    Each bit field is a string of the form NAME:TYPE, listed from bit 31
    downwards. NAME is only used to match the field up with the syntax. TYPE
    is one of:

      f<n>:  An n-bit literal value.
      r:     A 5-bit register name.
      i<n>:  An n-bit signed immediate
      u<n>:  An n-bit unsigned immediate

    The syntax is a list of NAMEs. An immediate that is split into several
    bit fields is recombined with a pipe symbol: "foo|bar" is an operand made
    from the bit fields "foo" and "bar" (MSB first). If the operand is
    shifted (i.e. the assembly syntax N gets encoded as N >> S), the shift is
    given by a <<n suffix.

    '''

    def __init__(self, name: str, bitfields: List[str],
                 syntax: List[str]) -> None:
        self.name = name
        self.operands = syntax
        self.op_data = {}  # type: Dict[str, Tuple[str, int, BitRanges]]

        # Lay the bit fields out from bit 31 downwards, giving a map from
        # field name to (fmt, msb, lsb). The inputs come from this file
        # rather than from a user, so problems are reported with assertions
        # instead of proper error messages (here and below).
        layout = {}  # type: Dict[str, Tuple[str, int, int]]
        next_msb = 31
        for spec in bitfields:
            field_name, type_txt = spec.split(':')
            kind = type_txt[0]
            assert kind in ['f', 'r', 'i', 'u']
            width = 5 if kind == 'r' else int(type_txt[1:])

            assert field_name not in layout
            layout[field_name] = (kind, next_msb, next_msb - width + 1)
            next_msb -= width

        # The fields must cover all 32 bits exactly.
        assert next_msb == -1

        seen_fields = set()  # type: Set[str]
        bit_owner = {}  # type: Dict[int, Tuple[int, int, str, int]]

        for operand in syntax:
            # Peel off an optional "<<n" suffix.
            base_name, has_shift, shift_txt = operand.partition('<<')
            op_shift = int(shift_txt) if has_shift else 0

            op_kind = None  # type: Optional[str]
            ranges_lsb_first = []  # type: List[Tuple[int, int]]

            # Visit the parts LSB-first: that way we know the offset of each
            # part within the operand without needing the operand's total
            # width up front.
            offset = 0
            for part in reversed(base_name.split('|')):
                assert part not in seen_fields
                seen_fields.add(part)
                assert part in layout
                kind, hi, lo = layout[part]

                # Every part of an operand must have the same type
                assert op_kind is None or kind == op_kind
                op_kind = kind

                ranges_lsb_first.append((hi, lo))

                entry = (hi, lo, operand, op_shift + offset)
                for bit in range(lo, hi + 1):
                    assert bit not in bit_owner
                    bit_owner[bit] = entry

                offset += hi - lo + 1

            assert operand not in self.op_data
            assert op_kind is not None

            # BitRanges.from_list is given the ranges MSB-first, so flip the
            # list we collected.
            self.op_data[operand] = (op_kind, op_shift,
                                     BitRanges.from_list(
                                         ranges_lsb_first[::-1]))

        assert len(bit_owner) == 32

        # No field was used twice (checked above). Check that no field was
        # left out of the syntax either.
        assert len(seen_fields) == len(layout)

        self.bit_to_op = [bit_owner[bit] for bit in range(32)]
| |
| |
def rv_render(fmt: str, num: int) -> str:
    '''Format num the way a .insn field of type fmt expects to see it

    Register fields ('r') are written as x<num>. Literal and immediate fields
    ('f', 'i', 'u') are written in hex. Any other fmt is a programming error.

    '''
    if fmt not in ('f', 'i', 'u'):
        assert fmt == 'r'
        return f'x{num}'

    return f'{num:#x}'
| |
| |
| # A _PartFieldEncoding is a list with items: |
| # |
| # ((field_msb, field_lsb), rv_name, rv_lsb) |
| # |
| # This means: "The range of bits, {field_msb..field_lsb}, from the instruction |
| # field should be written in the .insn line by putting them in the operand |
| # called rv_name after shifting left by rv_lsb." |
| _PartFieldEncoding = List[Tuple[Tuple[int, int], str, int]] |
| |
| |
class RVEncoding:
    '''A recipe for writing an instruction's Encoding as a .insn line

    This ties an Encoding to the RVFmt that can express it. The other
    attributes are the data computed by find_rv_encoding:

      rv_masks:          Fixed value for each .insn operand (keyed by the
                         operand's name).

      rv_to_full_op:     Maps a .insn operand name to the name of the
                         instruction operand that is passed through to it
                         unchanged.

      part_field_to_rv:  Maps an encoding field name to a _PartFieldEncoding
                         describing where its bits go in the .insn operands.

    '''

    def __init__(self, encoding: Encoding, rvfmt: RVFmt,
                 rv_masks: Dict[str, int], rv_to_full_op: Dict[str, str],
                 part_field_to_rv: Dict[str, _PartFieldEncoding]) -> None:
        # What is being encoded, and the format used to encode it
        self.encoding = encoding
        self.rvfmt = rvfmt

        # How to fill in the operands of that format
        self.rv_masks = rv_masks
        self.rv_to_full_op = rv_to_full_op
        self.part_field_to_rv = part_field_to_rv
| |
| |
# The .insn formats that we know how to target. find_insn_schemes tries them
# in this order and uses the first one that fits an instruction.
RISCV_FORMATS = [
    RVFmt(
        'r',
        ['func7:f7', 'rs2:r', 'rs1:r', 'func3:f3', 'rd:r', 'opcode:f7'],
        ['opcode', 'func3', 'func7', 'rd', 'rs1', 'rs2'],
    ),
    RVFmt(
        'r4',
        [
            'rs3:r', 'func2:f2', 'rs2:r', 'rs1:r', 'func3:f3', 'rd:r',
            'opcode:f7'
        ],
        ['opcode', 'func3', 'func2', 'rd', 'rs1', 'rs2', 'rs3'],
    ),
    RVFmt(
        'i',
        ['simm12:i12', 'rs1:r', 'func3:f3', 'rd:r', 'opcode:f7'],
        ['opcode', 'func3', 'rd', 'rs1', 'simm12'],
    ),
    RVFmt(
        's',
        ['imm0:i7', 'rs2:r', 'rs1:r', 'func3:f3', 'imm1:i5', 'opcode:f7'],
        ['opcode', 'func3', 'rs2', 'rs1', 'imm0|imm1'],
    ),
    RVFmt(
        'sb',
        [
            'imm12:i1', 'imm105:i6', 'rs2:r', 'rs1:r', 'func3:f3',
            'imm41:i4', 'imm11:i1', 'opcode:f7'
        ],
        ['opcode', 'func3', 'rs2', 'rs1', 'imm12|imm11|imm105|imm41<<1'],
    ),

    # The "U" format is the one used by LUI: its immediate operand gives
    # imm[31:12] for some imm. The spec implies (confusingly) that the
    # immediate is signed, probably because it *is* sign-extended on the
    # 64-bit architecture. On a 32-bit architecture the top bit is the sign,
    # so there is nothing to sign-extend. The binutils assembler treats the
    # immediate as unsigned and rejects things like "lui x1, -1".
    RVFmt(
        'u',
        ['imm20:u20', 'rd:r', 'opcode:f7'],
        ['opcode', 'rd', 'imm20'],
    ),
    RVFmt(
        'j',
        ['i20:i1', 'i101:i10', 'i11:i1', 'i1912:i8', 'rd:r', 'opcode:f7'],
        ['opcode', 'rd', 'i20|i1912|i11|i101<<1'],
    ),
]
| |
| |
def find_rv_encoding(enc: Encoding, name_to_operand: Dict[str, Operand],
                     rvfmt: RVFmt) -> Optional[RVEncoding]:
    '''Try to express the encoding enc using the .insn format rvfmt

    name_to_operand maps operand names (as used by the fields of enc) to the
    instruction's operands. Returns an RVEncoding on success, or None if the
    instruction cannot be written as a .insn line with this format.

    '''
    # When we assemble an instruction, it will look something like this:
    #
    #    opcode r0, r1, 1, some_symbol
    #
    # The immediate operands ("1" and "some_symbol") are the hard ones: we
    # want to hand their text straight to the GNU assembler and let it do the
    # work, so each one must correspond exactly to an immediate field of the
    # RISC-V format.
    #
    # Register operands (and options, and enums) are much simpler: their
    # integer value can always be read from the syntax, so we can splat their
    # bits all over the .insn line without worrying.

    # Map from RV field to the name of the operand that is passed through to
    # it whole, without any recoding.
    direct_ops = {}  # type: Dict[str, str]
    passthru_names = set()  # type: Set[str]

    # Pass 1: the "difficult" immediate operands. If any of them doesn't fit
    # this format, we give up.
    for _, enc_field in enc.fields.items():
        op_name = enc_field.value
        if not isinstance(op_name, str):
            continue

        op_type = name_to_operand[op_name].op_type
        if op_type.syntax_determines_value():
            continue

        # From here on we need the "signed" and "shift" properties of an
        # ImmOperandType. Some other operand type whose syntax doesn't
        # determine its value is more than we can handle.
        if not isinstance(op_type, ImmOperandType):
            return None

        assert op_name not in passthru_names
        passthru_names.add(op_name)

        # Look for a non-fixed field of rvfmt that covers exactly the same
        # bits as this encoding field.
        hit = None
        for rv_name, (rv_kind, rv_shift, rv_bits) in rvfmt.op_data.items():
            if rv_kind != 'f' and enc_field.scheme_field.bits == rv_bits:
                hit = (rv_name, rv_kind, rv_shift)
                break

        if hit is None:
            return None

        rv_name, rv_kind, rv_shift = hit

        # The field we found is only usable if it is an immediate field with
        # the same shift and signedness as the operand.
        if rv_shift != op_type.shift:
            return None
        if rv_kind not in ('i', 'u'):
            return None
        if (rv_kind == 'i') != bool(op_type.signed):
            return None

        direct_ops[rv_name] = op_name

    # Getting this far means that a .insn encoding exists for the
    # instruction. What's left is to place every other instruction field.
    #
    # Pass 2: for each remaining encoding field, cut its bits into pieces
    # that each live inside a single RV operand. The result is a
    # _PartFieldEncoding per encoding field (see the comment above the
    # definition of that type).
    spans_by_field = {}  # type: Dict[str, _PartFieldEncoding]

    for field_name, enc_field in enc.fields.items():
        if isinstance(enc_field.value, str):
            # An operand: skip it if pass 1 has already placed it.
            if enc_field.value in passthru_names:
                continue

            operand = name_to_operand[enc_field.value]
            assert operand.op_type.syntax_determines_value()

        field_bits = enc_field.scheme_field.bits
        spans = []  # type: _PartFieldEncoding

        # The number of bits of the field that have been placed so far
        # (counting down from the field's MSB).
        consumed = 0
        for range_msb, range_lsb in field_bits.ranges:
            cursor = range_msb
            while cursor >= range_lsb:
                slot_msb, slot_lsb, slot_op, slot_shift = \
                    rvfmt.bit_to_op[cursor]

                # Intersect {cursor..range_lsb} with {slot_msb..slot_lsb}:
                # these are the bits of the final encoding that this piece
                # covers.
                top = min(cursor, slot_msb)
                bottom = max(range_lsb, slot_lsb)
                width = top - bottom + 1

                # We looked the slot up by cursor, so that bit at least must
                # be inside the intersection.
                assert bottom <= cursor <= top

                # The bits of the instruction field that this piece holds.
                src_msb = field_bits.width - 1 - consumed
                src_lsb = src_msb - width + 1
                assert 0 <= src_lsb <= src_msb <= field_bits.width

                # Where the piece's LSB sits in the .insn operand.
                dest_lsb = slot_shift + (bottom - slot_lsb)

                spans.append(((src_msb, src_lsb), slot_op, dest_lsb))

                cursor = bottom - 1
                consumed += width

        spans_by_field[field_name] = spans

    # Pass 3: split what pass 2 produced. Fields with a fixed value are
    # folded into rv_masks (keyed by RV operand name), giving the bits that
    # are always written. Fields that hold an operand keep their
    # _PartFieldEncoding, to be resolved when an instruction is assembled.
    rv_masks = {rv_op: 0 for rv_op in rvfmt.operands}
    operand_spans = {}  # type: Dict[str, _PartFieldEncoding]

    for field_name, spans in spans_by_field.items():
        enc_field = enc.fields[field_name]
        if isinstance(enc_field.value, str):
            operand_spans[field_name] = spans
            continue

        for (src_msb, src_lsb), rv_name, rv_lsb in spans:
            for src_bit in range(src_lsb, src_msb + 1):
                if enc_field.value.char_for_bit(src_bit) == '1':
                    rv_masks[rv_name] |= 1 << (src_bit - src_lsb + rv_lsb)

    return RVEncoding(enc, rvfmt, rv_masks, direct_ops, operand_spans)
| |
| |
def find_insn_schemes(mnem_to_insn: Dict[str, Insn]) -> Dict[str, RVEncoding]:
    '''Work out which instructions can be written as .insn lines

    Returns a dictionary keyed by mnemonic. An instruction appears in it if
    it has an encoding that fits one of RISCV_FORMATS, and the value is the
    RVEncoding for the first format that fits.

    '''
    schemes = {}  # type: Dict[str, RVEncoding]
    for mnem, insn in mnem_to_insn.items():
        encoding = insn.encoding

        # Without an encoding, there's nothing to match against a format
        if encoding is None:
            continue

        # Try the formats lazily, stopping at the first that works.
        attempts = (find_rv_encoding(encoding, insn.name_to_operand, rvfmt)
                    for rvfmt in RISCV_FORMATS)
        rve = next((hit for hit in attempts if hit is not None), None)
        if rve is not None:
            schemes[mnem] = rve

    return schemes
| |
| |
def parse_positionals(
        argv: List[str]) -> Tuple[List[str], List[str], Set[str]]:
    '''A partial argument parser that extracts positional arguments

    argv is the full command line (argv[0] is ignored). Returns a tuple
    (positionals, others, flags):

      positionals:  The input files.

      others:       Everything else that is meant for GNU as, in the order it
                    appeared. Arguments to switches like "-o" are kept next
                    to their switch.

      flags:        The OTBN-specific flags that were seen. These are for
                    this wrapper only, so they do not appear in others.

    If -h or --help was passed, print a help message and exit with status 0.

    '''
    # The only arguments we actually need to parse from as are the input
    # files: we'll pass anything else straight through. Unfortunately, we
    # can't use argparse's parse_known_args because GNU as has an unusual
    # syntax where '--' means "standard input", rather than "stuff after this
    # positional".

    # The switches listed in the as manual that can be specified as
    # "--foo bar" (we need to know them so that we don't think that "bar" is
    # a positional argument).
    space_args = ('--debug-prefix-map', '--defsym', '-I', '-o')

    # OTBN-specific flags
    otbn_flags = ('--otbn-translate',)

    positionals = []  # type: List[str]
    others = []  # type: List[str]
    flags = set()  # type: Set[str]

    expecting_arg = False
    for arg in argv[1:]:
        if expecting_arg:
            # The argument of a switch in space_args: pass it through
            # whatever it looks like.
            others.append(arg)
            expecting_arg = False
        elif arg in otbn_flags:
            # Consumed here: GNU as doesn't know about these flags, so they
            # must not leak into the pass-through arguments.
            flags.add(arg)
        elif arg in space_args:
            others.append(arg)
            expecting_arg = True
        elif arg.startswith('-'):
            others.append(arg)
        else:
            positionals.append(arg)

    if '-h' in others or '--help' in others:
        print('otbn_as.py:\n\n'
              'A wrapper around riscv32-unknown-elf-as for OTBN.\n'
              'Most arguments are passed through: see "man as" '
              'for more information.\n'
              '\n'
              '  --otbn-translate: Translate the input and dump to '
              'stdout rather than calling as.\n')
        sys.exit(0)

    return (positionals, others, flags)
| |
| |
| def _unpack_lx(where: str, mnemonic: str, |
| op_to_expr: Dict[str, Optional[str]]) -> Tuple[str, str]: |
| '''Unpack the arguments to li or la''' |
| if set(op_to_expr.keys()) != {'grd', 'imm'}: |
| umnem = mnemonic.upper() |
| keys_list = list(op_to_expr.keys()) |
| raise RuntimeError(f'When expanding {umnem}, got wrong op_to_expr ' |
| f'keys ({keys_list}). This is a mismatch between ' |
| f'expand_{mnemonic} in otbn_as.py and the operands ' |
| f'for {umnem} in insns.yml.') |
| |
| grd = op_to_expr['grd'] |
| imm = op_to_expr['imm'] |
| if grd is None: |
| raise RuntimeError('When expanding LI, got <grd> = None. Is the ' |
| '<grd> operand wrongly marked as optional in ' |
| 'insns.yml?') |
| if imm is None: |
| raise RuntimeError('When expanding LI, got <imm> = None. Is the ' |
| '<imm> operand wrongly marked as optional in ' |
| 'insns.yml?') |
| |
| try: |
| gpr_type = RegOperandType('gpr', False, True) |
| grd_op_val = gpr_type.str_to_op_val(grd) |
| assert grd_op_val is not None |
| except ValueError as err: |
| raise RuntimeError('{}: When parsing LI instruction, <grd> ' |
| 'operand is wrong: {}'.format(where, err)) |
| |
| grd_txt = gpr_type.op_val_to_str(grd_op_val, None) |
| return (grd_txt, imm) |
| |
| |
def expand_li(where: str, op_to_expr: Dict[str, Optional[str]]) -> List[str]:
    '''Expand the li pseudo-op

    Returns the list of instructions (one or two of them) that load the
    immediate into the destination register. Raises a RuntimeError if the
    immediate isn't an integer literal or doesn't fit in 32 bits.

    '''
    # This logic is slightly complicated so it has some associated tests in
    # the ISS testsuite (where we can run the results). If adding more
    # cleverness to this or fixing bugs, we should also add a check at
    # hw/ip/otbn/dv/otbnsim/test/simple/pseudos/li.s.

    grd_txt, imm = _unpack_lx(where, 'li', op_to_expr)
    try:
        imm_int = int(imm, 0)
    except ValueError:
        raise RuntimeError('{}: Cannot parse {!r}, the immediate for an LI '
                           'instruction, as an integer.'.format(where, imm))

    # Accept anything that is a valid i32 (the range [-2^31, 2^31-1]) or a
    # valid u32 (which adds [2^31, 2^32-1]).
    i32_min = -(1 << 31)
    i32_max = (1 << 31) - 1
    u32_max = (1 << 32) - 1
    if not (i32_min <= imm_int <= u32_max):
        raise RuntimeError('{}: The immediate for an LI instruction is {}, '
                           'which does not fit in a 32-bit integer.'.format(
                               where, imm))

    # Fold the upper half of the u32 range onto the negative numbers, so that
    # imm_int is always a valid i32 from here on.
    if imm_int > i32_max:
        imm_int -= 1 << 32

    assert i32_min <= imm_int <= i32_max

    # Anything that fits in an i12 is just an addi from x0
    if -(1 << 11) <= imm_int < (1 << 11):
        return [f'addi {grd_txt}, x0, {imm_int}']

    if imm_int > 0:
        imm_uint = imm_int
    else:
        imm_uint = (1 << 32) + imm_int
    assert imm_uint >= 0
    assert (imm_uint >> 32) == 0

    # Everything else starts with an LUI. Split the value into the upper 20
    # bits and the lower 12 bits, reading the latter as a signed number
    # (because that is what ADDI will add).
    hi20 = imm_uint >> 12
    lo12 = imm_uint & ((1 << 12) - 1)
    if lo12 >> 11:
        lo12 -= 1 << 12

    # Nothing to add: the LUI does the whole job
    if lo12 == 0:
        return [f'lui {grd_txt}, {hi20:#x}']

    # LUI, then ADDI with a positive constant
    if lo12 > 0:
        return [
            f'lui {grd_txt}, {hi20:#x}',
            f'addi {grd_txt}, {grd_txt}, {lo12:#x}',
        ]

    # LUI, then ADDI with a negative constant. The ADDI will subtract from
    # what LUI loaded, so add 1 to the upper immediate to make up for it.
    return [
        f'lui {grd_txt}, {1 + hi20:#x}',
        f'addi {grd_txt}, {grd_txt}, {lo12}',
    ]
| |
| |
def expand_la(where: str, op_to_expr: Dict[str, Optional[str]]) -> List[str]:
    '''Expand the la pseudo-op

    Returns a pair of instructions (lui, then addi) that load the address of
    the symbol into the destination register.

    '''
    # For RISC-V, "la rd, symbol" expands to two instructions:
    #
    #   auipc rd,     delta[31:12] + delta[11]
    #   addi  rd, rd, delta[11:0]
    #
    # where delta = symbol - pc.
    #
    # Here we generate an absolute address rather than a pc-relative one, so
    # "la rd, symbol" expands to:
    #
    #   lui   rd,     %hi(symbol)
    #   addi  rd, rd, %lo(symbol)
    grd_txt, imm = _unpack_lx(where, 'la', op_to_expr)

    load_upper = f'lui {grd_txt}, %hi({imm})'
    add_lower = f'addi {grd_txt}, {grd_txt}, %lo({imm})'
    return [load_upper, add_lower]
| |
| |
# The pseudo-operations that are expanded by a Python function in this file,
# keyed by mnemonic. Each function takes a location string (for error
# messages) and a map from operand name to expression, and returns the
# instructions that make up the expansion.
_PSEUDO_OP_ASSEMBLERS = {
    'li': expand_li,
    'la': expand_la,
}
| |
| |
| class Transformer: |
| '''A simple parser/transformer for OTBN input files |
| |
| We have to do some basic tokenization to understand things like comments |
| and strings (which can contain embedded newlines). We don't want to perturb |
| the existing syntax, and want to pass comments through properly. Since the |
| grammar we're recognising is very simple, it's probably easiest to |
| hand-roll the parser. |
| |
| The grammar in a lazy pseudo-BNF syntax: |
| |
| file ::= statement* |
| |
| blank ::= [\t ]+ |
| |
| ws ::= blank? '#' [^\n]* '\n' |
| | blank? '/*' .*? '*/' |
| | blank |
| |
| statement ::= ws? labels stmt-body? '\n' |
| |
| labels ::= label* |
| |
| label ::= symbol ':' ws? |
| |
| stmt-body ::= key-sym expr* |
| |
| key-sym ::= symbol (this is a .directive or a mnemonic) |
| |
| symbol ::= [a-zA-Z0-9$._]+ |
| |
| The proper syntax for "expr" depends on the key-sym, but we want to be very |
| permissive, so allow pretty much anything up to end of line. The only |
| reason we have to be careful is because strings can contain newlines, so we |
| end up with: |
| |
| string ::= "... the usual \n \" rules with |
| embedded newlines" ws? |
| |
| normal-token ::= [^ \t"]+ ws? |
| |
| token ::= string | normal-token |
| |
| expr ::= token* |
| |
| Note that, while we don't need to understand the labels themselves, we do |
| need to spot them in order to find the key-sym. |
| |
| ''' |
| |
def __init__(self, out_handle: TextIO, in_path: str, insns_file: InsnsFile,
             glued_insns_dec_len: List[Insn],
             mnem_to_rve: Dict[str, RVEncoding]) -> None:
    '''Set up a transformer reading in_path and writing to out_handle

    This writes the initial .file and .line directives to out_handle.

    '''
    # Where we read from (only used for messages and .file) and write to
    self.in_path = in_path
    self.out_handle = out_handle

    # What we know about the instruction set
    self.insns_file = insns_file
    self.glued_insns_dec_len = glued_insns_dec_len
    self.mnem_to_rve = mnem_to_rve

    self.line_number = 0

    # FSM state.
    #
    #   0: Waiting for statement
    #   1: Waiting for body of statement (directive or instruction)
    self.state = 0

    # Set while we're part way through a block comment or a string
    self.in_comment = False
    self.in_string = False

    # The key symbol for this statement
    self.key_sym = None  # type: Optional[str]

    # Strings that should be spat out verbatim
    self.acc = []  # type: List[str]

    # Tell the assembler where the code came from, by starting the output
    # with .file and .line directives.
    out_handle.write(f'.file "{in_path}"\n.line 1\n')
| |
def mk_raw_line(self, insn: Insn, op_to_expr: Dict[str,
                                                    Optional[str]]) -> str:
    '''Generate a .word-style raw line

    insn must have an encoding and op_to_expr should map the operand names
    of insn to their string expressions from the assembly file. An operand
    whose expression is None is given the operand value zero.

    Raises a RuntimeError (tagged with the current file and line) if some
    operand cannot be turned into a value to encode.

    '''
    assert insn.encoding is not None

    where = '{}:{}'.format(self.in_path, self.line_number)

    # Build a map from operand name to the value that should be encoded for
    # it. The operand type both checks that the value fits and converts it
    # to its encoded form.
    enc_vals = {}
    for op_name, expr in op_to_expr.items():
        op_type = insn.name_to_operand[op_name].op_type

        try:
            if expr is None:
                op_val = 0
            else:
                op_val = op_type.str_to_op_val(expr.strip())
        except ValueError as err:
            raise RuntimeError(f'{where}: {err}') from None

        if op_val is None:
            raise RuntimeError(
                f'{where}: Cannot resolve operand expression '
                f'{expr!r} to an index and the instruction '
                f'{insn.mnemonic!r} '
                f'has an encoding incompatible with rv32i '
                f'.insn lines.') from None

        try:
            enc_val = op_type.op_val_to_enc_val(op_val, None)
        except ValueError as err:
            raise RuntimeError(f'{where}: {err}') from None

        # We pass None as the current PC, so the operand type may not be
        # able to give us a value.
        if enc_val is None:
            raise RuntimeError(
                f'{where}: Cannot encode {expr!r} operand for '
                f'{insn.mnemonic!r} instruction without a current PC '
                f'(which is not known to otbn_as.py).') from None

        enc_vals[op_name] = enc_val

    try:
        word_val = insn.encoding.assemble(enc_vals)
    except ValueError as err:
        raise RuntimeError(f'{where}: {err}') from None

    return f'.word {word_val:#010x}'
| |
def mk_rve_line(self, insn: Insn, rve: RVEncoding,
                op_to_expr: Dict[str, Optional[str]]) -> str:
    '''Generate a .insn line for insn, using the scheme in rve

    op_to_expr should map the operand names of insn to their string
    expressions from the assembly file. Raises a RuntimeError (tagged with
    the current file and line) if an operand's expression is not valid.

    '''
    # Start from a copy of the fixed bits for each .insn operand
    rv_nums = rve.rv_masks.copy()

    # Merge in the operands whose values we can compute ourselves,
    # scattering their bits over the .insn operands.
    for field_name, pieces in rve.part_field_to_rv.items():
        op_name = rve.encoding.fields[field_name].value
        assert isinstance(op_name, str)

        expr = op_to_expr[op_name]
        if expr is None:
            continue

        op_type = insn.name_to_operand[op_name].op_type
        try:
            op_val = op_type.str_to_op_val(expr.strip())
            assert op_val is not None
            enc_val = op_type.op_val_to_enc_val(op_val, None)
            assert enc_val is not None
        except ValueError as err:
            raise RuntimeError(
                f'{self.in_path}:{self.line_number}: {err}') from None

        for (field_msb, field_lsb), rv_name, rv_lsb in pieces:
            mask = (1 << (field_msb - field_lsb + 1)) - 1

            # The bits we're about to write should still be clear
            assert 0 == ((rv_nums[rv_name] >> rv_lsb) & mask)
            rv_nums[rv_name] |= ((enc_val >> field_lsb) & mask) << rv_lsb

    # Now work out the text for each .insn operand
    rendered = {}
    for rv_name, rv_num in rv_nums.items():
        rv_op_fmt, _, _ = rve.rvfmt.op_data[rv_name]

        if rv_num != 0:
            # There is fixed or partial data here, so the operand can't
            # also be one that takes an expression whole:
            # find_rv_encoding puts an operand in rv_to_full_op or in
            # rv_masks, but never both.
            assert rv_name not in rve.rv_to_full_op
        else:
            # If an instruction operand maps straight on to this .insn
            # operand and we have an expression for it, pass the text of
            # the expression through.
            op_name = rve.rv_to_full_op.get(rv_name)
            expr = op_to_expr[op_name] if op_name is not None else None
            if expr is not None:
                rendered[rv_name] = expr.strip()
                continue

        rendered[rv_name] = rv_render(rv_op_fmt, rv_num)

    operands_txt = ', '.join(rendered[rv_name]
                             for rv_name in rve.rvfmt.operands)
    return f'.insn {rve.rvfmt.name} {operands_txt}'
| |
def gen_line(self, insn: Insn, op_to_expr: Dict[str,
                                                 Optional[str]]) -> None:
    '''Build and write out a line for this instruction

    The output is tagged with a .line directive. Raises a RuntimeError if
    insn is a custom instruction with no encoding (the helpers that build
    the line can raise RuntimeError as well).

    '''
    assert self.key_sym is not None

    # A pseudo-operation with a literal expansion just expands to that (this
    # is None if there is no such expansion).
    expansion = insn.literal_pseudo_op

    # If this instruction has a special-case in the assembler, use it. We
    # checked when loading our instruction definitions that if the
    # instruction claims to have a special-case assembler, it really does.
    if insn.python_pseudo_op:
        where = f'{self.in_path}:{self.line_number}'
        expansion = _PSEUDO_OP_ASSEMBLERS[insn.mnemonic](where, op_to_expr)

    # The statement, more or less as it appeared in the input
    reconstructed = self.key_sym + ''.join(self.acc).rstrip()
    assert '\n' not in reconstructed

    # The number that goes in the .line directives
    dot_line = self.line_number - 1
    out = self.out_handle

    if expansion is not None:
        out.write(f'# pseudo-expansion for: {reconstructed}\n')
        for entry in expansion:
            out.write(f'.line {dot_line}\n{entry}\n')
        return

    # Instructions from the rv32i instruction set are understood by the
    # underlying assembler, so go straight through.
    if insn.rv32i:
        out.write(f'.line {dot_line}\n{reconstructed}\n')
        return

    # Without an encoding, we have no way to generate anything.
    if insn.encoding is None:
        raise RuntimeError(
            '{}:{}: Instruction {!r} has no encoding.'.format(
                self.in_path, self.line_number, insn.mnemonic))

    # A custom instruction. We have two possible approaches.
    #
    #  1. Generate a .insn line. This is fine if the encoding happens to
    #     map on to one of the encodings that riscv32-unknown-elf-as
    #     supports, but won't work otherwise.
    #
    #  2. Just generate the bits by hand. This is fine if we can resolve
    #     everything, but it won't work if there are any relocs to deal
    #     with.
    #
    # Option 1 is nicer, and saves us from properly parsing the expressions
    # for immediate operands, but it's only available for the instructions
    # that appear in self.mnem_to_rve. Use it if we can and fall back on
    # option 2 if not.
    rve = self.mnem_to_rve.get(self.key_sym.lower())
    if rve is None:
        line = self.mk_raw_line(insn, op_to_expr)
    else:
        line = self.mk_rve_line(insn, rve, op_to_expr)

    out.write(f'# {reconstructed}\n.line {dot_line}\n{line}\n')
| |
| def _continue_block_comment(self, line: str, pos: int) -> int: |
| '''Continue whitespace matching in a block comment |
| |
| Return end pos. Clear self.in_comment if we get to the end before EOL. |
| |
| ''' |
| assert self.in_comment |
| |
| # Search from pos for */ |
| idx = line.find('*/', pos) |
| |
| # If there is no such index, return EOL (and leave self.in_comment |
| # set). Don't eat the \n at the end of the line: that will be added by |
| # take_line. |
| if idx == -1: |
| return len(line) - 1 |
| |
| # Otherwise, update pos to just after it and then eat any trailing |
| # whitespace. |
| assert 0 <= idx <= len(line) - 2 |
| |
| self.in_comment = False |
| return self._eat_ws(line, idx + 2) |
| |
| def _continue_string(self, line: str, pos: int) -> int: |
| '''Continue reading a string''' |
| assert self.in_string |
| assert self.state == 1 |
| |
| while True: |
| # Search from pos for " (end of string) or \ (possible escape |
| # sequence) |
| quot_idx = line.find('"', pos) |
| esc_quot_idx = line.find('\\"', pos) |
| |
| if max(quot_idx, esc_quot_idx) < 0: |
| # EOL within string. |
| self.acc.append(line[pos:]) |
| return len(line) |
| |
| if quot_idx < 0: |
| # No " before EOL, but there is a \". Eat that and keep going. |
| self.acc.append(line[pos:esc_quot_idx + 2]) |
| pos = esc_quot_idx + 2 |
| continue |
| |
| if esc_quot_idx < 0 or quot_idx < esc_quot_idx: |
| # Either no \" or " comes first anyway |
| self.acc.append(line[pos:quot_idx + 1]) |
| self.in_string = False |
| return quot_idx + 1 |
| |
| def _eat_ws(self, line: str, pos: int) -> int: |
| '''Consume whitespace, updating FSM state if necessary''' |
| # Eat any blanks |
| match = re.match(r'[\t ]+', line[pos:]) |
| if match: |
| end = match.end() |
| self.acc.append(' ') |
| pos += end |
| |
| # Return if at EOL |
| if pos == len(line): |
| return pos |
| |
| # Spot a line comment ('#'). In that case, eat to EOL and return. |
| if line[pos] == '#': |
| return len(line) - 1 |
| |
| # The other possibility is a block comment ('/*'). If we're not looking |
| # at that, we can return the current position. |
| if line[pos:pos + 2] != '/*': |
| return pos |
| |
| # Otherwise, eat the /* and switch to reading block comments. Add a |
| # single space to acc to make sure we tokenize properly in examples |
| # like "foo/* xxx */bar" |
| self.in_comment = True |
| self.acc.append(' ') |
| |
| return self._continue_block_comment(line, pos + 2) |
| |
| def _eat_optional_label(self, line: str, pos: int) -> Tuple[int, bool]: |
| '''Consume an optional label''' |
| assert self.state == 0 |
| match = re.match(r'[0-9a-zA-Z_$.]+:', line[pos:]) |
| if match is None: |
| return (pos, False) |
| |
| end = pos + match.end() |
| self.acc.append(line[pos:end]) |
| return (self._eat_ws(line, end), True) |
| |
def _eat_labels(self, line: str, pos: int) -> int:
    '''Consume every label starting at pos and return the position after.

    Calls self._eat_optional_label repeatedly until it reports that no
    label was found; the position it returned on that last call is the
    result. Must only be called at the start of a statement
    (self.state == 0).

    '''
    assert self.state == 0
    while True:
        pos, took_label = self._eat_optional_label(line, pos)
        if not took_label:
            return pos
| |
def _eat_optional_token(self, line: str, pos: int) -> int:
    '''Consume one token of a statement body, starting at pos.

    Only valid part-way through a statement (self.state == 1), outside any
    comment or string, and with pos pointing at a character of line.

    If the character at pos is a double quote, this starts a string
    literal: the quote is added to self.acc, self.in_string is set and the
    rest is delegated to self._continue_string. Otherwise, everything up
    to the next space, tab or double quote is added to self.acc and any
    following whitespace is eaten with self._eat_ws.

    Returns the position at which to carry on reading.

    '''
    assert self.state == 1
    assert self.key_sym is not None
    assert not self.in_comment
    assert not self.in_string

    assert pos < len(line)
    if line[pos] == '"':
        # Record that we are inside a string before handing over, in the
        # same way that _eat_ws sets in_comment before it calls
        # _continue_block_comment. The end of _continue_string clears the
        # flag on the closing quote and leaves it alone when the line runs
        # out first; take_line and _end_stmt_line depend on it still being
        # set in that case so that the string carries on to the next line.
        # NOTE(review): if the start of _continue_string also sets the
        # flag, this assignment is redundant but harmless.
        self.in_string = True
        self.acc.append(line[pos])
        return self._continue_string(line, pos + 1)

    # The pattern can match the empty string, so there is always a match.
    match = re.match(r'[^ \t"]*', line[pos:])
    assert match is not None
    end = pos + match.end()
    self.acc.append(match.group(0))
    return self._eat_ws(line, end)
| |
| def _insn_for_keysym(self) -> Insn: |
| '''Find an instruction for the current key symbol''' |
| assert self.key_sym is not None |
| |
| # Most of the time, we'd hope the key sym appears in mnemonic_to_insn |
| low_key_sym = self.key_sym.lower() |
| insn = self.insns_file.mnemonic_to_insn.get(low_key_sym) |
| if insn is not None: |
| return insn |
| |
| # But we could have a glued operation, so key_sym might contain a |
| # following operation. Find the longest mnemonic with glued operations |
| # that is a prefix of key_sym (assuming there is one). |
| for insn in self.glued_insns_dec_len: |
| if low_key_sym.startswith(insn.mnemonic): |
| return insn |
| |
| raise RuntimeError('{}:{}: Unknown mnemonic: {!r}.'.format( |
| self.in_path, self.line_number, self.key_sym)) |
| |
| def _end_stmt_line(self) -> None: |
| '''Called at end of a stmt line to deal with any completed statement''' |
| assert self.state == 1 |
| assert self.key_sym is not None |
| |
| # If we're still in a comment or a string, keep going |
| if self.in_comment or self.in_string: |
| return |
| |
| # Otherwise, set state back to zero (@ beginning of statement) |
| self.state = 0 |
| |
| # If key_sym is a directive (starts with '.'), we can just pass it |
| # straight through. |
| if self.key_sym.startswith('.'): |
| self.out_handle.write(self.key_sym) |
| self.out_handle.write(''.join(self.acc)) |
| self.acc = [] |
| self.key_sym = None |
| return |
| |
| insn = self._insn_for_keysym() |
| |
| # Gather up everything after the mnemonic (possibly including some |
| # glued operands) as a string. |
| operands_str = self.key_sym[len(insn.mnemonic):] + ''.join(self.acc) |
| |
| match = insn.asm_pattern.match(operands_str.rstrip()) |
| if match is None: |
| raise RuntimeError( |
| '{}:{}: Cannot match syntax for {!r} ({!r}).'.format( |
| self.in_path, self.line_number, self.key_sym, |
| insn.syntax.render_doc())) |
| |
| op_to_val = {} # type: Dict[str, Optional[str]] |
| for op, grp in insn.pattern_op_to_grp.items(): |
| op_to_val[op] = match.group(grp) |
| |
| self.gen_line(insn, op_to_val) |
| self.acc = [] |
| self.key_sym = None |
| return |
| |
def _continue_stmt(self, line: str, pos: int) -> None:
    '''Read the rest of line (from pos) as part of the current statement.

    Leading whitespace is eaten first, then tokens are consumed one at a
    time with self._eat_optional_token until the position reaches the end
    of the line. Finally, self._end_stmt_line is called to deal with the
    statement if it is now complete.

    Only valid part-way through a statement (self.state == 1) and outside
    any comment or string.

    '''
    assert self.state == 1
    assert self.key_sym is not None
    assert not self.in_comment
    assert not self.in_string

    line_len = len(line)
    cursor = self._eat_ws(line, pos)
    while cursor < line_len:
        cursor = self._eat_optional_token(line, cursor)

    self._end_stmt_line()
| |
| def _take_stmt_body(self, line: str, pos: int) -> None: |
| '''Read the body of a statement''' |
| assert self.state == 0 |
| assert self.key_sym is None |
| assert pos < len(line) |
| |
| match = re.match(r'[0-9a-zA-Z_$.]+', line[pos:]) |
| if match is None: |
| raise RuntimeError( |
| '{}:{}:{}: Expected key symbol, but found {!r}.'.format( |
| self.in_path, self.line_number, pos, line[pos:])) |
| |
| self.key_sym = match.group(0) |
| self.state = 1 |
| |
| # We don't add key_sym to acc here: it will be read from self.key_sym |
| # at the end of the instruction / directive. |
| self._continue_stmt(line, pos + match.end()) |
| return |
| |
def take_line(self, line: str) -> None:
    '''Feed one line of input to the transformer.

    Bumps self.line_number, then carries on from wherever the previous
    line left off: inside a block comment, inside a string, waiting for a
    statement (state 0) or part-way through one (state 1).

    '''
    self.line_number += 1
    pos = 0

    # Make sure a non-empty line ends with a newline. In practice, the
    # only line that might not is the last one of a file with no trailing
    # newline.
    if line and not line.endswith('\n'):
        line += '\n'

    if self.in_comment:
        # We can't be in a string as well: a string can't contain a comment.
        assert not self.in_string

        pos = self._continue_block_comment(line, pos)
        if self.in_comment:
            # The comment swallowed the whole line.
            self.acc.append('\n')
            return

    if self.in_string:
        assert not self.in_comment
        assert self.state != 0

        pos = self._continue_string(line, pos)
        if self.in_string:
            # The string swallowed the whole line.
            return

    if self.state == 1:
        # Part-way through a statement.
        self._continue_stmt(line, pos)
        return

    if self.state != 0:
        # Not a state that we know about.
        assert 0
        return

    # State 0: waiting for a statement to start.
    assert not self.in_string

    # Skip leading whitespace / comments and then any labels. If either
    # step gets us to the newline, there's no statement body on this line
    # and we're done.
    for eat in (self._eat_ws, self._eat_labels):
        pos = eat(line, pos)
        if line[pos] == '\n':
            self.acc.append('\n')
            return

    # Write out what has accumulated so far and read the statement body.
    # That always uses up the rest of the line, although the statement
    # itself might not be finished.
    self.out_handle.write(''.join(self.acc))
    self.acc = []
    self._take_stmt_body(line, pos)
| |
def at_eof(self) -> None:
    '''Check that the input didn't stop in the middle of something.

    Raises RuntimeError if the end of the input was reached while a block
    comment or a string was still open. Does nothing otherwise.

    '''
    if self.in_comment:
        unfinished = 'Reached EOF while still in a comment.'
    elif self.in_string:
        unfinished = 'Reached EOF while still in a string.'
    else:
        return

    raise RuntimeError(unfinished)
| |
| |
def transform_input(out_handle: TextIO, in_path: str, in_handle: TextIO,
                    insns_file: InsnsFile, glued_insns_dec_len: List[Insn],
                    mnem_to_rve: Dict[str, RVEncoding]) -> None:
    '''Transform a single input stream to make it suitable for riscv as

    Builds a Transformer from out_handle, in_path, insns_file,
    glued_insns_dec_len and mnem_to_rve, passes it each line of in_handle
    in turn and then tells it that the end of the input has been reached.

    '''
    fsm = Transformer(out_handle, in_path, insns_file,
                      glued_insns_dec_len, mnem_to_rve)
    feed = fsm.take_line
    for text in in_handle:
        feed(text)
    fsm.at_eof()
| |
| |
def transform_inputs(out_dir: str, inputs: List[str], insns_file: InsnsFile,
                     mnem_to_rve: Dict[str, RVEncoding],
                     glued_insns_dec_len: List[Insn],
                     just_translate: bool) -> List[str]:
    '''Transform each input to make it suitable for riscv as

    The input at index i is paired with the output path out_dir/i. An
    input path of '--' means standard input. If just_translate is true,
    the transformed text goes to standard output rather than to the
    output path (which is still included in the result).

    Returns the list of output paths, in the same order as inputs.

    '''
    out_paths = []
    for idx, in_path in enumerate(inputs):
        out_path = os.path.join(out_dir, str(idx))
        out_paths.append(out_path)

        from_stdin = in_path == '--'

        # Start with the standard streams and only swap in real files
        # where needed, so that the clean-up below can tell which handles
        # belong to us.
        in_handle = sys.stdin
        out_handle = sys.stdout
        try:
            if not from_stdin:
                in_handle = open(in_path, 'r')
            if not just_translate:
                out_handle = open(out_path, 'w')

            transform_input(out_handle,
                            'stdin' if from_stdin else in_path,
                            in_handle, insns_file,
                            glued_insns_dec_len, mnem_to_rve)
        finally:
            # Close whatever we opened (but never the standard streams).
            if in_handle is not sys.stdin:
                in_handle.close()
            if out_handle is not sys.stdout:
                out_handle.close()

    return out_paths
| |
| |
def run_binutils_as(other_args: List[str], inputs: List[str]) -> int:
    '''Run binutils' as on transformed inputs

    The command line is the assembler found by find_tool('as'), then a
    fixed set of default flags, then other_args and finally inputs.

    Performs no output redirection and returns the process's exit code. If
    the assembler can't be found when we try to run it, a message is
    written to stderr and the result is 127.

    '''
    assembler = find_tool('as')

    fixed_flags = [
        # Don't ask the linker to do relaxation because, in some cases, this
        # might generate a GP-relative load. OTBN doesn't treat x3 (gp)
        # specially, so this won't work.
        '-mno-relax',
        # OTBN isn't a standard RISC-V architecture, disable .riscv.attributes.
        '-mno-arch-attr',
        # OTBN is based on RV32I without any hard float support.
        '-mabi=ilp32',
    ]

    cmd = [assembler] + fixed_flags + other_args + inputs
    try:
        proc = subprocess.run(cmd)
    except FileNotFoundError:
        sys.stderr.write('Unknown command: {!r}.\n'.format(assembler))
        return 127

    return proc.returncode
| |
| |
def main(argv: List[str]) -> int:
    '''Entry point: transform the inputs and run binutils' as on the result

    argv is the full argument vector. It is split by parse_positionals
    into input files, arguments to pass through to as, and flags for this
    script. With no input files, standard input is read. If the
    '--otbn-translate' flag is present, the transformed assembly is printed
    and as is not run.

    Returns the exit status for the process: 1 if the instruction
    definitions can't be loaded, are inconsistent with this script or an
    input can't be transformed; 0 after a successful '--otbn-translate';
    otherwise whatever run_binutils_as returns.

    '''
    files, other_args, flags = parse_positionals(argv)
    files = files or ['--']
    just_translate = '--otbn-translate' in flags

    # files is now a nonempty list of input files. Rather unusually, '--'
    # (rather than '-') denotes standard input.

    try:
        insns_file = load_insns_yaml()
    except RuntimeError as err:
        sys.stderr.write('{}\n'.format(err))
        return 1

    # A list of instructions that have "glued operations" (which means their
    # syntax doesn't require a space between the mnemonic and the first
    # operation). Ordered from longest to shortest mnemonic, so that you can
    # find a maximal prefix by linearly searching through the list and calling
    # startswith.
    glued_insns_dec_len = [insn for insn in insns_file.insns
                           if insn.glued_ops]
    glued_insns_dec_len.sort(key=lambda insn: len(insn.mnemonic), reverse=True)

    # Check that any instruction that claims to have a Python pseudo-op
    # assembler really does.
    for insn in insns_file.insns:
        if insn.python_pseudo_op:
            if insn.mnemonic not in _PSEUDO_OP_ASSEMBLERS:
                sys.stderr.write(
                    "Instruction {!r} has python-pseudo-op true, "
                    "but otbn_as.py doesn't have a custom assembler "
                    "for it.\n".format(insn.mnemonic))
                return 1

    # Try to match up OTBN instruction encodings with .insn schemes (as stored
    # in RISCV_FORMATS).
    mnem_to_rve = find_insn_schemes(insns_file.mnemonic_to_insn)

    # The transformed files live in a temporary directory, which must stay
    # around until as has finished reading them.
    with tempfile.TemporaryDirectory(suffix='.otbn-as') as tmpdir:
        try:
            transformed = transform_inputs(tmpdir, files, insns_file,
                                           mnem_to_rve, glued_insns_dec_len,
                                           just_translate)
        except RuntimeError as err:
            sys.stderr.write('{}\n'.format(err))
            return 1

        if just_translate:
            # transform_inputs already printed out the translated code. We're
            # done.
            return 0

        # Pass the arguments by name: the pass-through options must come
        # first and the transformed files second, and both are lists of
        # strings so a mix-up would go unnoticed.
        return run_binutils_as(other_args=other_args, inputs=transformed)
| |
| |
# When run as a script, hand the argument vector to main() and use its
# result as the exit status of the process.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)