Add gentrace-renode: elaborates Renode traces. This adds a script based on `gentrace-spike.py` and Renode's `execution_tracer_reader.py`. Change-Id: I8a7f3cf7584fce222b65029242a1cfbafcaa3859
diff --git a/tbm/gentrace-renode.py b/tbm/gentrace-renode.py new file mode 100755 index 0000000..b4837fb --- /dev/null +++ b/tbm/gentrace-renode.py
#! /usr/bin/env python3
# Copyright 2023 Google LLC
# Copyright 2023 Antmicro
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Elaborate a Renode trace for TBM."""

import logging
from typing import IO, Optional, Sequence
import flatbuffers

# Generated by `flatc`.
import FBInstruction.Instructions as FBInstrs

import disassembler
from instruction import Instruction, is_vector_register
import utilities
from utilities import CallEvery
from utilities import FileFormat

### Start of execution_tracer_reader.py
import argparse
import platform
import sys
import os
import gzip
from enum import Enum

from ctypes import cdll, c_char_p, POINTER, c_void_p, c_ubyte, c_uint64, c_byte, c_size_t, cast


FILE_SIGNATURE = b"ReTrace"
FILE_VERSION = b"\x02"
# Fixed part of the header: signature (7 bytes) + version (1) + PC length (1)
# + opcodes flag (1), matching the reads performed by `read_header`.
HEADER_LENGTH = 10
# One access-type byte followed by an 8-byte address.
MEMORY_ACCESS_LENGTH = 9
# 8 bytes of `vl` followed by 8 bytes of `vtype`.
RISCV_VECTOR_CONFIGURATION_LENGTH = 16


class AdditionalDataType(Enum):
    """Tag byte that introduces each additional-data record of an entry."""
    Empty = 0
    MemoryAccess = 1
    RiscVVectorConfiguration = 2


class MemoryAccessType(Enum):
    """Kind of memory access stored in a `MemoryAccess` record."""
    MemoryIORead = 0
    MemoryIOWrite = 1
    MemoryRead = 2
    MemoryWrite = 3
    InsnFetch = 4


class AdditionalData():
    """A decoded additional-data record: its type tag and its payload tuple."""

    def __init__(self, data_type, data_tuple):
        self.data_type = data_type
        self.data_tuple = data_tuple

    def __str__(self):
        return "AdditionalData: data_type: {}, data_tuple: {}".format(self.data_type, self.data_tuple)


class Header():
    """Parsed trace file header.

    `extra_length` is the number of header bytes that follow the fixed
    `HEADER_LENGTH` prefix; `TraceData.__iter__` uses it to locate the first
    entry.
    """

    def __init__(self, pc_length, has_opcodes, extra_length=0, uses_thumb_flag=False,
                 triple_and_model=None):
        self.pc_length = pc_length
        self.has_opcodes = has_opcodes
        self.extra_length = extra_length
        self.uses_thumb_flag = uses_thumb_flag
        self.triple_and_model = triple_and_model

    def __str__(self):
        return "Header: pc_length: {}, has_opcodes: {}, extra_length: {}, uses_thumb_flag: {}, triple_and_model: {}".format(
            self.pc_length, self.has_opcodes, self.extra_length, self.uses_thumb_flag, self.triple_and_model)


def read_header(file):
    """Read and validate the trace header from the current position of `file`.

    Args:
        file: a binary file object positioned at the start of the trace.

    Returns:
        A `Header` describing the trace.

    Raises:
        InvalidFileFormatException: if the signature, version or any header
            field is missing or invalid.
    """
    if file.read(len(FILE_SIGNATURE)) != FILE_SIGNATURE:
        raise InvalidFileFormatException("File signature isn't detected.")

    version = file.read(1)
    if version != FILE_VERSION:
        raise InvalidFileFormatException("Unsuported file format version")

    pc_length_raw = file.read(1)
    opcodes_raw = file.read(1)
    if len(pc_length_raw) != 1 or len(opcodes_raw) != 1:
        raise InvalidFileFormatException("Invalid file header")

    if opcodes_raw[0] == 0:
        return Header(pc_length_raw[0], False, 0, False, None)

    if opcodes_raw[0] != 1:
        raise InvalidFileFormatException("Invalid opcodes field at file header")

    # Traces with opcodes carry two more single-byte fields and a
    # variable-length "<triple> <model>" identifier.
    uses_thumb_flag_raw = file.read(1)
    identifier_length_raw = file.read(1)
    if len(uses_thumb_flag_raw) != 1 or len(identifier_length_raw) != 1:
        raise InvalidFileFormatException("Invalid file header")

    uses_thumb_flag = uses_thumb_flag_raw[0] == 1
    identifier_length = identifier_length_raw[0]
    triple_and_model_raw = file.read(identifier_length)
    if len(triple_and_model_raw) != identifier_length:
        raise InvalidFileFormatException("Invalid file header")

    triple_and_model = triple_and_model_raw.decode("utf-8")
    extra_length = 2 + identifier_length

    return Header(pc_length_raw[0], True, extra_length, uses_thumb_flag, triple_and_model)


def read_file(file, disassemble, llvm_disas_path):
    """Parse the header of `file` and return a `TraceData` iterator over it."""
    header = read_header(file)
    return TraceData(file, header, disassemble, llvm_disas_path)


def bytes_to_hex(bytes, zero_padded=True):
    """Format little-endian `bytes` as an upper-case "0x..." hex string.

    With `zero_padded` the number is padded to two hex digits per input byte.
    """
    integer = int.from_bytes(bytes, byteorder="little", signed=False)
    format_string = "0{}X".format(len(bytes)*2) if zero_padded else "X"
    return "0x{0:{fmt}}".format(integer, fmt=format_string)


class TraceData:
    """Iterator over the entries of a Renode execution trace.

    Each iteration yields a tuple `(pc, opcode, additional_data, thumb_mode)`
    where `pc` and `opcode` are raw little-endian `bytes`, `additional_data`
    is a list of `AdditionalData` and `thumb_mode` is a bool.
    """

    pc_length = 0
    has_opcodes = False
    file = None
    disassembler = None
    disassembler_thumb = None
    thumb_mode = False
    instructions_left_in_block = 0

    def __init__(self, file, header, disassemble, llvm_disas_path):
        """Init.

        Args:
            file: the open binary trace file the header was read from.
            header: the `Header` returned by `read_header`.
            disassemble: whether to create LLVM disassemblers.
            llvm_disas_path: path of the `libllvm-disas` shared library.

        Raises:
            InvalidFileFormatException: if disassembly is requested but the
                header carries no target identifier.
        """
        self.file = file
        self.pc_length = int(header.pc_length)
        self.has_pc = (self.pc_length != 0)
        self.has_opcodes = bool(header.has_opcodes)
        self.extra_length = header.extra_length
        self.uses_thumb_flag = header.uses_thumb_flag
        self.triple_and_model = header.triple_and_model
        self.disassemble = disassemble
        if self.disassemble:
            if header.triple_and_model is None:
                # Without opcodes the header has no "<triple> <model>" string,
                # so there is nothing to configure the disassembler with.
                raise InvalidFileFormatException(
                    "Trace contains no opcodes, disassembling is not possible")
            triple, model = header.triple_and_model.split(" ")
            self.disassembler = LLVMDisassembler(triple, model, llvm_disas_path)
            if self.uses_thumb_flag:
                self.disassembler_thumb = LLVMDisassembler("thumb", model, llvm_disas_path)

    def __iter__(self):
        # Rewind to the first entry, right after the (variable-length) header.
        self.file.seek(HEADER_LENGTH + self.extra_length, 0)
        return self

    def __next__(self):
        """Read and return the next trace entry.

        Raises:
            StopIteration: when the file ends on an entry boundary.
            InvalidFileFormatException: when the file ends in the middle of
                an entry or contains an unknown additional-data tag.
        """
        additional_data = []  # list[AdditionalData]

        if self.uses_thumb_flag and self.instructions_left_in_block == 0:
            thumb_flag_raw = self.file.read(1)
            if len(thumb_flag_raw) != 1:
                # No more data frames to read
                raise StopIteration

            self.thumb_mode = thumb_flag_raw[0] == 1

            block_length_raw = self.file.read(8)
            if len(block_length_raw) != 8:
                raise InvalidFileFormatException("Unexpected end of file")

            # The `instructions_left_in_block` counter is kept only for traces
            # produced by cores that can switch between ARM and Thumb mode.
            self.instructions_left_in_block = int.from_bytes(block_length_raw, byteorder="little", signed=False)

        if self.uses_thumb_flag:
            self.instructions_left_in_block -= 1

        pc = self.file.read(self.pc_length)
        opcode_length = self.file.read(int(self.has_opcodes))

        if self.pc_length != len(pc):
            # No more data frames to read
            raise StopIteration
        if self.has_opcodes and len(opcode_length) == 0:
            if self.has_pc:
                raise InvalidFileFormatException("Unexpected end of file")
            # No more data frames to read
            raise StopIteration

        if self.has_opcodes:
            opcode_length = opcode_length[0]
            opcode = self.file.read(opcode_length)
            if len(opcode) != opcode_length:
                raise InvalidFileFormatException("Unexpected end of file")
        else:
            opcode = b""

        additional_data_type = self._read_additional_data_type()
        if additional_data_type is None:
            if self.has_pc or self.has_opcodes:
                # A PC and/or opcode was read, so the entry is truncated.
                raise InvalidFileFormatException("Unexpected end of file")
            # Nothing was read for this entry at all: clean end of trace.
            raise StopIteration

        while additional_data_type is not AdditionalDataType.Empty:
            if additional_data_type is AdditionalDataType.MemoryAccess:
                data_tuple = self.parse_memory_access_data()
            else:
                data_tuple = self.parse_riscv_vector_configuration_data()

            additional_data.append(AdditionalData(additional_data_type, data_tuple))

            additional_data_type = self._read_additional_data_type()
            if additional_data_type is None:
                # The file ended without a terminating `Empty` tag; keep the
                # records decoded so far.
                break
        return (pc, opcode, additional_data, self.thumb_mode)

    def _read_additional_data_type(self):
        """Read one tag byte; return its `AdditionalDataType` or None at EOF."""
        tag_raw = self.file.read(1)
        if len(tag_raw) != 1:
            return None
        try:
            return AdditionalDataType(tag_raw[0])
        except ValueError as err:
            raise InvalidFileFormatException(
                f"Unknown additional data type: {tag_raw[0]}") from err

    def parse_memory_access_data(self):
        """Read a memory-access record; return `(MemoryAccessType, address)`.

        The address is returned as a zero-padded hex string.
        """
        data = self.file.read(MEMORY_ACCESS_LENGTH)
        if len(data) != MEMORY_ACCESS_LENGTH:
            raise InvalidFileFormatException("Unexpected end of file")
        access_type = MemoryAccessType(data[0])
        address = bytes_to_hex(data[1:])
        return (access_type, address)

    def parse_riscv_vector_configuration_data(self):
        """Read a vector-configuration record; return `(vl, vtype)` hex strings."""
        data = self.file.read(RISCV_VECTOR_CONFIGURATION_LENGTH)
        if len(data) != RISCV_VECTOR_CONFIGURATION_LENGTH:
            raise InvalidFileFormatException("Unexpected end of file")
        vl = bytes_to_hex(data[0:8], zero_padded=False)
        vtype = bytes_to_hex(data[8:16], zero_padded=False)
        return (vl, vtype)

    def format_memory_access_data(self, additional_data):
        """Return a one-line description of a memory-access payload tuple."""
        (access_type, address) = additional_data
        return f"{access_type.name} with address {address}"

    def format_riscv_vector_configuration_data(self, additional_data):
        """Return a one-line description of a vector-configuration payload."""
        (vl, vtype) = additional_data
        return f"Vector configured to VL: {vl}, VTYPE: {vtype}"

    def format_entry(self, entry):
        """Return a human-readable, possibly multi-line, rendering of `entry`."""
        (pc, opcode, additional_data, thumb_mode) = entry
        if self.pc_length:
            pc_str = bytes_to_hex(pc)
        if self.has_opcodes:
            opcode_str = bytes_to_hex(opcode)

        if self.pc_length and self.has_opcodes:
            output = f"{pc_str}: {opcode_str}"
        elif self.pc_length:
            output = pc_str
        elif self.has_opcodes:
            output = opcode_str
        else:
            output = ""

        if self.has_opcodes and self.disassemble:
            disas = self.disassembler_thumb if thumb_mode else self.disassembler
            _, instruction = disas.get_instruction(opcode)
            output += " " + instruction.decode("utf-8")

        for additional_data_entry in additional_data:
            if additional_data_entry.data_type is AdditionalDataType.MemoryAccess:
                output += "\n" + self.format_memory_access_data(additional_data_entry.data_tuple)
            elif additional_data_entry.data_type is AdditionalDataType.RiscVVectorConfiguration:
                output += "\n" + self.format_riscv_vector_configuration_data(additional_data_entry.data_tuple)

        return output


class InvalidFileFormatException(Exception):
    """Raised when the trace file does not follow the expected format."""


class LLVMDisassembler():
    """Thin ctypes wrapper around the `libllvm-disas` shared library."""

    def __init__(self, triple, cpu, llvm_disas_path):
        """Load the library and create a disassembler for `triple` / `cpu`.

        Raises:
            Exception: if the library cannot be loaded or LLVM does not
                return a context for the requested triple / CPU.
        """
        try:
            self.lib = cdll.LoadLibrary(llvm_disas_path)
        except OSError as err:
            raise Exception('Could not find valid `libllvm-disas` library. Please specify the correct path with the --llvm-disas-path argument.') from err

        self.__init_library()

        self._context = self.lib.llvm_create_disasm_cpu(c_char_p(triple.encode('utf-8')), c_char_p(cpu.encode('utf-8')))
        if not self._context:
            raise Exception('CPU or triple name not detected by LLVM. Disassembling will not be possible.')

    def __del__(self):
        # Only dispose a context that was actually created: `_context` is
        # missing if loading the library failed and NULL (falsy) if LLVM
        # rejected the triple / CPU.
        if getattr(self, '_context', None):
            self.lib.llvm_disasm_dispose(self._context)

    def __init_library(self):
        """Declare the C signatures of the library functions used here."""
        self.lib.llvm_create_disasm_cpu.argtypes = [c_char_p, c_char_p]
        self.lib.llvm_create_disasm_cpu.restype = POINTER(c_void_p)

        self.lib.llvm_disasm_dispose.argtypes = [POINTER(c_void_p)]

        self.lib.llvm_disasm_instruction.argtypes = [POINTER(c_void_p), POINTER(c_ubyte), c_uint64, c_char_p, c_size_t]
        self.lib.llvm_disasm_instruction.restype = c_size_t

    def get_instruction(self, opcode):
        """Disassemble `opcode` (bytes).

        Returns:
            A tuple of the value returned by `llvm_disasm_instruction` and
            the disassembly text as `bytes`.
        """
        opcode_buf = cast(c_char_p(opcode), POINTER(c_ubyte))
        disas_str = cast((c_byte * 1024)(), c_char_p)

        bytes_read = self.lib.llvm_disasm_instruction(self._context, opcode_buf, c_uint64(len(opcode)), disas_str, 1024)

        return (bytes_read, disas_str.value)

### End of execution_tracer_reader.py

logger = logging.getLogger("gentrace-renode")


def extractBits(num: int, start: int, length: int) -> int:
    """Return the `length`-bit field of `num` whose lowest bit is bit `start`.

    `start` is 0-based, counted from the least significant bit, which is how
    the callers in this file use it (e.g. `extractBits(vtype, 3, 3)` yields
    bits 5:3).
    """
    return (num >> start) & ((1 << length) - 1)


def vsew2sew(vsew: int) -> int | None:
    """Map the `vsew` field of `vtype` to an element width in bits.

    Returns None for reserved encodings.
    """
    return {0b000: 8, 0b001: 16, 0b010: 32, 0b011: 64}.get(vsew)


def vlmul2lmul(vlmul: int) -> int | float | None:
    """Map the `vlmul` field of `vtype` to the register group multiplier.

    Returns None for the reserved encoding (0b100).
    """
    return {
        0b000: 1,
        0b001: 2,
        0b010: 4,
        0b011: 8,
        0b111: 1/2,
        0b110: 1/4,
        0b101: 1/8,
    }.get(vlmul)


class ElaborateTrace:
    """Elaborate a trace from a Renode log file."""

    def __init__(self, trace_data: TraceData, output_file: IO,
                 output_format: FileFormat,
                 output_buffer_size: int,
                 functions: Optional[Sequence[Sequence[int]]]) -> None:
        """Init.

        Args:
          trace_data: an open log file
          output_file: an open output file
          output_format: generate json or flatbuffers output.
          output_buffer_size: number of instructions to collect before they
            are written to `output_file`.
          functions: optional list of PC ranges to include in the output.
        """
        self._trace_data = trace_data
        self._output_file = output_file
        self._output_format = output_format
        self._functions = functions

        # The current instruction being processed.
        # See the curr_instr @property below.
        self._curr_instr = None
        # Last (lmul, sew, vl) seen in the trace, or None if none was seen.
        self._curr_vector_config = None

        # Buffer instructions before writing them to the output file.
        self._instrs_buf = []
        self._output_buffer_size = output_buffer_size

        # The number of instructions included in the output trace (some might
        # be buffered).
        self.instr_count = 0

        # If `discard_until` is set to some int, instructions from the trace
        # are discarded until an instruction from address `discard_until` is
        # read from the trace.
        self._discard_until = None

    @property
    def curr_instr(self) -> Instruction:
        return self._curr_instr

    @curr_instr.setter
    def curr_instr(self, instr: Instruction) -> None:
        self.instr_count += 1

        previous = self._curr_instr
        if previous:
            # NOTE(review): a successor that is not at `addr + 4` is treated
            # as a branch target; this presumably assumes 4-byte
            # instructions - confirm for targets with 2-byte encodings.
            if instr.addr != previous.addr + 4:
                previous.branch_target = instr.addr

            self.clear_curr_instr()

        self._curr_instr = instr

    def clear_curr_instr(self) -> None:
        """Move the current instruction to the buffer, flushing it when full."""
        if not self._curr_instr:
            return

        self._instrs_buf.append(self._curr_instr)
        if len(self._instrs_buf) >= self._output_buffer_size:
            self.write_to_file()

        self._curr_instr = None

    def run(self) -> None:
        """Process every entry of the trace and write the elaborated output."""
        for entry in self._trace_data:
            self.try_instruction(entry)

        # Flush out the instructions buffer.
        self.clear_curr_instr()
        if self._instrs_buf:
            self.write_to_file()

    def try_instruction(self, entry) -> bool:
        """Turn one trace entry into an `Instruction` and make it current.

        Args:
          entry: a `(pc, opcode, additional_data, thumb_mode)` tuple as
            yielded by `TraceData`.

        Returns:
          Always True (the entry was consumed, whether kept or discarded).

        Raises:
          RuntimeError: if the trace data has no disassembler for the entry.
        """
        (addr, opcode, additional_data, thumb_mode) = entry
        addr_int = int.from_bytes(addr, byteorder="little", signed=False)

        if self._discard_until is not None:
            if addr_int != self._discard_until:
                # Discard this instruction
                return True
            # We reached the desired location, stop discarding
            # instructions.
            self._discard_until = None

        if self._functions is not None and all(
                addr_int not in r for r in self._functions):
            # This instruction is not to be included in the output.
            self.clear_curr_instr()
            return True

        opcode_int = int.from_bytes(opcode, byteorder="little", signed=False)

        disas = self._trace_data.disassembler_thumb if thumb_mode else self._trace_data.disassembler
        if disas is None:
            # The mnemonic and operands come from the disassembler, so the
            # trace cannot be elaborated without one.
            raise RuntimeError(
                "No disassembler available: run with --disassemble on a "
                "trace that contains opcodes")
        _, instruction = disas.get_instruction(opcode)
        instruction = instruction.decode("utf-8").strip().replace('\t', ' ')

        # Split "mnemonic op1, op2, ..." into its parts.
        mnemonic, _, operands = instruction.partition(" ")
        mnemonic = mnemonic.strip()
        operands = operands.strip()

        ops = operands.split(", ") if operands != "" else []
        (inputs, outputs) = disassembler.asm_registers(mnemonic, ops)

        new_instr = Instruction(addr=addr_int,
                                opcode=opcode_int,
                                mnemonic=mnemonic,
                                operands=ops,
                                inputs=inputs,
                                outputs=outputs,
                                is_nop=disassembler.is_nop(mnemonic),
                                is_branch=disassembler.is_branch(mnemonic),
                                branch_target=None,
                                is_flush=disassembler.is_flush(mnemonic),
                                is_vctrl=disassembler.is_vctrl(mnemonic),
                                loads=[],
                                stores=[],
                                lmul=None,
                                sew=None,
                                vl=None)
        self.curr_instr = new_instr

        # Add the additional data to the instruction.
        for additional_data_entry in additional_data:
            if additional_data_entry.data_type is AdditionalDataType.MemoryAccess:
                self._add_memory_access(new_instr, additional_data_entry.data_tuple)
            elif additional_data_entry.data_type is AdditionalDataType.RiscVVectorConfiguration:
                self._add_vector_configuration(new_instr, additional_data_entry.data_tuple)

        # Trace from Renode saves the vector configuration only for vctrl
        # instructions that modify vl/vtype registers. Until Renode's format
        # is enhanced, copy the current vector configuration to every
        # instruction that uses a vector register.
        if (self._curr_vector_config is not None
                and self._uses_vector_register(new_instr)):
            new_instr.lmul, new_instr.sew, new_instr.vl = self._curr_vector_config

        return True

    @staticmethod
    def _add_memory_access(instr: Instruction, data_tuple) -> None:
        """Record a memory access on `instr` as a load or a store address."""
        access_type, address = data_tuple
        mem_addr = int(address, 16)
        if access_type in (MemoryAccessType.MemoryWrite,
                           MemoryAccessType.MemoryIOWrite):
            instr.stores.append(mem_addr)
        # the instruction fetch is not included according to the documentation
        elif access_type in (MemoryAccessType.MemoryRead,
                             MemoryAccessType.MemoryIORead):
            instr.loads.append(mem_addr)

    def _add_vector_configuration(self, instr: Instruction, data_tuple) -> None:
        """Decode `(vl, vtype)` into `instr` and remember it as current."""
        vl, vtype = data_tuple
        vtype_int = int(vtype, 16)

        # vector length multiplier: vtype bits 2:0
        instr.lmul = vlmul2lmul(extractBits(vtype_int, 0, 3))
        # element width: vtype bits 5:3
        instr.sew = vsew2sew(extractBits(vtype_int, 3, 3))
        # vector length
        instr.vl = int(vl, 16)

        # Save current vector configuration
        self._curr_vector_config = (instr.lmul, instr.sew, instr.vl)

    @staticmethod
    def _uses_vector_register(instr: Instruction) -> bool:
        """Return True if any input or output of `instr` is a vector register."""
        for regs_by_type in (instr.inputs_by_type, instr.outputs_by_type):
            for regs in regs_by_type().values():
                if any(is_vector_register(reg) for reg in regs):
                    return True
        return False

    def write_to_file(self) -> None:
        """Write the buffered instructions to the output file and empty the buffer."""
        if self._output_format == FileFormat.JSON:
            instrs = [i.to_json() for i in self._instrs_buf]
            print("\n".join(instrs), file=self._output_file)

        else:
            assert self._output_format == FileFormat.FLATBUFFERS

            builder = flatbuffers.Builder()
            instrs = [i.fb_build(builder) for i in self._instrs_buf]

            FBInstrs.StartInstructionsVector(builder, len(instrs))
            for x in reversed(instrs):
                builder.PrependUOffsetTRelative(x)
            instrs = builder.EndVector()

            FBInstrs.Start(builder)
            FBInstrs.AddInstructions(builder, instrs)
            instrs = FBInstrs.End(builder)

            builder.Finish(instrs)
            buf = builder.Output()
            # Each flatbuffer is preceded by its size as a 4-byte
            # little-endian integer.
            self._output_file.write(len(buf).to_bytes(4, byteorder="little"))
            self._output_file.write(buf)

        self._instrs_buf.clear()


def get_parser() -> argparse.ArgumentParser:
    """Return a command line parser."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # NOTE(review): `--cycles` is accepted but nothing in this file reads
    # `args.cycles` - confirm whether the limit still has to be implemented.
    parser.add_argument("--cycles",
                        type=int,
                        help="Maximum length of trace",
                        metavar="N")

    parser.add_argument("--outfile",
                        default="out.trace",
                        help="Output file.",
                        metavar="OFILE")

    parser.add_argument("--json",
                        action="store_true",
                        help="Write the trace as a sequence of json objects"
                        " (instead of flat-buffers).")

    parser.add_argument("--output-buffer-size",
                        type=int,
                        default=100000,
                        help="For efficiency, a buffer in memory collects N"
                        " processed instructions, and write all of them to the"
                        " output together.",
                        metavar="N",
                        dest="output_buffer_size")

    # The -v flag is setup so that verbose holds the number of times the flag
    # was used. This is the standard way to use -v, even though at the moment
    # we have only two levels of verbosity: warning (the default, with no -v),
    # and info.
    parser.add_argument("--verbose", "-v",
                        default=0,
                        action="count",
                        help="Increase the verbosity level. By default only"
                        " errors and warnings will show. Use '-v' to also show"
                        " information messages.")

    return parser


def _find_llvm_disas_library() -> str:
    """Return the path of `libllvm-disas` found in the default locations.

    Raises:
        Exception: if the library is in none of the searched directories.
    """
    p = platform.system()
    if p == 'Darwin':
        ext = '.dylib'
    elif p == 'Windows':
        ext = '.dll'
    else:
        ext = '.so'

    lib_name = 'libllvm-disas' + ext

    script_dir = os.path.dirname(os.path.realpath(__file__))
    lib_search_paths = [
        os.path.join(script_dir, os.pardir, os.pardir, "lib", "resources", "llvm"),
        script_dir,
        os.getcwd()
    ]

    for search_path in lib_search_paths:
        lib_path = os.path.join(search_path, lib_name)
        if os.path.isfile(lib_path):
            return lib_path

    raise Exception('Could not find ' + lib_name + ' in any of the following locations: ' + ', '.join([os.path.abspath(path) for path in lib_search_paths]))


def main(argv: Sequence[str]) -> int:
    """Parse `argv`, elaborate the input trace and write the output file.

    Returns:
        0 on success; failures terminate the process through `sys.exit`.
    """
    parser = get_parser()
    ### Start of execution_tracer_reader.py
    parser.add_argument("file", help="binary file")
    parser.add_argument("-d", action="store_true", default=False,
                        help="decompress file, without the flag decompression is enabled based on a file extension")
    parser.add_argument("--force-disable-decompression", action="store_true", default=False)

    parser.add_argument("--disassemble", action="store_true", default=False)
    parser.add_argument("--llvm-disas-path", default=None, help="path to libllvm-disas library")
    ### End of execution_tracer_reader.py
    args = parser.parse_args(argv)

    ### Start of execution_tracer_reader.py
    # Look for the libllvm-disas library in default location
    if args.disassemble and args.llvm_disas_path is None:
        args.llvm_disas_path = _find_llvm_disas_library()
    ### End of execution_tracer_reader.py

    log_level = logging.WARNING
    if args.verbose > 0:
        log_level = logging.INFO

    utilities.logging_config(log_level)

    ### Start of execution_tracer_reader.py
    try:
        _, file_extension = os.path.splitext(args.file)
        if (args.d or file_extension == ".gz") and not args.force_disable_decompression:
            file_open = gzip.open
        else:
            file_open = open

        with file_open(args.file, "rb") as input_file:
            trace_data = read_file(input_file, args.disassemble, args.llvm_disas_path)

            if args.json:
                fmt = FileFormat.JSON
                mode = "w"
                encoding = "ascii"
            else:
                fmt = FileFormat.FLATBUFFERS
                mode = "wb"
                encoding = None

            with open(args.outfile, mode, encoding=encoding) as output_file:
                gen = ElaborateTrace(trace_data, output_file, fmt,
                                     args.output_buffer_size, None)
                with CallEvery(30, lambda: logger.info("processed %d instructions",
                                                       gen.instr_count)):
                    gen.run()

            print(f"Processed {gen.instr_count} instructions")

    except InvalidFileFormatException as err:
        sys.exit(f"Error: {err}")
    except KeyboardInterrupt:
        sys.exit(1)
    except Exception as err:
        sys.exit(err)
    ### End of execution_tracer_reader.py

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))