blob: 1c1eb9230bec35da7e1af61d0a80db2daac0a5b8 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
"""Script to convert logs placed in given sections into SystemVerilog-friendly
database.
The tool uses the pyelftools utility to extract the log fields from a given
section and the strings from read only sections. It processes the log fields
& the strings and converts them into a database. The script produces 2 outputs:
- <name>.logs.txt, which is the log database
- <name>.rodata.txt, which contains {addr: string} pairs.
"""
import argparse
import os
import re
import struct
import sys
from elftools.elf import elffile
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields:
# severity (int), 4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
# file_name (int, ptr), 4 bytes: Pointer to file_name string.
# Line no (int), 4 bytes: Line number of the log message.
# Nargs (int), 4 bytes: Number of arguments the format string takes.
# format (int, ptr), 4 bytes: Log format string.
#
# Total size of log_fields_t: 20 bytes.
# Default name of the ELF section holding the log_fields_t entries; used as
# the default of the --logs-fields-section command line option.
LOGS_FIELDS_SECTION = '.logs.fields'
# Size in bytes of a single log_fields_t entry (five 4-byte fields).
LOGS_FIELDS_SIZE = 20
# Name of the read-only data section.
# NOTE(review): not referenced by the code visible in this file - confirm
# whether it is still needed.
RODATA_SECTION = '.rodata'
def cleanup_newlines(string):
    '''Collapses line breaks into single carriage returns and trims the ends.

    Every run of consecutive newline and / or carriage return characters is
    replaced by exactly one carriage return. Leading and trailing whitespace
    is then stripped from the result.

    A newline left in the middle of a string would be copied verbatim into
    the files this script generates. Those files are consumed by a monitor
    written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited
    parsing / processing capability. Keeping every multiline string on one
    line, with carriage returns as separators, makes the SV side simple: it
    only has to swap each carriage return back for a newline.'''
    line_breaks = re.compile(r"[\n\r]+")
    single_line = line_breaks.sub("\r", string)
    return single_line.strip()
def cleanup_format(_format):
    '''Converts C style format specifiers to SV style.

    It makes the following substitutions ([N] is an optional width, which
    may carry a leading '-'):
    - Change %[N]?i, %[N]?u --> %[N]?d
    - Change %[N]?x, %[N]?p --> %[N]?h
    - Change %[N]?X         --> %[N]?H

    The below are non-standard format specifiers added in OpenTitan
    (see sw/device/lib/base/print.c for more details). A single %!s specifier
    consumes 2 arguments instead of 1 and hence has to be converted as such
    to prevent the log monitor in SystemVerilog from throwing an error at
    runtime. The %!{x, X, y, Y} specifiers have the same property, but can
    print garbage, so they're converted to pointers instead.
    - Change %![N]?s        --> %[N]?s[%d]
    - Change %![N]?[xXyY]   --> %[N]?h[%d]
    - Change %![N]?b        --> %[N]?d[%d]

    Finally, every resulting b, c, o, d, h, H or s specifier that has nothing
    between the '%' and the type letter gets an explicit 0 width (%d --> %0d).

    The literal percent escape "%%" is passed through untouched, so the
    character following it is never mistaken for a conversion type (for
    example "100%%d" stays "100%%d" rather than becoming "100%%0d").

    The converted string is finally passed through cleanup_newlines().'''
    # C conversion characters whose SV spelling differs; all others are kept.
    sv_type = {'i': 'd', 'u': 'd', 'x': 'h', 'p': 'h', 'X': 'H'}
    # First SV specifier emitted for each OpenTitan two-argument specifier.
    sv_bang_type = {'s': 's', 'b': 'd', 'x': 'h', 'X': 'h', 'y': 'h', 'Y': 'h'}

    def convert(match):
        bang_width, bang_type, width, c_type = match.groups()
        if bang_type is not None:
            # %!<t> consumes two arguments, so emit two SV specifiers.
            return "%{}{}[%0d]".format(bang_width or '0',
                                       sv_bang_type[bang_type])
        if c_type is not None:
            # An empty width becomes an explicit 0; a lone '-' is kept as is.
            return "%{}{}".format(width or '0', sv_type.get(c_type, c_type))
        # Literal "%%": leave as is.
        return match.group(0)

    # A single left-to-right scan. "%%" is listed first so that an escaped
    # percent sign is consumed before it can start a specifier.
    pattern = r"%%|%!(-?\d*)([sxXyYb])|%(-?\d*)([iuxpXbcodhHs])"
    return cleanup_newlines(re.sub(pattern, convert, _format))
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (0-based) positions
    2 and 5 are strings. The '%%' does not accept an arg so it is ignored.

    Every specifier that consumes an argument advances the position,
    including the binary specifier %b (which cleanup_format() emits as %0b).

    The returned value is a string of indices separated by a single space;
    it is empty if the format has no string specifiers.
    It is assumed that _format has been passed through `cleanup_format()`.
    '''
    pattern = r'''
        %                           # literal "%"
        (?:[-+0 #]{0,5})            # optional flags
        (?:\d+|\*)?                 # width
        (?:\.(?:\d+|\*))?           # precision
        (?:l|ll)?                   # size
        ([bcdiouxpXshH])            # type (returned if matched)
        |                           # OR
        %(%)                        # literal "%%" (returned if matched)
        '''
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    # Drop the "%%" literals so that enumerate() yields argument positions.
    arg_types = [spec for spec, literal in matches if literal != '%']
    return ' '.join(
        str(index) for index, spec in enumerate(arg_types) if spec == 's')
def prune_filename(filename):
    '''Trims the filename so that it starts at the first "sw/device".

    The filename is returned unchanged if it does not contain "sw/device".
    '''
    _, marker, tail = filename.partition("sw/device")
    if not marker:
        return filename
    return marker + tail
def get_addr_strings(ro_contents):
    '''Builds an {addr: (string, length)} dict from the read-only sections.

    ro_contents is a list of (base addr, size, data) tuples, one per section,
    where data holds the section contents as bytes. Each section is scanned
    for null-terminated, non-empty byte sequences. Each one is decoded as
    UTF-8 (undecodable bytes are replaced) and passed through
    cleanup_newlines().

    The dict key is the address of the first byte of the string. The stored
    length is the original length in bytes, which is preserved because the
    cleanup may alter the string.

    Raises IndexError if two strings are found at the same address.'''
    addr_strings = {}
    for section_base, section_size, section_bytes in ro_contents:
        cursor = 0
        while cursor < section_size:
            nul_pos = section_bytes.find(b'\0', cursor)
            if nul_pos == -1:
                # No terminator left, so the rest of this section cannot
                # contain any more C-strings.
                break
            if nul_pos == cursor:
                # Empty string (a lone null byte): step over it.
                cursor += 1
                continue
            # The full address is the section base plus the start offset.
            str_addr = section_base + cursor
            raw = section_bytes[cursor:nul_pos]
            text = cleanup_newlines(raw.decode('utf-8', errors='replace'))
            if str_addr in addr_strings:
                raise IndexError("".join([
                    "Error: duplicate {addr: string} pair encountered\n",
                    "addr: {} string: {}\n".format(str_addr,
                                                   addr_strings[str_addr]),
                    "addr: {} string: {}\n".format(str_addr, text),
                ]))
            addr_strings[str_addr] = (text, nul_pos - cursor)
            cursor = nul_pos + 1
    return addr_strings
def get_str_at_addr(str_addr, addr_strings):
    '''Looks up the string stored at str_addr.

    addr_strings maps a start address to a (string, length) tuple. str_addr
    may point at the start of a string or anywhere within its length; in the
    latter case the remainder of the string from that offset is returned.
    The returned string has surrounding whitespace stripped.

    Raises KeyError if no entry covers str_addr.'''
    for base, (text, size) in addr_strings.items():
        offset = str_addr - base
        if 0 <= offset < size:
            return text[offset:].strip()
    raise KeyError(f"string at addr {str_addr:x} not found")
def extract_sw_logs(elf_file, logs_fields_section):
    '''Extracts the constant strings and the log database from an elf file.

    All SHT_PROGBITS sections, other than the logs fields section and the
    .debug* sections, are scanned for strings. The logs fields section is
    then decoded as an array of LOGS_FIELDS_SIZE byte entries, with the
    file name and format pointers resolved against those strings.

    Arguments:
    elf_file: Path to the elf file to process.
    logs_fields_section: Name of the section holding the log fields.

    Returns a (rodata, result) tuple of strings: rodata lists the constant
    strings with their addresses, result lists the logs with their fields.

    Exits with status 1 if logs_fields_section is not found in the elf.
    '''
    # Open the elf file.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        ro_contents = []
        for section_idx in range(elf.num_sections()):
            section = elf.get_section(section_idx)
            # Only consider sections stored in the image.
            if section.header['sh_type'] != "SHT_PROGBITS":
                continue
            # Ignore the logs fields section.
            if section.name == logs_fields_section:
                continue
            # Ignore the debug sections.
            if section.name.startswith(".debug"):
                continue
            ro_contents.append((int(section.header['sh_addr']),
                                int(section.header['sh_size']),
                                section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data.
        rodata_parts = []
        for addr, (string, _) in addr_strings.items():
            rodata_parts.append("addr: {}\n".format(hex(addr)[2:]))
            rodata_parts.append("string: {}\n".format(string))
        rodata = "".join(rodata_parts)

        # Parse the logs fields section to extract the logs.
        section = elf.get_section_by_name(name=logs_fields_section)
        if section is None:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the entries using the byte order of the elf rather than
        # that of the machine running this script.
        byte_order = '<' if elf.little_endian else '>'
        log_fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields.
        result_parts = []
        num_logs = logs_size // LOGS_FIELDS_SIZE
        for i in range(num_logs):
            start = i * LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = (
                log_fields.unpack_from(logs_data, start))
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            result_parts.extend([
                "addr: {}\n".format(hex(logs_base_addr + start)[2:]),
                "severity: {}\n".format(severity),
                "file: {}\n".format(
                    prune_filename(get_str_at_addr(file_addr, addr_strings))),
                "line: {}\n".format(line),
                "nargs: {}\n".format(nargs),
                "format: {}\n".format(fmt),
                "str_arg_idx: {}\n".format(
                    get_string_format_specifier_indices(fmt)),
            ])
        result = "".join(result_parts)

        return rodata, result
def main():
    '''Parses the command line, extracts the logs and writes the outputs.

    Two files are written into the output directory (which is created if
    needed): <name>.rodata.txt with the constant strings, and
    <name>.logs.txt with the log database.'''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--elf-file', '-e', required=True,
                            help="Elf file")
    arg_parser.add_argument('--logs-fields-section', '-f',
                            default=LOGS_FIELDS_SECTION,
                            help="Elf section where log fields are written.")
    arg_parser.add_argument('--name', '-n', required=True,
                            help="Type of the SW elf being processed.")
    arg_parser.add_argument('--outdir', '-o', required=True,
                            help="Output directory.")
    args = arg_parser.parse_args()

    os.makedirs(args.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section)

    # Write the rodata file first, then the logs file.
    for suffix, contents in ((".rodata.txt", rodata), (".logs.txt", result)):
        out_path = os.path.join(args.outdir, args.name + suffix)
        with open(out_path, "w", encoding='utf-8') as out:
            out.write(contents.strip())
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()