[dv, sw] "Backdoor" C->SV logging - This PR enables C logging that bypasses UART and uses a "backdoor" mechanism. - The log strings are initialized in a dummy section that is not actually loaded into the memory. When the ELF file is generated, the log strings (with their addresses) are extracted and converted into an SV-friendly database. The C code on the Ibex does not process the log strings at all; it just writes the address of the log string to a reserved memory location in the RAM that the SV side is monitoring. Any parameters passed to the format string are also subsequently written to that same address. The SV side looks up the format log string and its parameter values as it sees them on the bus and processes the log into the final string message. The format string includes information such as the severity, file name and the line number. Based on the severity, the log is then printed with the corresponding `uvm_*` facilities. Signed-off-by: Srikrishna Iyer <sriyer@google.com>

commit: 57235527d1083b5d29e04114dfb71bc5d07792f6 [log] [tgz]
author: Srikrishna Iyer <sriyer@google.com> Mon Mar 09 11:03:38 2020 -0700
committer: sriyerg <46467186+sriyerg@users.noreply.github.com> Thu Apr 09 09:17:51 2020 -0700
tree: 6ce40ef93b0eeb51e2790641554a7b31d2fcdd01
parent: 65663f5be380a46cf2076aa32dc7b4e4d124f1cf [diff] [blame]
diff --git a/util/device_sw_utils/extract_sw_logs.py b/util/device_sw_utils/extract_sw_logs.py
new file mode 100755
index 0000000..fa3e11b
--- /dev/null
+++ b/util/device_sw_utils/extract_sw_logs.py

@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""Script to convert logs placed in given sections into SystemVerilog-friendly
+database.
+
+The tool uses the pyelftools utility to extract the log fields from a given
+section and the strings from read only sections. It processes the log fields
+& the strings and converts them into a database. The script produces 2 outputs:
+- <name>_logs.txt, which is the log database
+- <name>_rodata.txt which contains {addr: string} pairs.
+"""
+
+import argparse
+import os
+import re
+import struct
+import sys
+
+from elftools.elf import elffile
+
+# A printf statement in C code is converted into a single write to a reserved
+# address in the RAM. The value written is the address of the log_fields_t
+# struct constructed from the log. It has the following fields:
+# severity (int), 4 bytes:        0 (I), 1 (W), 2 (E), 3 (F)
+# file_name (int, ptr), 4 bytes:  Pointer to file_name string.
+# Line no (int), 4 bytes:         Line number of the log message.
+# Nargs (int), 4 bytes:           Number of arguments the format string takes.
+# format (int, ptr), 4 bytes:     Log format string.
+#
+# Total size of log_fields_t: 20 bytes.
+LOGS_FIELDS_SECTION = '.logs.fields'
+LOGS_FIELDS_SIZE = 20
+RODATA_SECTION = '.rodata'
+
+
+def cleanup_newlines(string):
+    '''Replaces newlines with a carriage return.
+
+    The reason for doing so is that if a newline is encountered in the middle
+    of a string, it ends up adding that newline in the output files this
+    script generates. The output of this script is consumed by a monitor
+    written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited
+    parsing / processing support.'''
+    return re.sub(r"[\n\r]+", "\r", string).strip()
+
+
+def cleanup_format(_format):
+    '''Converts C style format specifiers to SV style.
+
+    It makes the following substitutions:
+    - Change %[N]?i, %[N]?u --> %[N]?d
+    - Change %[N]?x, %[N]?p --> %[N]?h
+    - Change %[N]?X         --> %[N]?H
+
+    The below is a non-standard format specifier added in OpenTitan
+    (see sw/device/lib/base/print.c for more details). A single %z specifier
+    consumes 2 arguments instead of 1 and hence has to be converted as such to
+    prevent the log monitor in SystemVerilog from throwing an error at runtime.
+    - Change %[N]?z         --> %[N]?s[%d].'''
+    _format = re.sub(r"(%-?\d*)[iu]", r"\1d", _format)
+    _format = re.sub(r"(%-?\d*)[xp]", r"\1h", _format)
+    _format = re.sub(r"(%-?\d*)X", r"\1H", _format)
+    _format = re.sub(r"(%-?\d*)z", r"\1s[%d]", _format)
+    _format = re.sub(r"%([bcodhHs])", r"%0\1", _format)
+    return cleanup_newlines(_format)
+
+
+def prune_filename(filename):
+    'This function prunes the filename to start at the sw/device hierarchy.'
+    hier = "sw/device"
+    index = filename.find(hier)
+    return (filename if index == -1 else filename[index:])
+
+
+def get_addr_strings(ro_contents):
+    '''Construct {addr: string} dict from all read-only sections.
+
+    This function processes the read-only sections of the elf, supplied as
+    a list of ro_content tuples comprising the base addr, size and data in
+    bytes, and converts them into an {addr: string} dict which is returned.'''
+    result = {}
+    for ro_content in ro_contents:
+        str_start = 0
+        base_addr, size, data = ro_content
+        while (str_start < size):
+            str_end = data.find(b'\0', str_start)
+            # Skip if start and end are the same (empty string).
+            if str_start == str_end:
+                str_start += 1
+                continue
+            # Get full string address by adding base addr to the start.
+            addr = base_addr + str_start
+            string = cleanup_newlines(data[str_start:str_end].decode(
+                'utf-8', errors='replace'))
+            if addr in result:
+                exc_msg = "Error: duplicate {addr: string} pair encountered\n"
+                exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
+                exc_msg += "addr: {} string: {}\n".format(addr, string)
+                raise IndexError(exc_msg)
+            result[addr] = string
+            str_start = str_end + 1
+    return result
+
+
+def get_str_at_addr(str_addr, addr_strings):
+    '''Returns the string at the provided addr.
+
+    It may be possible that the input addr is an offset within the string.
+    If true, then it returns remainder of the string starting at the offset.'''
+    for addr in addr_strings.keys():
+        if addr <= str_addr < addr + len(addr_strings[addr]):
+            return addr_strings[addr][str_addr - addr:].strip()
+    raise KeyError("string at addr {} not found".format(str_addr))
+
+
+def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
+    '''This function extracts contents from the logs fields section and the
+    read only sections, processes them and returns a tuple of (rodata) -
+    constant strings with their addresses and (result) - logs with fields.
+    '''
+    # Open the elf file.
+    with open(elf_file, 'rb') as f:
+        elf = elffile.ELFFile(f)
+        # Parse the ro sections to get {addr: string} pairs.
+        ro_contents = []
+        for ro_section in ro_sections:
+            section = elf.get_section_by_name(name=ro_section)
+            if section:
+                base_addr = int(section.header['sh_addr'])
+                size = int(section.header['sh_size'])
+                data = section.data()
+                ro_contents.append((base_addr, size, data))
+            else:
+                print("Error: {} section not found in {}".format(
+                    ro_section, elf_file))
+                sys.exit(1)
+        addr_strings = get_addr_strings(ro_contents)
+
+        # Dump the {addr: string} data.
+        rodata = ""
+        for addr in addr_strings.keys():
+            rodata += "addr: {}\n".format(hex(addr)[2:])
+            string = cleanup_newlines(addr_strings[addr])
+            rodata += "string: {}\n".format(string)
+
+        # Parse the logs fields section to extract the logs.
+        section = elf.get_section_by_name(name=logs_fields_section)
+        if section:
+            logs_base_addr = int(section.header['sh_addr'])
+            logs_size = int(section.header['sh_size'])
+            logs_data = section.data()
+        else:
+            print("Error: {} section not found in {}".format(
+                logs_fields_section, elf_file))
+            sys.exit(1)
+
+        # Dump the logs with fields.
+        result = ""
+        num_logs = logs_size // LOGS_FIELDS_SIZE
+        for i in range(num_logs):
+            start = i * LOGS_FIELDS_SIZE
+            end = start + LOGS_FIELDS_SIZE
+            severity, file_addr, line, nargs, format_addr = struct.unpack(
+                'IIIII', logs_data[start:end])
+            result += "addr: {}\n".format(hex(logs_base_addr + start)[2:])
+            result += "severity: {}\n".format(severity)
+            result += "file: {}\n".format(
+                prune_filename(get_str_at_addr(file_addr, addr_strings)))
+            result += "line: {}\n".format(line)
+            result += "nargs: {}\n".format(nargs)
+            result += "format: {}\n".format(
+                cleanup_format(get_str_at_addr(format_addr, addr_strings)))
+
+        return rodata, result
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
+    parser.add_argument('--logs-fields-section',
+                        '-f',
+                        default=LOGS_FIELDS_SECTION,
+                        help="Elf section where log fields are written.")
+    parser.add_argument('--rodata-sections',
+                        '-r',
+                        default=[RODATA_SECTION],
+                        nargs="+",
+                        help="Elf sections with rodata.")
+    parser.add_argument('--name',
+                        '-n',
+                        required=True,
+                        help="Type of the SW elf being processed.")
+    parser.add_argument('--outdir',
+                        '-o',
+                        required=True,
+                        help="Output directory.")
+    args = parser.parse_args()
+
+    os.makedirs(args.outdir, exist_ok=True)
+    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
+                                     args.rodata_sections)
+
+    outfile = os.path.join(args.outdir, args.name + "_rodata.txt")
+    with open(outfile, "w", encoding='utf-8') as f:
+        f.write(rodata.strip())
+
+    outfile = os.path.join(args.outdir, args.name + "_logs.txt")
+    with open(outfile, "w", encoding='utf-8') as f:
+        f.write(result.strip())
+
+
+if __name__ == "__main__":
+    main()
commit	57235527d1083b5d29e04114dfb71bc5d07792f6	[log] [tgz]
author	Srikrishna Iyer <sriyer@google.com>	Mon Mar 09 11:03:38 2020 -0700
committer	sriyerg <46467186+sriyerg@users.noreply.github.com>	Thu Apr 09 09:17:51 2020 -0700
tree	6ce40ef93b0eeb51e2790641554a7b31d2fcdd01
parent	65663f5be380a46cf2076aa32dc7b4e4d124f1cf [diff] [blame]