Srikrishna Iyer | 5723552 | 2020-03-09 11:03:38 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright lowRISC contributors. |
| 3 | # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| 4 | # SPDX-License-Identifier: Apache-2.0 |
| 5 | """Script to convert logs placed in given sections into SystemVerilog-friendly |
| 6 | database. |
| 7 | |
| 8 | The tool uses the pyelftools utility to extract the log fields from a given |
| 9 | section and the strings from read only sections. It processes the log fields |
| 10 | & the strings and converts them into a database. The script produces 2 outputs: |
| 11 | - <name>_logs.txt, which is the log database |
| 12 | - <name>_rodata.txt which contains {addr: string} pairs. |
| 13 | """ |
| 14 | |
| 15 | import argparse |
| 16 | import os |
| 17 | import re |
| 18 | import struct |
| 19 | import sys |
| 20 | |
| 21 | from elftools.elf import elffile |
| 22 | |
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields, in order:
#   severity  (int),      4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
#   file_name (int, ptr), 4 bytes: Pointer to file_name string.
#   Line no   (int),      4 bytes: Line number of the log message.
#   Nargs     (int),      4 bytes: Number of arguments the format string takes.
#   format    (int, ptr), 4 bytes: Pointer to the log format string.
#
# Total size of log_fields_t: 5 fields x 4 bytes = 20 bytes.
LOGS_FIELDS_SECTION = '.logs.fields'
LOGS_FIELDS_SIZE = 20
RODATA_SECTION = '.rodata'
| 36 | |
| 37 | |
def cleanup_newlines(string):
    '''Collapses each run of line breaks into a single carriage return.

    The files this script generates are parsed by a SystemVerilog monitor
    (hw/dv/sv/sw_logger_if), and SystemVerilog has limited parsing /
    processing capability. A newline in the middle of a string would spill
    that string over several lines of the output file. To keep every string
    on one line, each run of '\\n' / '\\r' characters is replaced by one
    carriage return, which the SV monitor can easily turn back into a
    newline. Leading and trailing whitespace is removed from the result.'''
    single_line = re.sub(r"[\n\r]+", "\r", string)
    return single_line.strip()
| 50 | |
| 51 | |
def cleanup_format(_format):
    '''Converts C style format specifiers to SV style.

    It makes the following substitutions ([N] is an optional '-' followed by
    an optional decimal width):
    - Change %[N]?i, %[N]?u --> %[N]?d
    - Change %[N]?x, %[N]?p --> %[N]?h
    - Change %[N]?X         --> %[N]?H

    The below is a non-standard format specifier added in OpenTitan
    (see sw/device/lib/base/print.c for more details). A single %z specifier
    consumes 2 arguments instead of 1 and hence has to be converted as such
    to prevent the log monitor in SystemVerilog from throwing an error at
    runtime.
    - Change %[N]?z         --> %[N]?s[%d].

    Finally, every specifier that carries no '-' / width (including the
    `%d` introduced by the %z expansion) gets an explicit width of 0,
    e.g. `%d` --> `%0d`.

    The string is processed in a single left-to-right pass so that an
    escaped percent sign (`%%`) is consumed as a unit and left untouched;
    a letter that merely follows `%%` (e.g. `%%b`, `%%i`) is literal text
    and must not be rewritten.

    Returns the converted format with newlines cleaned up via
    `cleanup_newlines()`.'''
    # C conversion letters that need a different letter in SV. Letters that
    # are absent from this map (b, c, o, d, h, H, s) are kept as they are.
    sv_conversions = {
        'i': 'd',
        'u': 'd',
        'x': 'h',
        'p': 'h',
        'X': 'H',
    }

    def to_sv(match):
        if match.group(0) == '%%':
            # Escaped percent: keep verbatim.
            return '%%'
        modifier, conversion = match.groups()
        # A specifier without '-' / width gets an explicit 0 width.
        prefix = '%' + (modifier or '0')
        if conversion == 'z':
            # %z consumes two args: the string and an integer.
            return prefix + 's[%0d]'
        return prefix + sv_conversions.get(conversion, conversion)

    # `%%` is listed first so that it wins over a specifier starting at the
    # same position.
    _format = re.sub(r"%%|%(-?\d*)([iuxpXzbcodhHs])", to_sv, _format)
    return cleanup_newlines(_format)
| 71 | |
| 72 | |
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (0-based) positions
    2 and 5 of the format are strings. The '%%' does not accept an arg so
    they are ignored. The returned value is a string of indices separated by
    a single space; it is empty if the format takes no string args.

    It is assumed that _format has been passed through `cleanup_format()`.
    '''
    # Raw string: the pattern contains regex escapes such as `\\d` and `\\.`
    # which are not valid Python string escapes.
    pattern = r'''
        %                    # literal "%"
        (?:[-+0 #]{0,5})     # optional flags
        (?:\d+|\*)?          # width
        (?:\.(?:\d+|\*))?    # precision
        (?:l|ll)?            # size
        ([bcdiouxpXshH])     # type (returned if matched)
        |                    # OR
        %(%)                 # literal "%%" (returned if matched)
        '''
    # 'b' is part of the type set because `cleanup_format()` treats %b as a
    # specifier; leaving it out would shift the index of every later arg.
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    # Entries with the second group set are '%%' escapes and take no arg.
    arg_types = [conv for conv, escaped in matches if not escaped]
    return ' '.join(
        str(index) for index, conv in enumerate(arg_types) if conv == 's')
| 103 | |
| 104 | |
def prune_filename(filename):
    '''Trims the filename so that it starts at the first "sw/device".

    If "sw/device" does not occur in the filename, it is returned
    unchanged.'''
    _, marker, remainder = filename.partition("sw/device")
    if not marker:
        return filename
    return marker + remainder
| 110 | |
| 111 | |
def get_addr_strings(ro_contents):
    '''Construct {addr: string} dict from all read-only sections.

    This function processes the read-only sections of the elf supplied as
    a list of ro_content tuples comprising of base addr, size and data in
    bytes and converts it into an {addr: string} dict which is returned.

    Each section is split on NUL bytes. Every non-empty chunk is decoded as
    UTF-8 (undecodable bytes are replaced), passed through
    `cleanup_newlines()` and stored under the address of its first byte.

    Raises IndexError if two strings end up at the same address.'''
    result = {}
    for base_addr, size, data in ro_contents:
        # Never scan beyond the bytes that are actually available.
        limit = min(size, len(data))
        str_start = 0
        while str_start < limit:
            str_end = data.find(b'\0', str_start, limit)
            if str_end == -1:
                # The last string is not NUL terminated: it runs up to the
                # end of the section. Without this, the -1 would both drop
                # the final byte and rewind the scan to the section start.
                str_end = limit
            # Skip if start and end is the same (empty string).
            if str_start == str_end:
                str_start += 1
                continue
            # Get full string address by adding base addr to the start.
            addr = base_addr + str_start
            string = cleanup_newlines(data[str_start:str_end].decode(
                'utf-8', errors='replace'))
            if addr in result:
                exc_msg = "Error: duplicate {addr: string} pair encountered\n"
                exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
                exc_msg += "addr: {} string: {}\n".format(addr, string)
                raise IndexError(exc_msg)
            result[addr] = string
            str_start = str_end + 1
    return result
| 140 | |
| 141 | |
def get_str_at_addr(str_addr, addr_strings):
    '''Looks up the string that covers the provided addr.

    The addr does not have to be the start of a string: if it points
    somewhere inside one, the tail of that string from that offset onwards
    is returned. The returned text has surrounding whitespace stripped.

    Raises KeyError if no string in addr_strings covers the addr.'''
    for base, text in addr_strings.items():
        offset = str_addr - base
        if 0 <= offset < len(text):
            return text[offset:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))
| 151 | |
| 152 | |
def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the SW logs and the constant strings from an elf file.

    This function extracts contents from the logs fields section, and the
    read only sections, processes them and returns the tuple
    (rodata, result):
    - rodata: constant strings with their addresses, as text made of
      "addr: <hex>" / "string: <text>" line pairs.
    - result: the logs with their fields, as text with one
      addr / severity / file / line / nargs / format / str_arg_idx group of
      lines per log.

    Prints an error and exits with status 1 if any of the requested
    sections does not exist in the elf file.
    '''
    # Open the elf file.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = []
        for ro_section in ro_sections:
            section = elf.get_section_by_name(name=ro_section)
            if not section:
                print("Error: {} section not found in {}".format(
                    ro_section, elf_file))
                sys.exit(1)
            ro_contents.append((int(section.header['sh_addr']),
                                int(section.header['sh_size']),
                                section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data.
        rodata_lines = []
        for addr, string in addr_strings.items():
            rodata_lines.append("addr: {}\n".format(hex(addr)[2:]))
            rodata_lines.append("string: {}\n".format(
                cleanup_newlines(string)))
        rodata = "".join(rodata_lines)

        # Parse the logs fields section to extract the logs.
        section = elf.get_section_by_name(name=logs_fields_section)
        if not section:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the five 4-byte fields using the byte order of the elf
        # rather than that of the machine running this script, and with
        # standard (not host-native) field sizes.
        byte_order = '<' if elf.little_endian else '>'
        log_fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields.
        result_lines = []
        num_logs = logs_size // LOGS_FIELDS_SIZE
        for i in range(num_logs):
            start = i * LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = (
                log_fields.unpack_from(logs_data, start))
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            result_lines.extend([
                "addr: {}\n".format(hex(logs_base_addr + start)[2:]),
                "severity: {}\n".format(severity),
                "file: {}\n".format(
                    prune_filename(get_str_at_addr(file_addr, addr_strings))),
                "line: {}\n".format(line),
                "nargs: {}\n".format(nargs),
                "format: {}\n".format(fmt),
                "str_arg_idx: {}\n".format(
                    get_string_format_specifier_indices(fmt)),
            ])
        result = "".join(result_lines)

    return rodata, result
| 214 | |
| 215 | |
def main():
    '''Command line entry point.

    Parses the arguments, extracts the logs and the read-only strings from
    the elf file and writes them to <outdir>/<name>_rodata.txt and
    <outdir>/<name>_logs.txt respectively. The output directory is created
    if it does not exist yet.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
    parser.add_argument(
        '--logs-fields-section', '-f',
        default=LOGS_FIELDS_SECTION,
        help="Elf section where log fields are written.")
    parser.add_argument(
        '--rodata-sections', '-r',
        default=[RODATA_SECTION],
        nargs="+",
        help="Elf sections with rodata.")
    parser.add_argument(
        '--name', '-n',
        required=True,
        help="Type of the SW elf being processed.")
    parser.add_argument(
        '--outdir', '-o',
        required=True,
        help="Output directory.")
    args = parser.parse_args()

    os.makedirs(args.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
                                     args.rodata_sections)

    # Both outputs are written the same way: stripped text, UTF-8 encoded.
    outputs = (("_rodata.txt", rodata), ("_logs.txt", result))
    for suffix, contents in outputs:
        outfile = os.path.join(args.outdir, args.name + suffix)
        with open(outfile, "w", encoding='utf-8') as f:
            f.write(contents.strip())
| 249 | |
| 250 | |
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()