Srikrishna Iyer | 5723552 | 2020-03-09 11:03:38 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright lowRISC contributors. |
| 3 | # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| 4 | # SPDX-License-Identifier: Apache-2.0 |
| 5 | """Script to convert logs placed in given sections into SystemVerilog-friendly |
| 6 | database. |
| 7 | |
| 8 | The tool uses the pyelftools utility to extract the log fields from a given |
| 9 | section and the strings from read only sections. It processes the log fields |
| 10 | & the strings and converts them into a database. The script produces 2 outputs: |
| 11 | - <name>.logs.txt, which is the log database |
| 12 | - <name>.rodata.txt, which contains {addr: string} pairs. |
| 13 | """ |
| 14 | |
| 15 | import argparse |
| 16 | import os |
| 17 | import re |
| 18 | import struct |
| 19 | import sys |
| 20 | |
| 21 | from elftools.elf import elffile |
| 22 | |
| 23 | # A printf statement in C code is converted into a single write to a reserved |
| 24 | # address in the RAM. The value written is the address of the log_fields_t |
| 25 | # struct constructed from the log. It has the following fields: |
| 26 | # severity (int), 4 bytes: 0 (I), 1 (W), 2 (E), 3 (F) |
| 27 | # file_name (int, ptr), 4 bytes: Pointer to file_name string. |
| 28 | # Line no (int), 4 bytes: Line number of the log message. |
| 29 | # Nargs (int), 4 bytes: Number of arguments the format string takes. |
| 30 | # format (int, ptr), 4 bytes: Log format string. |
| 31 | # |
| 32 | # Total size of log_fields_t: 20 bytes. |
# Default name of the ELF section holding the packed log_fields_t entries.
LOGS_FIELDS_SECTION = '.logs.fields'
# Size in bytes of one log_fields_t entry (five 4-byte fields).
LOGS_FIELDS_SIZE = 20
# Read-only section scanned for strings when none is given on the command
# line.
RODATA_SECTION = '.rodata'
| 36 | |
| 37 | |
def cleanup_newlines(string):
    '''Collapses each run of line breaks into a single carriage return.

    A line break left in the middle of a string would end up verbatim in the
    files this script generates. Those files are consumed by a monitor
    written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited
    parsing / processing capability. To keep the SV side simple, every
    multiline string is emitted on one line, with its pieces separated by a
    single carriage return that the SV monitor can easily turn back into a
    newline.

    Leading and trailing whitespace is removed from the returned string.'''
    # Splitting on runs of line breaks and re-joining with one carriage
    # return is the same as substituting each run in place.
    pieces = re.split(r"[\n\r]+", string)
    joined = "\r".join(pieces)
    return joined.strip()
| 50 | |
| 51 | |
def cleanup_format(_format):
    '''Converts C style format specifiers to SV style.

    It makes the following substitutions, where [N] is an optional (possibly
    negative) width:
    - Change %[N]i, %[N]u --> %[N]d
    - Change %[N]x, %[N]p --> %[N]h
    - Change %[N]X        --> %[N]H

    The specifiers below are non-standard ones added in OpenTitan (see
    sw/device/lib/base/print.c for more details). Each of them consumes 2
    arguments instead of 1, so a trailing `[%d]` is emitted to absorb the
    extra argument and prevent the log monitor in SystemVerilog from throwing
    an error at runtime.
    - Change %![N]s      --> %[N]s[%d]
    - Change %![N][xXyY] --> %[N]h[%d]
    - Change %![N]b      --> %[N]d[%d]

    Finally, every %b, %c, %o, %d, %h, %H and %s that carries no width is
    given an explicit width of 0 (e.g. %d --> %0d).

    An escaped percent sign (`%%`) is passed through untouched: the character
    following it is literal text, not a specifier, and must not be rewritten.

    The converted string is passed through `cleanup_newlines()` before being
    returned.'''

    def convert_segment(segment):
        # Order matters: the plain specifiers are normalised first, then the
        # two-argument `%!` forms, and only then is the zero width inserted
        # (which also covers the `%d` inside the freshly added `[%d]`).
        segment = re.sub(r"%(-?\d*)[iu]", r"%\1d", segment)
        segment = re.sub(r"%(-?\d*)[xp]", r"%\1h", segment)
        segment = re.sub(r"%(-?\d*)X", r"%\1H", segment)
        segment = re.sub(r"%!(-?\d*)s", r"%\1s[%d]", segment)
        segment = re.sub(r"%!(-?\d*)[xXyY]", r"%\1h[%d]", segment)
        segment = re.sub(r"%!(-?\d*)b", r"%\1d[%d]", segment)
        segment = re.sub(r"%([bcodhHs])", r"%0\1", segment)
        return segment

    # Convert the text between escaped percents independently, so that the
    # second "%" of a "%%" can never pair up with the character after it
    # (e.g. "%%d" must stay "%%d" rather than become "%%0d").
    converted = "%%".join(
        convert_segment(segment) for segment in _format.split("%%"))
    return cleanup_newlines(converted)
| 77 | |
| 78 | |
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (zero-based)
    positions 2 and 5 are strings. The '%%' does not accept an arg so they
    are ignored. The returned value is a string of indices separated by a
    single space; it is empty when the format takes no string args.

    It is assumed that _format has been passed through `cleanup_format()`.
    '''
    # Raw string: the pattern contains regex escapes (\d, \., \*) that are not
    # valid Python string escapes. Under re.X the whitespace and '#' inside
    # the flags character class are still matched literally.
    pattern = r'''
        %                   # literal "%"
        (?:[-+0 #]{0,5})    # optional flags
        (?:\d+|\*)?         # width
        (?:\.(?:\d+|\*))?   # precision
        (?:l|ll)?           # size
        ([bcdiouxpXshH])    # type (returned if matched)
        |                   # OR
        %(%)                # literal "%%" (returned if matched)
        '''
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    index = 0
    result = []
    for spec_type, escaped_percent in re.findall(pattern, _format, re.X):
        # '%%' prints a literal percent and consumes no argument.
        if escaped_percent:
            continue
        if spec_type == 's':
            result.append(str(index))
        # Every real specifier (including %b, which `cleanup_format()` also
        # treats as one) occupies one argument slot.
        index += 1
    return ' '.join(result)
| 109 | |
| 110 | |
def prune_filename(filename):
    'Trims the filename so that it starts at its first "sw/device" component.'
    _, marker, remainder = filename.partition("sw/device")
    if not marker:
        # The marker does not occur: hand the name back unchanged.
        return filename
    return marker + remainder
| 116 | |
| 117 | |
def get_addr_strings(ro_contents):
    '''Builds an {addr: string} dict out of the read-only sections.

    `ro_contents` is a list of (base addr, size, data bytes) tuples, one per
    read-only section of the elf. Each section is scanned for nul-terminated
    strings; every non-empty string is decoded as UTF-8 (undecodable bytes
    are replaced), passed through `cleanup_newlines()` and stored under the
    address of its first byte.

    Raises IndexError if the same address is produced twice.'''
    addr_to_string = {}
    for base_addr, size, data in ro_contents:
        cursor = 0
        while cursor < size:
            nul_pos = data.find(b'\0', cursor)
            # Without a terminating nul byte there are no more C-strings in
            # the remainder of this section.
            if nul_pos == -1:
                break
            # A nul byte right at the cursor is an empty string: only step
            # over it.
            if nul_pos != cursor:
                addr = base_addr + cursor
                raw = data[cursor:nul_pos]
                text = cleanup_newlines(raw.decode('utf-8', errors='replace'))
                if addr in addr_to_string:
                    raise IndexError(
                        "Error: duplicate {addr: string} pair encountered\n" +
                        "addr: {} string: {}\n".format(
                            addr, addr_to_string[addr]) +
                        "addr: {} string: {}\n".format(addr, text))
                addr_to_string[addr] = text
            cursor = nul_pos + 1
    return addr_to_string
| 150 | |
| 151 | |
def get_str_at_addr(str_addr, addr_strings):
    '''Looks up the string that covers the given address.

    The address does not have to be the start of a string: when it points
    into the middle of one, the remainder of that string from that offset on
    is returned. The returned text has surrounding whitespace removed.

    Raises KeyError if no string in `addr_strings` covers the address.'''
    for base, text in addr_strings.items():
        offset = str_addr - base
        if 0 <= offset < len(text):
            return text[offset:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))
| 161 | |
| 162 | |
def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the log database and the read-only strings from an elf file.

    Args:
        elf_file: Path to the elf file to read.
        logs_fields_section: Name of the section holding the packed log
            fields entries (LOGS_FIELDS_SIZE bytes each).
        ro_sections: Names of the read-only sections to scan for strings.

    Returns:
        A (rodata, result) tuple of strings: `rodata` lists every constant
        string with its address, `result` lists every log entry with its
        fields.

    The process exits with status 1 if any of the named sections is missing
    from the elf file.
    '''
    # Open the elf file.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = []
        for ro_section in ro_sections:
            section = elf.get_section_by_name(name=ro_section)
            if not section:
                print("Error: {} section not found in {}".format(
                    ro_section, elf_file))
                sys.exit(1)
            base_addr = int(section.header['sh_addr'])
            size = int(section.header['sh_size'])
            ro_contents.append((base_addr, size, section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data. Pieces are collected in a list and
        # joined once at the end rather than growing a string with `+=`.
        rodata = []
        for addr, string in addr_strings.items():
            rodata.append("addr: {}\n".format(hex(addr)[2:]))
            rodata.append("string: {}\n".format(cleanup_newlines(string)))

        # Parse the logs fields section to extract the logs.
        section = elf.get_section_by_name(name=logs_fields_section)
        if not section:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the five 4-byte fields using the byte order declared by the
        # elf itself. An explicit '<' / '>' prefix also selects standard
        # sizes with no padding, so the layout does not depend on the machine
        # running this script.
        byte_order = '<' if elf.little_endian else '>'
        fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields. Trailing bytes that do not make up a
        # whole entry are ignored.
        result = []
        num_logs = logs_size // LOGS_FIELDS_SIZE
        for i in range(num_logs):
            start = i * LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = \
                fields.unpack_from(logs_data, start)
            result.append("addr: {}\n".format(
                hex(logs_base_addr + start)[2:]))
            result.append("severity: {}\n".format(severity))
            result.append("file: {}\n".format(
                prune_filename(get_str_at_addr(file_addr, addr_strings))))
            result.append("line: {}\n".format(line))
            result.append("nargs: {}\n".format(nargs))
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            result.append("format: {}\n".format(fmt))
            result.append("str_arg_idx: {}\n".format(
                get_string_format_specifier_indices(fmt)))

    return "".join(rodata), "".join(result)
| 224 | |
| 225 | |
def main():
    '''Parses the command line, extracts the logs and writes the outputs.

    Two files are written into the output directory (created if needed):
    <name>.rodata.txt with the {addr: string} pairs and <name>.logs.txt with
    the log database.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
    parser.add_argument('--logs-fields-section',
                        '-f',
                        default=LOGS_FIELDS_SECTION,
                        help="Elf section where log fields are written.")
    parser.add_argument('--rodata-sections',
                        '-r',
                        nargs="+",
                        action="append",
                        help="Elf sections with rodata.")
    parser.add_argument('--name',
                        '-n',
                        required=True,
                        help="Type of the SW elf being processed.")
    parser.add_argument('--outdir',
                        '-o',
                        required=True,
                        help="Output directory.")
    args = parser.parse_args()

    if args.rodata_sections is None:
        ro_sections = [RODATA_SECTION]
    else:
        # TODO: We want the `--rodata-sections` arg to have the 'extend' action
        # which is only available in Python 3.8. To maintain compatibility with
        # Python 3.6 (which is the minimum required version for OpenTitan), we
        # flatten the list here instead.
        #
        # Duplicates are dropped while keeping the order given on the command
        # line, so that the generated files are identical from run to run (a
        # set would order the sections differently depending on string hash
        # randomization).
        ro_sections = []
        for lst in args.rodata_sections:
            for section in lst:
                if section not in ro_sections:
                    ro_sections.append(section)

    os.makedirs(args.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
                                     ro_sections)

    outfile = os.path.join(args.outdir, args.name + ".rodata.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(rodata.strip())

    outfile = os.path.join(args.outdir, args.name + ".logs.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(result.strip())
| 269 | |
| 270 | |
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()