blob: 27d75c4688289eb3c749228adb6ef75371ce9775 [file] [log] [blame]
Srikrishna Iyer57235522020-03-09 11:03:38 -07001#!/usr/bin/env python3
2# Copyright lowRISC contributors.
3# Licensed under the Apache License, Version 2.0, see LICENSE for details.
4# SPDX-License-Identifier: Apache-2.0
5"""Script to convert logs placed in given sections into SystemVerilog-friendly
6database.
7
8The tool uses the pyelftools utility to extract the log fields from a given
9section and the strings from read only sections. It processes the log fields
10& the strings and converts them into a database. The script produces 2 outputs:
- <name>.logs.txt, which is the log database
- <name>.rodata.txt, which contains {addr: string} pairs.
13"""
14
15import argparse
16import os
17import re
18import struct
19import sys
20
21from elftools.elf import elffile
22
# Each printf-style statement in the C code is converted into a single write
# to a reserved address in the RAM. The value written is the address of the
# log_fields_t struct constructed from the log. It has the following fields:
#   severity  (int),      4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
#   file_name (int, ptr), 4 bytes: Pointer to the file_name string.
#   line no   (int),      4 bytes: Line number of the log message.
#   nargs     (int),      4 bytes: Number of arguments the format string takes.
#   format    (int, ptr), 4 bytes: Pointer to the log format string.
#
# Total size of log_fields_t: 5 fields x 4 bytes = 20 bytes.
RODATA_SECTION = '.rodata'
LOGS_FIELDS_SECTION = '.logs.fields'
LOGS_FIELDS_SIZE = 5 * 4
36
37
def cleanup_newlines(string):
    '''Squashes each run of line breaks into a single carriage return.

    A newline left in the middle of a string would be emitted verbatim into
    the output files this script generates. Those files are consumed by a
    monitor written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with
    limited parsing / processing capability. To keep the SV side simple, every
    multiline string is placed on a single line, with its original lines
    separated by a single carriage return that the SV monitor can easily
    replace with a newline.

    Leading and trailing whitespace is stripped from the result.'''
    single_line = re.sub(r"[\n\r]+", "\r", string)
    return single_line.strip()
50
51
def cleanup_format(_format):
    '''Rewrites C style format specifiers into their SV style equivalents.

    The substitutions are applied in this order:
    - %[N]?i, %[N]?u --> %[N]?d
    - %[N]?x, %[N]?p --> %[N]?h
    - %[N]?X         --> %[N]?H
    - %[N]?z         --> %[N]?s[%d]
    - %b, %c, %o, %d, %h, %H, %s without a width --> same specifier with a
      '0' inserted after the '%' (e.g. %d --> %0d)

    %z is a non-standard format specifier added in OpenTitan (see
    sw/device/lib/base/print.c for more details). A single %z consumes 2
    arguments instead of 1, so it has to be expanded into two specifiers to
    prevent the log monitor in SystemVerilog from throwing an error at
    runtime.

    Newlines in the result are cleaned up with `cleanup_newlines()`.'''
    # (pattern, replacement) pairs; order matters because the last rule acts
    # on the specifiers produced by the earlier ones.
    substitutions = (
        (r"(%-?\d*)[iu]", r"\1d"),
        (r"(%-?\d*)[xp]", r"\1h"),
        (r"(%-?\d*)X", r"\1H"),
        (r"(%-?\d*)z", r"\1s[%d]"),
        (r"%([bcodhHs])", r"%0\1"),
    )
    for pattern, replacement in substitutions:
        _format = re.sub(pattern, replacement, _format)
    return cleanup_newlines(_format)
71
72
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (0-based) indices 2
    and 5 of the format are strings. The '%%' does not accept an arg so it is
    ignored. The returned value is a string of indices separated by a single
    space; it is empty if the format takes no string args.

    It is assumed that _format has been passed through `cleanup_format()`.
    '''
    # This must be a raw string: the pattern contains regex escapes (\d, \.,
    # \*) that are invalid escape sequences in a regular string literal.
    pattern = r'''
        %                       # literal "%"
        (?:[-+0 #]{0,5})        # optional flags
        (?:\d+|\*)?             # width
        (?:\.(?:\d+|\*))?       # precision
        (?:l|ll)?               # size
        ([cdiouxpXshH])         # type (returned if matched)
        |                       # OR
        %(%)                    # literal "%%" (returned if matched)
        '''
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    indices = []
    arg_index = 0
    for spec_type, literal_percent in re.findall(pattern, _format, re.X):
        # A literal "%%" consumes no argument, so it does not advance the
        # argument index.
        if literal_percent == '%':
            continue
        if spec_type == 's':
            indices.append(str(arg_index))
        arg_index += 1
    return ' '.join(indices)
103
104
def prune_filename(filename):
    '''Trims the filename so that it starts at its first "sw/device".

    If "sw/device" does not occur in the filename, the filename is returned
    unchanged.'''
    marker = "sw/device"
    _, found, remainder = filename.partition(marker)
    if not found:
        return filename
    return marker + remainder
110
111
def get_addr_strings(ro_contents):
    '''Builds an {addr: string} dict out of all read-only sections.

    ro_contents is a list of (base addr, size, data in bytes) tuples, one per
    read-only section of the elf. Every non-empty nul-terminated string found
    in the data is decoded as utf-8 (undecodable bytes are replaced), passed
    through `cleanup_newlines()` and stored under its absolute address, i.e.
    the section base addr plus the offset of the string within the section.

    Raises IndexError if two strings end up at the same address.'''
    result = {}
    for base_addr, size, data in ro_contents:
        offset = 0
        while offset < size:
            nul_pos = data.find(b'\0', offset)
            # Without another nul byte, the remainder of this section cannot
            # contain any C-strings, so stop scanning it.
            if nul_pos == -1:
                break
            # nul_pos == offset is an empty string; only non-empty ones are
            # recorded.
            if nul_pos != offset:
                # The full string address is the base addr plus the offset.
                addr = base_addr + offset
                text = cleanup_newlines(data[offset:nul_pos].decode(
                    'utf-8', errors='replace'))
                if addr in result:
                    raise IndexError(
                        "Error: duplicate {addr: string} pair encountered\n" +
                        "addr: {} string: {}\n".format(addr, result[addr]) +
                        "addr: {} string: {}\n".format(addr, text))
                result[addr] = text
            # Resume right after the nul byte.
            offset = nul_pos + 1
    return result
144
145
def get_str_at_addr(str_addr, addr_strings):
    '''Returns the string at the provided addr.

    str_addr is looked up in the {addr: string} dict addr_strings. It may be
    possible that the input addr is an offset within a string. If true, then
    the remainder of that string starting at the offset is returned. The
    returned string has leading and trailing whitespace stripped.

    Raises KeyError if no string covers str_addr.'''
    # Fast path: the addr is exactly the start of a known string. This also
    # covers strings that are empty after clean-up, which the range check
    # below can never match because their length is 0.
    exact = addr_strings.get(str_addr)
    if exact is not None:
        return exact.strip()

    # Otherwise the addr may point into the middle of a string.
    # NOTE(review): the offset is applied to the stored (decoded and cleaned
    # up) string, so it is presumably only exact for single-byte text -
    # confirm.
    for addr, string in addr_strings.items():
        if addr <= str_addr < addr + len(string):
            return string[str_addr - addr:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))
155
156
def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the read-only strings and the log database from an elf file.

    Args:
        elf_file: Path to the elf file to process.
        logs_fields_section: Name of the elf section that holds the
            log_fields_t structs.
        ro_sections: Names of the read-only elf sections that hold the
            strings referenced by the log fields.

    Returns:
        A (rodata, result) tuple of strings, in that order: rodata lists the
        constant strings with their addresses, result lists each log with its
        fields.

    Prints an error and exits with status 1 if any of the requested sections
    is not found in the elf file.
    '''
    # All section accesses stay inside the `with` block, while the file is
    # still open.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = []
        for ro_section in ro_sections:
            section = elf.get_section_by_name(name=ro_section)
            if not section:
                print("Error: {} section not found in {}".format(
                    ro_section, elf_file))
                sys.exit(1)
            ro_contents.append((int(section.header['sh_addr']),
                                int(section.header['sh_size']),
                                section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data.
        rodata_lines = []
        for addr, string in addr_strings.items():
            rodata_lines.append("addr: {:x}\n".format(addr))
            rodata_lines.append("string: {}\n".format(
                cleanup_newlines(string)))
        rodata = "".join(rodata_lines)

        # Parse the logs fields section to extract the logs.
        section = elf.get_section_by_name(name=logs_fields_section)
        if not section:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the five 4-byte fields using the byte order of the elf
        # rather than that of the host running this script.
        byte_order = '<' if elf.little_endian else '>'
        log_fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields. Any trailing bytes that do not make up a
        # whole log_fields_t struct are ignored.
        result_lines = []
        for i in range(logs_size // LOGS_FIELDS_SIZE):
            start = i * LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = (
                log_fields.unpack_from(logs_data, start))
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            result_lines.extend([
                "addr: {:x}\n".format(logs_base_addr + start),
                "severity: {}\n".format(severity),
                "file: {}\n".format(
                    prune_filename(get_str_at_addr(file_addr, addr_strings))),
                "line: {}\n".format(line),
                "nargs: {}\n".format(nargs),
                "format: {}\n".format(fmt),
                "str_arg_idx: {}\n".format(
                    get_string_format_specifier_indices(fmt)),
            ])
        result = "".join(result_lines)

    return rodata, result
218
219
def main():
    '''Parses the command line, extracts the logs and writes the outputs.

    Writes <name>.rodata.txt and <name>.logs.txt into the output directory,
    which is created if it does not exist.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
    parser.add_argument('--logs-fields-section',
                        '-f',
                        default=LOGS_FIELDS_SECTION,
                        help="Elf section where log fields are written.")
    parser.add_argument('--rodata-sections',
                        '-r',
                        nargs="+",
                        action="append",
                        help="Elf sections with rodata.")
    parser.add_argument('--name',
                        '-n',
                        required=True,
                        help="Type of the SW elf being processed.")
    parser.add_argument('--outdir',
                        '-o',
                        required=True,
                        help="Output directory.")
    args = parser.parse_args()

    if args.rodata_sections is None:
        ro_sections = [RODATA_SECTION]
    else:
        # TODO: We want the `--rodata-sections` arg to have the 'extend' action
        # which is only available in Python 3.8. To maintain compatibility with
        # Python 3.6 (which is the minimum required version for OpenTitan), we
        # flatten the list here instead.
        #
        # Duplicates are dropped while keeping the order given on the command
        # line. Going through a set would make the order, and with it the
        # order of the rodata output, vary from run to run.
        ro_sections = []
        for lst in args.rodata_sections:
            for section in lst:
                if section not in ro_sections:
                    ro_sections.append(section)

    os.makedirs(args.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
                                     ro_sections)

    outfile = os.path.join(args.outdir, args.name + ".rodata.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(rodata.strip())

    outfile = os.path.join(args.outdir, args.name + ".logs.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(result.strip())
264
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()