blob: e18c0eef7266c0e8963ed33e4d2c6c785953c3e2 [file] [log] [blame]
Srikrishna Iyer57235522020-03-09 11:03:38 -07001#!/usr/bin/env python3
2# Copyright lowRISC contributors.
3# Licensed under the Apache License, Version 2.0, see LICENSE for details.
4# SPDX-License-Identifier: Apache-2.0
"""Script to convert logs placed in given sections into SystemVerilog-friendly
database.

The tool uses the pyelftools utility to extract the log fields from a given
section and the strings from read only sections. It processes the log fields
& the strings and converts them into a database. The script produces 2 outputs:
- <name>.logs.txt, which is the log database
- <name>.rodata.txt, which contains {addr: string} pairs.
"""
14
15import argparse
16import os
17import re
18import struct
19import sys
20
21from elftools.elf import elffile
22
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields:
# severity  (int), 4 bytes:        0 (I), 1 (W), 2 (E), 3 (F)
# file_name (int, ptr), 4 bytes:   Pointer to file_name string.
# Line no   (int), 4 bytes:        Line number of the log message.
# Nargs     (int), 4 bytes:        Number of arguments the format string takes.
# format    (int, ptr), 4 bytes:   Log format string.
#
# Total size of log_fields_t: 20 bytes.
LOGS_FIELDS_SECTION = '.logs.fields'
LOGS_FIELDS_SIZE = 20
RODATA_SECTION = '.rodata'
36
37
def cleanup_newlines(string):
    '''Replaces each run of newlines with a single carriage return.

    If a newline is left in the middle of a string, it ends up in the output
    files this script generates. The output of this script is consumed by a
    monitor written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with
    limited parsing / processing capability. So we make the parsing easier on
    the SV side by putting all multiline strings on a single line, separated
    by a single carriage return instead, which the SV monitor can easily
    replace with a newline.

    Args:
        string: The text to clean up.

    Returns:
        The text with every run of line feed / carriage return characters
        replaced by one carriage return, and with leading and trailing
        whitespace (which includes carriage returns) stripped.
    '''
    return re.sub(r"[\n\r]+", "\r", string).strip()
50
51
def cleanup_format(_format):
    '''Converts C style format specifiers to SV style.

    It makes the following substitutions:
        - Change %[N]?i, %[N]?u --> %[N]?d
        - Change %[N]?x, %[N]?p --> %[N]?h
        - Change %[N]?X         --> %[N]?H

    The below are non-standard format specifiers added in OpenTitan
    (see sw/device/lib/base/print.c for more details). A single %!s specifier
    consumes 2 arguments instead of 1 and hence has to be converted as such to
    prevent the log monitor in SystemVerilog from throwing an error at
    runtime. The %!{x, X, y, Y} specifiers have the same property, but can
    print garbage, so they're converted to pointers instead. Each of them is
    therefore expanded into two specifiers:
        - Change %![N]?s        --> %[N]?s[%d]
        - Change %![N]?[xXyY]   --> %[N]?h[%d]
        - Change %![N]?b        --> %[N]?d[%d]

    Afterwards every width-less %b, %c, %o, %d, %h, %H and %s is given an
    explicit width of 0 (e.g. %d --> %0d), and the result is passed through
    `cleanup_newlines()`.

    An escaped percent sign (%%) is copied through untouched, so the letter
    following it is never mistaken for a conversion type.

    Args:
        _format: The C style format string.

    Returns:
        The SV style format string, on a single line.
    '''

    def substitute(pattern, template, text):
        '''Applies one specifier rewrite while leaving "%%" pairs alone.'''

        def replace(match):
            if match.group(0) == "%%":
                return "%%"
            return match.expand(template)

        # "%%" is tried first so that an escaped percent is consumed as one
        # unit; otherwise the "%x" inside "%%x" would be rewritten as if it
        # were a real specifier.
        return re.sub("%%|" + pattern, replace, text)

    _format = substitute(r"%(-?\d*)[iu]", r"%\1d", _format)
    _format = substitute(r"%(-?\d*)[xp]", r"%\1h", _format)
    _format = substitute(r"%(-?\d*)X", r"%\1H", _format)
    _format = substitute(r"%!(-?\d*)s", r"%\1s[%d]", _format)
    _format = substitute(r"%!(-?\d*)[xXyY]", r"%\1h[%d]", _format)
    _format = substitute(r"%!(-?\d*)b", r"%\1d[%d]", _format)
    # Must run last: it also zero-widths the "%d" introduced by the three
    # "%!" rewrites above.
    _format = substitute(r"%([bcodhHs])", r"%0\1", _format)
    return cleanup_newlines(_format)
77
78
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (0-based) index 2 and
    index 5 of the format are strings. The '%%' does not accept an arg so they
    are ignored. The returned value is a string of indices separated by a
    single space; it is empty when the format takes no string args.

    It is assumed that _format has been passed through `cleanup_format()`.

    Args:
        _format: The (SV style) format string to scan.

    Returns:
        The space separated, 0-based arg indices of all %s specifiers.
    '''
    # Raw string: the pattern contains regex escapes such as \d and \. which
    # are invalid escape sequences in a normal string literal.
    # 'b' is part of the type set because `cleanup_format()` treats %b as a
    # specifier as well; leaving it out would shift the index of every string
    # arg that follows a %b.
    pattern = r'''
        %                    # literal "%"
        (?:[-+0 #]{0,5})     # optional flags
        (?:\d+|\*)?          # width
        (?:\.(?:\d+|\*))?    # precision
        (?:l|ll)?            # size
        ([bcdiouxpXshH])     # type (returned if matched)
        |                    # OR
        %(%)                 # literal "%%" (returned if matched)
        '''
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    index = 0
    result = []
    for spec_type, escaped_percent in matches:
        # An escaped percent consumes no arg, so it does not advance the index.
        if escaped_percent == '%':
            continue
        if spec_type == 's':
            result.append(str(index))
        index += 1
    return ' '.join(result)
109
110
def prune_filename(filename):
    '''Prunes the filename to only display the hierarchy from sw/device down.

    Args:
        filename: The file path to prune.

    Returns:
        The path starting at the first occurrence of "sw/device", or the
        unmodified path if it does not contain "sw/device".
    '''
    hier = "sw/device"
    index = filename.find(hier)
    return filename if index == -1 else filename[index:]
116
117
def get_addr_strings(ro_contents):
    '''Construct {addr: string} dict from all read-only sections.

    This function processes the read-only sections of the elf supplied as
    a list of ro_content tuples comprising of base addr, size and data in bytes
    and converts it into an {addr: string} dict which is returned.

    Every non-empty, nul-terminated byte run is decoded as UTF-8 (undecodable
    bytes are replaced) and passed through `cleanup_newlines()` before being
    stored, so a stored string can be shorter than the bytes it came from.

    Args:
        ro_contents: A list of (base_addr, size, data) tuples, one per section.

    Returns:
        A dict mapping the address of each string to the cleaned up string.

    Raises:
        IndexError: If two strings are found at the same address.
    '''
    result = {}
    for base_addr, size, data in ro_contents:
        str_start = 0
        while str_start < size:
            str_end = data.find(b'\0', str_start)
            # Skip the remainder of this section since it can't contain any
            # C-strings if there are no nul bytes.
            if str_end == -1:
                break
            # Skip if start and end is the same, i.e. the string is empty.
            if str_start == str_end:
                str_start += 1
                continue
            # Get full string address by adding base addr to the start.
            addr = base_addr + str_start
            string = cleanup_newlines(data[str_start:str_end].decode(
                'utf-8', errors='replace'))
            if addr in result:
                exc_msg = "Error: duplicate {addr: string} pair encountered\n"
                exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
                exc_msg += "addr: {} string: {}\n".format(addr, string)
                raise IndexError(exc_msg)
            result[addr] = string
            # Resume right after the terminating nul byte.
            str_start = str_end + 1
    return result
150
151
def get_str_at_addr(str_addr, addr_strings):
    '''Returns the string at the provided addr.

    It may be possible that the input addr is an offset within the string.
    If true, then it returns remainder of the string starting at the offset.

    Args:
        str_addr: The address to look up.
        addr_strings: The {addr: string} dict to search.

    Returns:
        The string (or the remainder of the string) found at str_addr, with
        leading and trailing whitespace stripped.

    Raises:
        KeyError: If no string starts at, or spans, str_addr.
    '''
    # An address that is the start of a string is resolved directly. Besides
    # avoiding a scan of the whole dict, this also finds strings whose stored
    # text is empty, which the range check below can never match.
    if str_addr in addr_strings:
        return addr_strings[str_addr].strip()
    # Otherwise the address may point into the middle of a string.
    for addr, string in addr_strings.items():
        if addr <= str_addr < addr + len(string):
            return string[str_addr - addr:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))
161
162
def _read_section(elf, section_name, elf_file):
    '''Returns the (base addr, size, data) of the named section of the elf.

    Prints an error and exits the script with status 1 if the section does not
    exist. `elf_file` is only used in that error message.
    '''
    section = elf.get_section_by_name(name=section_name)
    if not section:
        print("Error: {} section not found in {}".format(
            section_name, elf_file))
        sys.exit(1)
    base_addr = int(section.header['sh_addr'])
    size = int(section.header['sh_size'])
    return base_addr, size, section.data()


def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the log database and the constant strings from an elf file.

    This function extracts contents from the logs fields section, and the
    read only sections, processes them and generates a tuple of (rodata) -
    constant strings with their addresses and (result) - logs with fields.

    Args:
        elf_file: Path to the elf file.
        logs_fields_section: Name of the section holding the log fields.
        ro_sections: Names of the read only sections holding the strings.

    Returns:
        A (rodata, result) tuple of strings. `rodata` holds an `addr:` and a
        `string:` line for every string found. `result` holds, for every log,
        the lines `addr:`, `severity:`, `file:`, `line:`, `nargs:`, `format:`
        and `str_arg_idx:`. Addresses are written in hex without a prefix.

    Raises:
        IndexError: If two strings are found at the same address.
        KeyError: If a log refers to a string address that cannot be resolved.
        SystemExit: If any of the requested sections is missing in the elf.
    '''
    # Open the elf file. All section data is read while the file is open.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = [
            _read_section(elf, ro_section, elf_file)
            for ro_section in ro_sections
        ]
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data.
        rodata_lines = []
        for addr, string in addr_strings.items():
            rodata_lines.append("addr: {}\n".format(hex(addr)[2:]))
            rodata_lines.append("string: {}\n".format(
                cleanup_newlines(string)))
        rodata = "".join(rodata_lines)

        # Parse the logs fields section to extract the logs.
        logs_base_addr, logs_size, logs_data = _read_section(
            elf, logs_fields_section, elf_file)

        # Decode the five 32-bit fields using the byte order of the elf rather
        # than that of the machine running this script, and with standard
        # (not native) sizes and no alignment padding.
        byte_order = '<' if elf.little_endian else '>'
        log_fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields. Any trailing bytes that do not make up a
        # full log_fields_t entry are ignored.
        result_lines = []
        num_logs = logs_size // LOGS_FIELDS_SIZE
        for i in range(num_logs):
            start = i * LOGS_FIELDS_SIZE
            end = start + LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = log_fields.unpack(
                logs_data[start:end])
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            result_lines.append("addr: {}\n".format(
                hex(logs_base_addr + start)[2:]))
            result_lines.append("severity: {}\n".format(severity))
            result_lines.append("file: {}\n".format(
                prune_filename(get_str_at_addr(file_addr, addr_strings))))
            result_lines.append("line: {}\n".format(line))
            result_lines.append("nargs: {}\n".format(nargs))
            result_lines.append("format: {}\n".format(fmt))
            result_lines.append("str_arg_idx: {}\n".format(
                get_string_format_specifier_indices(fmt)))
        result = "".join(result_lines)

    return rodata, result
224
225
def main():
    '''Parses the command line, extracts the logs and writes the output files.

    Two files are written into the output directory (which is created if
    needed): `<name>.rodata.txt` with the {addr: string} pairs and
    `<name>.logs.txt` with the log database.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
    parser.add_argument('--logs-fields-section',
                        '-f',
                        default=LOGS_FIELDS_SECTION,
                        help="Elf section where log fields are written.")
    parser.add_argument('--rodata-sections',
                        '-r',
                        nargs="+",
                        action="append",
                        help="Elf sections with rodata.")
    parser.add_argument('--name',
                        '-n',
                        required=True,
                        help="Type of the SW elf being processed.")
    parser.add_argument('--outdir',
                        '-o',
                        required=True,
                        help="Output directory.")
    args = parser.parse_args()

    if args.rodata_sections is None:
        ro_sections = [RODATA_SECTION]
    else:
        # TODO: We want the `--rodata-sections` arg to have the 'extend' action
        # which is only available in Python 3.8. To maintain compatibility with
        # Python 3.6 (which is the minimum required version for OpenTitan), we
        # flatten the list here instead.
        # Duplicates are dropped while keeping the order given on the command
        # line. Going through a set instead would make the section order, and
        # with it the order of the entries in the rodata file, vary from one
        # run to the next.
        ro_sections = []
        for lst in args.rodata_sections:
            for section in lst:
                if section not in ro_sections:
                    ro_sections.append(section)

    os.makedirs(args.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
                                     ro_sections)

    outfile = os.path.join(args.outdir, args.name + ".rodata.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(rodata.strip())

    outfile = os.path.join(args.outdir, args.name + ".logs.txt")
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(result.strip())


if __name__ == "__main__":
    main()