blob: a6a6794b84593df6f531876b6a6e1e5a2b60e88d [file] [log] [blame]
Srikrishna Iyer57235522020-03-09 11:03:38 -07001#!/usr/bin/env python3
2# Copyright lowRISC contributors.
3# Licensed under the Apache License, Version 2.0, see LICENSE for details.
4# SPDX-License-Identifier: Apache-2.0
5"""Script to convert logs placed in given sections into SystemVerilog-friendly
6database.
7
8The tool uses the pyelftools utility to extract the log fields from a given
9section and the strings from read only sections. It processes the log fields
10& the strings and converts them into a database. The script produces 2 outputs:
- <name>_logs.txt, which is the log database
12- <name>_rodata.txt which contains {addr: string} pairs.
13"""
14
15import argparse
16import os
17import re
18import struct
19import sys
20
21from elftools.elf import elffile
22
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields:
# severity (int), 4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
# file_name (int, ptr), 4 bytes: Pointer to file_name string.
# Line no (int), 4 bytes: Line number of the log message.
# Nargs (int), 4 bytes: Number of arguments the format string takes.
# format (int, ptr), 4 bytes: Log format string.
#
# Total size of log_fields_t: 20 bytes.

# Default name of the elf section the log_fields_t records are read from.
LOGS_FIELDS_SECTION = '.logs.fields'
# Size in bytes of one log_fields_t record (5 fields of 4 bytes each).
LOGS_FIELDS_SIZE = 20
# Default name of the read-only section the strings are read from.
RODATA_SECTION = '.rodata'
36
37
def cleanup_newlines(string):
    '''Collapses every run of line breaks into a single carriage return.

    A newline left in the middle of a string would be copied as-is into the
    files this script generates. Those files are consumed by a monitor
    written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited
    parsing / processing capability. To keep the SV side simple, a multiline
    string is emitted on one line, with a single carriage return wherever a
    run of "\\n" / "\\r" characters used to be; the SV monitor can easily
    replace it with a newline again.

    Leading and trailing whitespace is removed from the returned string.'''
    single_line = re.compile(r"[\n\r]+").sub("\r", string)
    return single_line.strip()
50
51
def cleanup_format(_format):
    '''Rewrites a C style format string into its SV style equivalent.

    The following substitutions are applied, in this order:
    - %[N]?i, %[N]?u --> %[N]?d
    - %[N]?x, %[N]?p --> %[N]?h
    - %[N]?X         --> %[N]?H
    - %[N]?z         --> %[N]?s[%d]
    - %b, %c, %o, %d, %h, %H, %s written without a width get a width of 0,
      e.g. %d --> %0d.

    %z is a non-standard format specifier added in OpenTitan (see
    sw/device/lib/base/print.c for more details). A single %z consumes 2
    arguments instead of 1, so it is expanded into two specifiers to prevent
    the log monitor in SystemVerilog from throwing an error at runtime.

    The converted string is finally passed through `cleanup_newlines()`.'''
    # (C style pattern, SV style replacement). The order matters: the last
    # entry also acts on specifiers produced by the entries before it.
    rewrites = (
        (r"(%-?\d*)[iu]", r"\1d"),
        (r"(%-?\d*)[xp]", r"\1h"),
        (r"(%-?\d*)X", r"\1H"),
        (r"(%-?\d*)z", r"\1s[%d]"),
        (r"%([bcodhHs])", r"%0\1"),
    )
    converted = _format
    for c_spec, sv_spec in rewrites:
        converted = re.sub(c_spec, sv_spec, converted)
    return cleanup_newlines(converted)
71
72
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the 2nd and the 5th arg to the
    format are strings (counting from 0). The '%%' does not accept an arg so
    it is not counted.

    It is assumed that _format has been passed through `cleanup_format()`.

    Args:
        _format: The format string to scan.

    Returns:
        The 0-based argument indices of all %s specifiers as a string,
        separated by a single space. Empty if there are none.
    '''
    # Raw string: the pattern contains regex escapes (\d, \*, \.) that are
    # not valid Python string escapes.
    pattern = r'''
        %                   # literal "%"
        (?:[-+0 #]{0,5})    # optional flags
        (?:\d+|\*)?         # width
        (?:\.(?:\d+|\*))?   # precision
        (?:l|ll)?           # size
        ([cdiouxpXshH])     # type (returned if matched)
        |                   # OR
        %(%)                # literal "%%" (returned if matched)
        '''
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    str_indices = []
    arg_index = 0
    for spec_type, escaped_percent in re.findall(pattern, _format, re.X):
        if escaped_percent == '%':
            # "%%" prints a literal percent sign and takes no argument.
            continue
        if spec_type == 's':
            str_indices.append(str(arg_index))
        arg_index += 1
    return ' '.join(str_indices)
103
104
def prune_filename(filename):
    '''Trims the filename so that it starts at its first "sw/device".

    A filename that does not contain "sw/device" is returned unchanged.'''
    _, marker, below = filename.partition("sw/device")
    if not marker:
        return filename
    return marker + below
110
111
def get_addr_strings(ro_contents):
    '''Construct {addr: string} dict from all read-only sections.

    Each section is split at its NUL bytes. Every non-empty piece is decoded
    as UTF-8 (undecodable bytes are replaced), passed through
    `cleanup_newlines()` and stored under the address of its first byte.

    Args:
        ro_contents: Iterable of (base_addr, size, data) tuples, one per
            read-only section, where `data` is the section content in bytes.

    Returns:
        A dict mapping the address of each string to the string itself.

    Raises:
        IndexError: If two strings end up at the same address.
    '''
    result = {}
    for base_addr, size, data in ro_contents:
        # Never scan past the bytes that are actually available.
        limit = min(size, len(data))
        str_start = 0
        while str_start < limit:
            str_end = data.find(b'\0', str_start)
            if str_end == -1:
                # The trailing bytes of the section have no terminator.
                # Treat the end of the data as the end of the string; using
                # -1 as an index would drop the last byte and restart the
                # scan from the beginning of the section.
                str_end = len(data)
            # Skip if start and end is the same
            if str_start == str_end:
                str_start += 1
                continue
            # Get full string address by adding base addr to the start.
            addr = base_addr + str_start
            string = cleanup_newlines(data[str_start:str_end].decode(
                'utf-8', errors='replace'))
            if addr in result:
                exc_msg = "Error: duplicate {addr: string} pair encountered\n"
                exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
                exc_msg += "addr: {} string: {}\n".format(addr, string)
                raise IndexError(exc_msg)
            result[addr] = string
            str_start = str_end + 1
    return result
140
141
def get_str_at_addr(str_addr, addr_strings):
    '''Looks up the string that covers the provided addr.

    The addr does not have to be the start of a string. If it points inside
    one, the remainder of that string starting at that offset is returned.
    Surrounding whitespace is stripped from the returned string.

    Raises KeyError if no string in addr_strings covers the addr.'''
    for base, text in addr_strings.items():
        offset = str_addr - base
        if 0 <= offset < len(text):
            return text[offset:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))
151
152
def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the log database and the constant strings from an elf file.

    Args:
        elf_file: Path to the elf file to read.
        logs_fields_section: Name of the section holding the log_fields_t
            records.
        ro_sections: Names of the read-only sections holding the strings
            the records point to.

    Returns:
        A (rodata, result) tuple of strings. `rodata` lists every constant
        string as an "addr:" / "string:" line pair. `result` lists every log
        record as "addr:", "severity:", "file:", "line:", "nargs:",
        "format:" and "str_arg_idx:" lines.

    Prints an error and exits with status 1 if one of the requested sections
    is not found in the elf file.
    '''
    # Open the elf file.
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = []
        for ro_section in ro_sections:
            section = elf.get_section_by_name(name=ro_section)
            if not section:
                print("Error: {} section not found in {}".format(
                    ro_section, elf_file))
                sys.exit(1)
            ro_contents.append((int(section.header['sh_addr']),
                                int(section.header['sh_size']),
                                section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Dump the {addr: string} data.
        rodata_lines = []
        for addr, string in addr_strings.items():
            rodata_lines.append("addr: {}\n".format(hex(addr)[2:]))
            rodata_lines.append("string: {}\n".format(
                cleanup_newlines(string)))

        # Parse the logs fields section to extract the logs.
        section = elf.get_section_by_name(name=logs_fields_section)
        if not section:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the records with the byte order of the elf file rather than
        # that of the machine running this script, and with standard 4 byte
        # fields without padding.
        byte_order = '<' if elf.little_endian else '>'
        log_fields = struct.Struct(byte_order + 'IIIII')

        # Dump the logs with fields. Trailing bytes that do not make up a
        # full record are ignored.
        log_lines = []
        for i in range(logs_size // LOGS_FIELDS_SIZE):
            start = i * LOGS_FIELDS_SIZE
            severity, file_addr, line, nargs, format_addr = \
                log_fields.unpack_from(logs_data, start)
            fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
            log_lines.append("addr: {}\n".format(
                hex(logs_base_addr + start)[2:]))
            log_lines.append("severity: {}\n".format(severity))
            log_lines.append("file: {}\n".format(
                prune_filename(get_str_at_addr(file_addr, addr_strings))))
            log_lines.append("line: {}\n".format(line))
            log_lines.append("nargs: {}\n".format(nargs))
            log_lines.append("format: {}\n".format(fmt))
            log_lines.append("str_arg_idx: {}\n".format(
                get_string_format_specifier_indices(fmt)))

        return "".join(rodata_lines), "".join(log_lines)
214
215
def main():
    '''Parses the command line, extracts the logs and writes the outputs.

    Two files are written into the output directory, which is created if
    needed: <name>_rodata.txt and <name>_logs.txt.'''
    # (flags, settings) for each command line option, in help order.
    option_specs = (
        (('--elf-file', '-e'),
         dict(required=True, help="Elf file")),
        (('--logs-fields-section', '-f'),
         dict(default=LOGS_FIELDS_SECTION,
              help="Elf section where log fields are written.")),
        (('--rodata-sections', '-r'),
         dict(default=[RODATA_SECTION],
              nargs="+",
              help="Elf sections with rodata.")),
        (('--name', '-n'),
         dict(required=True, help="Type of the SW elf being processed.")),
        (('--outdir', '-o'),
         dict(required=True, help="Output directory.")),
    )
    parser = argparse.ArgumentParser()
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    cli = parser.parse_args()

    os.makedirs(cli.outdir, exist_ok=True)
    rodata, result = extract_sw_logs(cli.elf_file, cli.logs_fields_section,
                                     cli.rodata_sections)

    # The rodata file is written first, then the log database.
    for suffix, contents in (("_rodata.txt", rodata), ("_logs.txt", result)):
        outfile = os.path.join(cli.outdir, cli.name + suffix)
        with open(outfile, "w", encoding='utf-8') as f:
            f.write(contents.strip())


if __name__ == "__main__":
    main()