Blame - util/device_sw_utils/extract_sw_logs.py - 3p/lowrisc/opentitan

blob: 27d75c4688289eb3c749228adb6ef75371ce9775 [file] [log] [blame]

Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	1	#!/usr/bin/env python3
				2	# Copyright lowRISC contributors.
				3	# Licensed under the Apache License, Version 2.0, see LICENSE for details.
				4	# SPDX-License-Identifier: Apache-2.0
				5	"""Script to convert logs placed in given sections into SystemVerilog-friendly
				6	database.
				7
				8	The tool uses the pyelftools utility to extract the log fields from a given
				9	section and the strings from read only sections. It processes the log fields
				10	& the strings and converts them into a database. The script produces 2 outputs:
				11	- <name>.logs.txt, which is the log database
				12	- <name>.rodata.txt, which contains {addr: string} pairs.
				13	"""
				14
				15	import argparse
				16	import os
				17	import re
				18	import struct
				19	import sys
				20
				21	from elftools.elf import elffile
				22
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields:
# severity (int), 4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
# file_name (int, ptr), 4 bytes: Pointer to file_name string.
# Line no (int), 4 bytes: Line number of the log message.
# Nargs (int), 4 bytes: Number of arguments the format string takes.
# format (int, ptr), 4 bytes: Log format string.
#
# Total size of log_fields_t: 20 bytes.

# Default name of the ELF section that holds the log_fields_t records.
LOGS_FIELDS_SECTION = '.logs.fields'
# Size in bytes of a single log_fields_t record (5 fields of 4 bytes each).
LOGS_FIELDS_SIZE = 20
# Read-only section scanned for strings when none is given on the command line.
RODATA_SECTION = '.rodata'
				36
				37
				38	def cleanup_newlines(string):
				39	'''Replaces newlines with a carriage return.
				40
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	41	The reason for doing so if a newline is encountered in the middle of a
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	42	string, it ends up adding that newline in the output files this script
				43	generates. The output of this script is consumed by a monitor written in
				44	SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited parsing
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	45	/ processing capability. So we make the parsing easier on the SV side by
				46	putting all multiline strings on a single line, separated by a single
				47	carriage return instead, which the SV monitor can easily replace with
				48	a newline.'''
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	49	return re.sub(r"[\n\r]+", "\r", string).strip()
				50
				51
				52	def cleanup_format(_format):
				53	'''Converts C style format specifiers to SV style.
				54
				55	It makes the folllowing substitutions:
				56	- Change %[N]?i, %[N]?u --> %[N]?d
				57	- Change %[N]?x, %[N]?p --> %[N]?h
				58	- Change %[N]?X --> %[N]?H
				59
				60	The below is a non-standard format specifier added in OpenTitan
				61	(see sw/device/lib/base/print.c for more details). A single %z specifier
				62	consumes 2 arguments instead of 1 and hence has to converted as such to
				63	prevent the log monitor in SystemVerilog from throwing an error at runtime.
				64	- Change %[N]?z --> %[N]?s[%d].'''
				65	_format = re.sub(r"(%-?\d*)[iu]", r"\1d", _format)
				66	_format = re.sub(r"(%-?\d*)[xp]", r"\1h", _format)
				67	_format = re.sub(r"(%-?\d*)X", r"\1H", _format)
				68	_format = re.sub(r"(%-?\d*)z", r"\1s[%d]", _format)
				69	_format = re.sub(r"%([bcodhHs])", r"%0\1", _format)
				70	return cleanup_newlines(_format)
				71
				72
def get_string_format_specifier_indices(_format):
    '''Returns the argument indices of the %s specifiers in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because, counting from zero, the
    arguments at positions 2 and 5 are strings. A literal '%%' does not
    accept an arg, so it is skipped without advancing the index.

    It is assumed that _format has been passed through `cleanup_format()`.

    Args:
        _format: The format string to scan.

    Returns:
        The zero-based indices as a string, separated by a single space.
        The string is empty when the format has no string specifier.
    '''
    # Raw string so that '\d' and '\.' reach the regex engine untouched.
    # With re.X, whitespace and '#' comments outside of character classes are
    # ignored, so the ' ' and '#' inside the flags class are still literal.
    pattern = r'''
        %                   # literal "%"
        (?:[-+0 #]{0,5})    # optional flags
        (?:\d+|\*)?         # width
        (?:\.(?:\d+|\*))?   # precision
        (?:l|ll)?           # size
        ([cdiouxpXshH])     # type (returned if matched)
        |                   # OR
        %(%)                # literal "%%" (returned if matched)
    '''
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    index = 0
    result = []
    for spec_type, literal_percent in matches:
        if literal_percent == '%':
            # An escaped percent sign consumes no argument.
            continue
        if spec_type == 's':
            result.append(str(index))
        index += 1
    return ' '.join(result)
				103
				104
def prune_filename(filename):
    'Trims the filename so that it starts at its first "sw/device" component.'
    # partition() splits at the first occurrence; an empty separator in the
    # result means the marker is absent and the name is returned unchanged.
    _, marker, remainder = filename.partition("sw/device")
    if not marker:
        return filename
    return marker + remainder
				110
				111
				112	def get_addr_strings(ro_contents):
				113	'''Construct {addr: string} dict from all read-only sections.
				114
				115	This function processes the read-only sections of the elf supplied as
				116	a list of ro_content tuples comprising of base addr, size and data in bytes
				117	and converts it into an {addr: string} dict which is returned.'''
				118	result = {}
				119	for ro_content in ro_contents:
				120	str_start = 0
				121	base_addr, size, data = ro_content
				122	while (str_start < size):
				123	str_end = data.find(b'\0', str_start)
Alphan Ulusoy	9dfa392	2021-05-19 15:54:35 -0400	[diff] [blame]	124	# Skip the remainder of this section since it can't contain any C-strings if
				125	# there are no nul bytes.
				126	if str_end == -1:
				127	break
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	128	# Skip if start and end is the same
				129	if str_start == str_end:
				130	str_start += 1
				131	continue
				132	# Get full string address by adding base addr to the start.
				133	addr = base_addr + str_start
				134	string = cleanup_newlines(data[str_start:str_end].decode(
				135	'utf-8', errors='replace'))
				136	if addr in result:
				137	exc_msg = "Error: duplicate {addr: string} pair encountered\n"
				138	exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
				139	exc_msg += "addr: {} string: {}\n".format(addr, string)
				140	raise IndexError(exc_msg)
				141	result[addr] = string
				142	str_start = str_end + 1
				143	return result
				144
				145
				146	def get_str_at_addr(str_addr, addr_strings):
				147	'''Returns the string at the provided addr.
				148
				149	It may be possible that the input addr is an offset within the string.
				150	If true, then it returns remainder of the string starting at the offset.'''
				151	for addr in addr_strings.keys():
				152	if addr <= str_addr < addr + len(addr_strings[addr]):
				153	return addr_strings[addr][str_addr - addr:].strip()
				154	raise KeyError("string at addr {} not found".format(str_addr))
				155
				156
				157	def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
				158	'''This function extracts contents from the logs fields section, and the
				159	read only sections, processes them and generates a tuple of (results) -
				160	log with fields and (rodata) - constant strings with their addresses.
				161	'''
				162	# Open the elf file.
				163	with open(elf_file, 'rb') as f:
				164	elf = elffile.ELFFile(f)
				165	# Parse the ro sections to get {addr: string} pairs.
				166	ro_contents = []
				167	for ro_section in ro_sections:
				168	section = elf.get_section_by_name(name=ro_section)
				169	if section:
				170	base_addr = int(section.header['sh_addr'])
				171	size = int(section.header['sh_size'])
				172	data = section.data()
				173	ro_contents.append((base_addr, size, data))
				174	else:
				175	print("Error: {} section not found in {}".format(
				176	ro_section, elf_file))
				177	sys.exit(1)
				178	addr_strings = get_addr_strings(ro_contents)
				179
				180	# Dump the {addr: string} data.
				181	rodata = ""
				182	for addr in addr_strings.keys():
				183	rodata += "addr: {}\n".format(hex(addr)[2:])
				184	string = cleanup_newlines(addr_strings[addr])
				185	rodata += "string: {}\n".format(string)
				186
				187	# Parse the logs fields section to extract the logs.
				188	section = elf.get_section_by_name(name=logs_fields_section)
				189	if section:
				190	logs_base_addr = int(section.header['sh_addr'])
				191	logs_size = int(section.header['sh_size'])
				192	logs_data = section.data()
				193	else:
				194	print("Error: {} section not found in {}".format(
				195	logs_fields_section, elf_file))
				196	sys.exit(1)
				197
				198	# Dump the logs with fields.
				199	result = ""
				200	num_logs = logs_size // LOGS_FIELDS_SIZE
				201	for i in range(num_logs):
				202	start = i * LOGS_FIELDS_SIZE
				203	end = start + LOGS_FIELDS_SIZE
				204	severity, file_addr, line, nargs, format_addr = struct.unpack(
				205	'IIIII', logs_data[start:end])
				206	result += "addr: {}\n".format(hex(logs_base_addr + start)[2:])
				207	result += "severity: {}\n".format(severity)
				208	result += "file: {}\n".format(
				209	prune_filename(get_str_at_addr(file_addr, addr_strings)))
				210	result += "line: {}\n".format(line)
				211	result += "nargs: {}\n".format(nargs)
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	212	fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
				213	result += "format: {}\n".format(fmt)
				214	result += "str_arg_idx: {}\n".format(
				215	get_string_format_specifier_indices(fmt))
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	216
				217	return rodata, result
				218
				219
				220	def main():
				221	parser = argparse.ArgumentParser()
				222	parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
				223	parser.add_argument('--logs-fields-section',
				224	'-f',
				225	default=LOGS_FIELDS_SECTION,
				226	help="Elf section where log fields are written.")
				227	parser.add_argument('--rodata-sections',
				228	'-r',
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	229	nargs="+",
Srikrishna Iyer	250a399	2020-11-20 23:27:10 -0800	[diff] [blame]	230	action="append",
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	231	help="Elf sections with rodata.")
				232	parser.add_argument('--name',
				233	'-n',
				234	required=True,
				235	help="Type of the SW elf being processed.")
				236	parser.add_argument('--outdir',
				237	'-o',
				238	required=True,
				239	help="Output directory.")
				240	args = parser.parse_args()
				241
Srikrishna Iyer	250a399	2020-11-20 23:27:10 -0800	[diff] [blame]	242	if args.rodata_sections is None:
				243	ro_sections = [RODATA_SECTION]
				244	else:
				245	# TODO: We want the `--rodata-sections` arg to have the 'extend' action
				246	# which is only available in Python 3.8. To maintain compatibility with
				247	# Python 3.6 (which is the minimum required version for OpenTitan), we
				248	# flatten the list here instead.
				249	ro_sections = list(
				250	set([section for lst in args.rodata_sections for section in lst]))
				251
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	252	os.makedirs(args.outdir, exist_ok=True)
				253	rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
Srikrishna Iyer	250a399	2020-11-20 23:27:10 -0800	[diff] [blame]	254	ro_sections)
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	255
Srikrishna Iyer	250a399	2020-11-20 23:27:10 -0800	[diff] [blame]	256	outfile = os.path.join(args.outdir, args.name + ".rodata.txt")
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	257	with open(outfile, "w", encoding='utf-8') as f:
				258	f.write(rodata.strip())
				259
Srikrishna Iyer	250a399	2020-11-20 23:27:10 -0800	[diff] [blame]	260	outfile = os.path.join(args.outdir, args.name + ".logs.txt")
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	261	with open(outfile, "w", encoding='utf-8') as f:
				262	f.write(result.strip())
				263
				264
				265	if __name__ == "__main__":
				266	main()