Blame - util/device_sw_utils/extract_sw_logs.py - 3p/lowrisc/opentitan

blob: a6a6794b84593df6f531876b6a6e1e5a2b60e88d [file] [log] [blame]

Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	1	#!/usr/bin/env python3
				2	# Copyright lowRISC contributors.
				3	# Licensed under the Apache License, Version 2.0, see LICENSE for details.
				4	# SPDX-License-Identifier: Apache-2.0
				5	"""Script to convert logs placed in given sections into SystemVerilog-friendly
				6	database.
				7
				8	The tool uses the pyelftools utility to extract the log fields from a given
				9	section and the strings from read only sections. It processes the log fields
				10	& the strings and converts them into a database. The script produces 2 outputs:
				11	- <name>_logs.txt, which is the log database
				12	- <name>_rodata.txt which contains {addr: string} pairs.
				13	"""
				14
				15	import argparse
				16	import os
				17	import re
				18	import struct
				19	import sys
				20
				21	from elftools.elf import elffile
				22
# A printf statement in C code is converted into a single write to a reserved
# address in the RAM. The value written is the address of the log_fields_t
# struct constructed from the log. It has the following fields:
# severity (int), 4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
# file_name (int, ptr), 4 bytes: Pointer to file_name string.
# Line no (int), 4 bytes: Line number of the log message.
# Nargs (int), 4 bytes: Number of arguments the format string takes.
# format (int, ptr), 4 bytes: Pointer to the log format string.
#
# Total size of log_fields_t: 20 bytes.
LOGS_FIELDS_SECTION = '.logs.fields'
LOGS_FIELDS_SIZE = 20
RODATA_SECTION = '.rodata'
				36
				37
				38	def cleanup_newlines(string):
				39	'''Replaces newlines with a carriage return.
				40
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	41	The reason for doing so if a newline is encountered in the middle of a
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	42	string, it ends up adding that newline in the output files this script
				43	generates. The output of this script is consumed by a monitor written in
				44	SystemVerilog (hw/dv/sv/sw_logger_if), a language with limited parsing
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	45	/ processing capability. So we make the parsing easier on the SV side by
				46	putting all multiline strings on a single line, separated by a single
				47	carriage return instead, which the SV monitor can easily replace with
				48	a newline.'''
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	49	return re.sub(r"[\n\r]+", "\r", string).strip()
				50
				51
				52	def cleanup_format(_format):
				53	'''Converts C style format specifiers to SV style.
				54
				55	It makes the folllowing substitutions:
				56	- Change %[N]?i, %[N]?u --> %[N]?d
				57	- Change %[N]?x, %[N]?p --> %[N]?h
				58	- Change %[N]?X --> %[N]?H
				59
				60	The below is a non-standard format specifier added in OpenTitan
				61	(see sw/device/lib/base/print.c for more details). A single %z specifier
				62	consumes 2 arguments instead of 1 and hence has to converted as such to
				63	prevent the log monitor in SystemVerilog from throwing an error at runtime.
				64	- Change %[N]?z --> %[N]?s[%d].'''
				65	_format = re.sub(r"(%-?\d*)[iu]", r"\1d", _format)
				66	_format = re.sub(r"(%-?\d*)[xp]", r"\1h", _format)
				67	_format = re.sub(r"(%-?\d*)X", r"\1H", _format)
				68	_format = re.sub(r"(%-?\d*)z", r"\1s[%d]", _format)
				69	_format = re.sub(r"%([bcodhHs])", r"%0\1", _format)
				70	return cleanup_newlines(_format)
				71
				72
def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (zero-based) index 2
    and 5 of the format are strings. A '%%' does not accept an arg, so it is
    skipped without advancing the index.

    It is assumed that _format has been passed through `cleanup_format()`.

    Args:
        _format: The format string to scan.

    Returns:
        A string of zero-based argument indices separated by a single space,
        or an empty string if the format takes no string argument.
    '''
    # Raw string so that the regex escapes (\d, \., \*) reach the regex engine
    # untouched. The two alternatives are joined by a real "|": an escaped
    # "\|" would be a literal pipe character and the pattern would then never
    # match an ordinary format string.
    pattern = r'''
        %                   # literal "%"
        (?:[-+0 #]{0,5})    # optional flags
        (?:\d+|\*)?         # width
        (?:\.(?:\d+|\*))?   # precision
        (?:l|ll)?           # size
        ([cdiouxpXshH])     # type (returned if matched)
        |                   # OR
        %(%)                # literal "%%" (returned if matched)
        '''
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    result = []
    index = 0
    for spec_type, escaped_percent in matches:
        if escaped_percent == '%':
            # A literal "%%" consumes no argument.
            continue
        if spec_type == 's':
            result.append(str(index))
        index += 1
    return ' '.join(result)
				103
				104
def prune_filename(filename):
    '''Prunes the filename to only display the hierarchy from sw/device on.

    Everything before the first occurrence of "sw/device" is dropped. A
    filename that does not contain "sw/device" is returned unchanged.'''
    _, marker, remainder = filename.partition("sw/device")
    if not marker:
        return filename
    return marker + remainder
				110
				111
				112	def get_addr_strings(ro_contents):
				113	'''Construct {addr: string} dict from all read-only sections.
				114
				115	This function processes the read-only sections of the elf supplied as
				116	a list of ro_content tuples comprising of base addr, size and data in bytes
				117	and converts it into an {addr: string} dict which is returned.'''
				118	result = {}
				119	for ro_content in ro_contents:
				120	str_start = 0
				121	base_addr, size, data = ro_content
				122	while (str_start < size):
				123	str_end = data.find(b'\0', str_start)
				124	# Skip if start and end is the same
				125	if str_start == str_end:
				126	str_start += 1
				127	continue
				128	# Get full string address by adding base addr to the start.
				129	addr = base_addr + str_start
				130	string = cleanup_newlines(data[str_start:str_end].decode(
				131	'utf-8', errors='replace'))
				132	if addr in result:
				133	exc_msg = "Error: duplicate {addr: string} pair encountered\n"
				134	exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
				135	exc_msg += "addr: {} string: {}\n".format(addr, string)
				136	raise IndexError(exc_msg)
				137	result[addr] = string
				138	str_start = str_end + 1
				139	return result
				140
				141
				142	def get_str_at_addr(str_addr, addr_strings):
				143	'''Returns the string at the provided addr.
				144
				145	It may be possible that the input addr is an offset within the string.
				146	If true, then it returns remainder of the string starting at the offset.'''
				147	for addr in addr_strings.keys():
				148	if addr <= str_addr < addr + len(addr_strings[addr]):
				149	return addr_strings[addr][str_addr - addr:].strip()
				150	raise KeyError("string at addr {} not found".format(str_addr))
				151
				152
				153	def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
				154	'''This function extracts contents from the logs fields section, and the
				155	read only sections, processes them and generates a tuple of (results) -
				156	log with fields and (rodata) - constant strings with their addresses.
				157	'''
				158	# Open the elf file.
				159	with open(elf_file, 'rb') as f:
				160	elf = elffile.ELFFile(f)
				161	# Parse the ro sections to get {addr: string} pairs.
				162	ro_contents = []
				163	for ro_section in ro_sections:
				164	section = elf.get_section_by_name(name=ro_section)
				165	if section:
				166	base_addr = int(section.header['sh_addr'])
				167	size = int(section.header['sh_size'])
				168	data = section.data()
				169	ro_contents.append((base_addr, size, data))
				170	else:
				171	print("Error: {} section not found in {}".format(
				172	ro_section, elf_file))
				173	sys.exit(1)
				174	addr_strings = get_addr_strings(ro_contents)
				175
				176	# Dump the {addr: string} data.
				177	rodata = ""
				178	for addr in addr_strings.keys():
				179	rodata += "addr: {}\n".format(hex(addr)[2:])
				180	string = cleanup_newlines(addr_strings[addr])
				181	rodata += "string: {}\n".format(string)
				182
				183	# Parse the logs fields section to extract the logs.
				184	section = elf.get_section_by_name(name=logs_fields_section)
				185	if section:
				186	logs_base_addr = int(section.header['sh_addr'])
				187	logs_size = int(section.header['sh_size'])
				188	logs_data = section.data()
				189	else:
				190	print("Error: {} section not found in {}".format(
				191	logs_fields_section, elf_file))
				192	sys.exit(1)
				193
				194	# Dump the logs with fields.
				195	result = ""
				196	num_logs = logs_size // LOGS_FIELDS_SIZE
				197	for i in range(num_logs):
				198	start = i * LOGS_FIELDS_SIZE
				199	end = start + LOGS_FIELDS_SIZE
				200	severity, file_addr, line, nargs, format_addr = struct.unpack(
				201	'IIIII', logs_data[start:end])
				202	result += "addr: {}\n".format(hex(logs_base_addr + start)[2:])
				203	result += "severity: {}\n".format(severity)
				204	result += "file: {}\n".format(
				205	prune_filename(get_str_at_addr(file_addr, addr_strings)))
				206	result += "line: {}\n".format(line)
				207	result += "nargs: {}\n".format(nargs)
Srikrishna Iyer	6d3f9f8	2020-04-17 19:51:04 -0700	[diff] [blame]	208	fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
				209	result += "format: {}\n".format(fmt)
				210	result += "str_arg_idx: {}\n".format(
				211	get_string_format_specifier_indices(fmt))
Srikrishna Iyer	5723552	2020-03-09 11:03:38 -0700	[diff] [blame]	212
				213	return rodata, result
				214
				215
				216	def main():
				217	parser = argparse.ArgumentParser()
				218	parser.add_argument('--elf-file', '-e', required=True, help="Elf file")
				219	parser.add_argument('--logs-fields-section',
				220	'-f',
				221	default=LOGS_FIELDS_SECTION,
				222	help="Elf section where log fields are written.")
				223	parser.add_argument('--rodata-sections',
				224	'-r',
				225	default=[RODATA_SECTION],
				226	nargs="+",
				227	help="Elf sections with rodata.")
				228	parser.add_argument('--name',
				229	'-n',
				230	required=True,
				231	help="Type of the SW elf being processed.")
				232	parser.add_argument('--outdir',
				233	'-o',
				234	required=True,
				235	help="Output directory.")
				236	args = parser.parse_args()
				237
				238	os.makedirs(args.outdir, exist_ok=True)
				239	rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
				240	args.rodata_sections)
				241
				242	outfile = os.path.join(args.outdir, args.name + "_rodata.txt")
				243	with open(outfile, "w", encoding='utf-8') as f:
				244	f.write(rodata.strip())
				245
				246	outfile = os.path.join(args.outdir, args.name + "_logs.txt")
				247	with open(outfile, "w", encoding='utf-8') as f:
				248	f.write(result.strip())
				249
				250
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()