Blame - util/device_sw_utils/extract_sw_logs.py - 3p/lowrisc/opentitan

2020-03-09 11:03:38 -0700

[diff] [blame]

1

#!/usr/bin/env python3

2

# Copyright lowRISC contributors.

3

# Licensed under the Apache License, Version 2.0, see LICENSE for details.

4

# SPDX-License-Identifier: Apache-2.0

5

"""Script to convert logs placed in given sections into SystemVerilog-friendly

6

database.

7

8

The tool uses the pyelftools utility to extract the log fields from a given

9

section and the strings from read only sections. It processes the log fields

10

& the strings and converts them into a database. The script produces 2 outputs:

11

- <name>.logs.txt, which is the log database

12

- <name>.rodata.txt, which contains {addr: string} pairs.

"""

import argparse

import os

import re

import struct

import sys

from elftools.elf import elffile

22

23

# Every printf-style log statement in the C code is turned into a single write
# to a reserved address in the RAM. The value written is the address of the
# log_fields_t struct constructed from that log, which has these fields:
#   severity  (int),      4 bytes: 0 (I), 1 (W), 2 (E), 3 (F)
#   file_name (int, ptr), 4 bytes: Pointer to the file_name string.
#   line no   (int),      4 bytes: Line number of the log message.
#   nargs     (int),      4 bytes: Number of arguments the format string takes.
#   format    (int, ptr), 4 bytes: Pointer to the log format string.
#
# Total size of log_fields_t: 5 fields * 4 bytes = 20 bytes.
LOGS_FIELDS_SECTION = '.logs.fields'
LOGS_FIELDS_SIZE = 20
RODATA_SECTION = '.rodata'

36

37

38

def cleanup_newlines(string):
    '''Collapses each run of newline characters into one carriage return.

    A newline in the middle of a string would otherwise end up as a newline
    in the output files this script generates. Those files are consumed by a
    monitor written in SystemVerilog (hw/dv/sv/sw_logger_if), a language with
    limited parsing / processing capability. Parsing on the SV side is made
    easier by keeping every multiline string on a single line, with a single
    carriage return as the separator, which the SV monitor can easily replace
    with a newline.

    Leading and trailing whitespace is stripped from the returned string.'''
    single_line = re.sub(r"[\n\r]+", "\r", string)
    return single_line.strip()

50

51

52

def cleanup_format(_format):
    '''Converts C style format specifiers to SV style.

    It makes the following substitutions:
    - Change %[N]?i, %[N]?u --> %[N]?d
    - Change %[N]?x, %[N]?p --> %[N]?h
    - Change %[N]?X --> %[N]?H

    The below are non-standard format specifiers added in OpenTitan
    (see sw/device/lib/base/print.c for more details). A single %!s specifier
    consumes 2 arguments instead of 1 and hence has to be converted as such to
    prevent the log monitor in SystemVerilog from throwing an error at runtime.
    The %!{x, X, y, Y} specifiers have the same property, but can print
    garbage, so they're converted to pointers instead.
    - Change %![N]?s --> %[N]?s[%d]
    - Change %![N]?[xXyY] --> %[N]?h[%d]
    - Change %![N]?b --> %[N]?d[%d]

    Finally, every %b, %c, %o, %d, %h, %H and %s specifier that carries no
    width is given an explicit width of 0 (e.g. %d --> %0d).

    A literal "%%" is left untouched, and so is the text that follows it:
    "%%d" stays "%%d" instead of being mistaken for a "%d" specifier.

    The result is passed through cleanup_newlines() before it is returned.'''
    # Applied in order; the last rule relies on the earlier ones having
    # already mapped every specifier onto the SV type letters.
    substitutions = (
        (r"%(-?\d*)[iu]", r"%\1d"),
        (r"%(-?\d*)[xp]", r"%\1h"),
        (r"%(-?\d*)X", r"%\1H"),
        (r"%!(-?\d*)s", r"%\1s[%d]"),
        (r"%!(-?\d*)[xXyY]", r"%\1h[%d]"),
        (r"%!(-?\d*)b", r"%\1d[%d]"),
        (r"%([bcodhHs])", r"%0\1"),
    )

    def convert(segment):
        for pattern, replacement in substitutions:
            segment = re.sub(pattern, replacement, segment)
        return segment

    # Splitting on "%%" keeps the second "%" of an escaped percent sign from
    # being treated as the start of a specifier. The pieces are converted
    # independently and then glued back together with the original "%%".
    segments = _format.split("%%")
    return cleanup_newlines("%%".join(convert(seg) for seg in segments))

77

78

Srikrishna Iyer

2020-04-17 19:51:04 -0700

[diff] [blame]

79

def get_string_format_specifier_indices(_format):
    '''Returns the indices of string format specifiers %s in the format string.

    Example: a = %d, %%b = %%%2c, %%%% c = %5s, %% d = %o, e = %x, f = %-1s
    The function will return: `2 5` because the args at (zero-based) indices
    2 and 5 are strings. The '%%' does not accept an arg so it is ignored.
    The returned value is a string of indices separated by a single space;
    it is empty if the format takes no string args.

    It is assumed that _format has been passed through `cleanup_format()`.
    '''
    # Raw string: the pattern contains regex escapes such as \d and \. which
    # are invalid escape sequences in a regular string literal.
    pattern = r'''
        %                    # literal "%"
        (?:[-+0 #]{0,5})     # optional flags
        (?:\d+|\*)?          # width
        (?:\.(?:\d+|\*))?    # precision
        (?:l|ll)?            # size
        ([cdiouxpXshH])      # type (returned if matched)
        |                    # OR
        %(%)                 # literal "%%" (returned if matched)
    '''
    matches = re.findall(pattern, _format, re.X)
    # With the above example, the output of the pattern match is:
    # [('d', ''), ('', '%'), ('', '%'), ('c', ''), and so on..]
    index = 0
    result = []
    for spec_type, literal_percent in matches:
        # A literal "%%" consumes no argument, so it does not advance index.
        if literal_percent == '%':
            continue
        if spec_type == 's':
            result.append(str(index))
        index += 1
    return ' '.join(result)

109

110

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

111

def prune_filename(filename):
    '''Trims the filename so that it starts at the "sw/device" hierarchy.

    Everything before the first occurrence of "sw/device" is dropped. A
    filename that does not contain "sw/device" is returned unchanged.'''
    hier = "sw/device"
    _, found, remainder = filename.partition(hier)
    if not found:
        return filename
    return found + remainder

116

117

118

def get_addr_strings(ro_contents):
    '''Builds an {addr: string} dict from the read-only sections of an elf.

    ro_contents is a list of (base addr, size, data in bytes) tuples, one per
    read-only section. Each section is scanned for NUL-terminated strings;
    every non-empty one is decoded as UTF-8 (undecodable bytes are replaced),
    passed through cleanup_newlines() and stored under its absolute address.

    Raises IndexError if the same address is encountered more than once.'''
    addr_to_string = {}
    for base_addr, size, data in ro_contents:
        cursor = 0
        while cursor < size:
            nul_pos = data.find(b'\0', cursor)
            # No nul byte left: the remainder of this section can't contain
            # any C-strings, so move on to the next section.
            if nul_pos == -1:
                break
            # A nul byte right at the cursor is an empty string; step over it.
            if nul_pos == cursor:
                cursor += 1
                continue
            # The full string address is the section base plus the offset.
            addr = base_addr + cursor
            raw = data[cursor:nul_pos]
            string = cleanup_newlines(raw.decode('utf-8', errors='replace'))
            if addr in addr_to_string:
                exc_msg = "".join([
                    "Error: duplicate {addr: string} pair encountered\n",
                    "addr: {} string: {}\n".format(addr,
                                                   addr_to_string[addr]),
                    "addr: {} string: {}\n".format(addr, string),
                ])
                raise IndexError(exc_msg)
            addr_to_string[addr] = string
            cursor = nul_pos + 1
    return addr_to_string

def get_str_at_addr(str_addr, addr_strings):
    '''Looks up the string located at str_addr.

    str_addr does not have to be the start of a string: if it falls inside
    one, the remainder of that string starting at the offset is returned.
    The returned string has leading and trailing whitespace stripped.

    Raises KeyError if no string in addr_strings covers str_addr.'''
    for base, text in addr_strings.items():
        offset = str_addr - base
        if 0 <= offset < len(text):
            return text[offset:].strip()
    raise KeyError("string at addr {} not found".format(str_addr))

161

162

163

def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
    '''Extracts the rodata strings and the log database from an elf file.

    Args:
        elf_file: Path to the elf file to process.
        logs_fields_section: Name of the elf section that holds the packed
            log_fields_t records.
        ro_sections: Names of the read-only elf sections to scan for
            strings.

    Returns:
        A (rodata, result) tuple of strings. `rodata` lists every
        {addr: string} pair found in the read-only sections; `result` lists
        every log record along with its fields.

    Prints an error and exits with status 1 if any of the requested sections
    is not found in the elf file.
    '''
    with open(elf_file, 'rb') as f:
        elf = elffile.ELFFile(f)

        # Parse the ro sections to get {addr: string} pairs.
        ro_contents = []
        for ro_section in ro_sections:
            section = elf.get_section_by_name(name=ro_section)
            if not section:
                print("Error: {} section not found in {}".format(
                    ro_section, elf_file))
                sys.exit(1)
            ro_contents.append((int(section.header['sh_addr']),
                                int(section.header['sh_size']),
                                section.data()))
        addr_strings = get_addr_strings(ro_contents)

        # Read the logs fields section that holds the log records.
        section = elf.get_section_by_name(name=logs_fields_section)
        if not section:
            print("Error: {} section not found in {}".format(
                logs_fields_section, elf_file))
            sys.exit(1)
        logs_base_addr = int(section.header['sh_addr'])
        logs_size = int(section.header['sh_size'])
        logs_data = section.data()

        # Decode the records with the byte order of the elf rather than that
        # of the machine running this script.
        byte_order = '<' if elf.little_endian else '>'

    # log_fields_t is five unsigned 32-bit fields with no padding.
    log_fields = struct.Struct(byte_order + 'IIIII')

    # Dump the {addr: string} data.
    rodata = []
    for addr, string in addr_strings.items():
        rodata.append("addr: {}\n".format(hex(addr)[2:]))
        rodata.append("string: {}\n".format(cleanup_newlines(string)))

    # Dump the logs with fields. Trailing bytes that do not make up a whole
    # record are ignored.
    result = []
    for i in range(logs_size // LOGS_FIELDS_SIZE):
        start = i * LOGS_FIELDS_SIZE
        severity, file_addr, line, nargs, format_addr = \
            log_fields.unpack_from(logs_data, start)
        file_name = prune_filename(get_str_at_addr(file_addr, addr_strings))
        fmt = cleanup_format(get_str_at_addr(format_addr, addr_strings))
        result.append("addr: {}\n".format(hex(logs_base_addr + start)[2:]))
        result.append("severity: {}\n".format(severity))
        result.append("file: {}\n".format(file_name))
        result.append("line: {}\n".format(line))
        result.append("nargs: {}\n".format(nargs))
        result.append("format: {}\n".format(fmt))
        result.append("str_arg_idx: {}\n".format(
            get_string_format_specifier_indices(fmt)))

    return "".join(rodata), "".join(result)

def main():

parser = argparse.ArgumentParser()

228

parser.add_argument('--elf-file', '-e', required=True, help="Elf file")

229

parser.add_argument('--logs-fields-section',

230

'-f',

231

default=LOGS_FIELDS_SECTION,

232

help="Elf section where log fields are written.")

233

parser.add_argument('--rodata-sections',

234

'-r',

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

235

nargs="+",

Srikrishna Iyer

2020-11-20 23:27:10 -0800

[diff] [blame]

236

action="append",

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

237

help="Elf sections with rodata.")

238

parser.add_argument('--name',

239

'-n',

240

required=True,

241

help="Type of the SW elf being processed.")

242

parser.add_argument('--outdir',

243

'-o',

244

required=True,

245

help="Output directory.")

246

args = parser.parse_args()

247

Srikrishna Iyer

2020-11-20 23:27:10 -0800

[diff] [blame]

248

if args.rodata_sections is None:

249

ro_sections = [RODATA_SECTION]

250

else:

251

# TODO: We want the `--rodata-sections` arg to have the 'extend' action

252

# which is only available in Python 3.8. To maintain compatibility with

253

# Python 3.6 (which is the minimum required version for OpenTitan), we

254

# flatten the list here instead.

255

ro_sections = list(

256

set([section for lst in args.rodata_sections for section in lst]))

257

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

258

os.makedirs(args.outdir, exist_ok=True)

259

rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,

Srikrishna Iyer

2020-11-20 23:27:10 -0800

[diff] [blame]

260

ro_sections)

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

261

Srikrishna Iyer

2020-11-20 23:27:10 -0800

[diff] [blame]

262

outfile = os.path.join(args.outdir, args.name + ".rodata.txt")

Srikrishna Iyer

2020-03-09 11:03:38 -0700

[diff] [blame]

263

with open(outfile, "w", encoding='utf-8') as f:

264

f.write(rodata.strip())

265

Srikrishna Iyer

2020-11-20 23:27:10 -0800

[diff] [blame]

266

outfile = os.path.join(args.outdir, args.name + ".logs.txt")

Srikrishna Iyer