blob: 10be8e15a6e22408fd9c2bcd9a32471641e16bc8 [file] [log] [blame]
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
from collections import Counter
import typing
from typing import Dict, List, Optional, Tuple
from elftools.dwarf.dwarfinfo import DWARFInfo # type: ignore
from elftools.elf.elffile import ELFFile # type: ignore
from elftools.elf.sections import SymbolTableSection # type: ignore
from tabulate import tabulate
from .insn import BEQ, BNE, ECALL, JAL, JALR, LOOP, LOOPI
from .isa import OTBNInsn
from .state import OTBNState
class ExecutionStats:
def __init__(self, program: List[OTBNInsn]) -> None:
# Executed program (the contents of the instruction memory).
self.program = program
self.stall_count = 0
self.insn_histo = Counter() # type: typing.Counter[str]
self.func_calls = [] # type: List[Dict[str, int]]
self.loops = [] # type: List[Dict[str, int]]
# Histogram indexed by the length of the (extended) basic block.
self.basic_block_histo = Counter() # type: typing.Counter[int]
self.ext_basic_block_histo = Counter() # type: typing.Counter[int]
self._current_basic_block_len = 0
self._current_ext_basic_block_len = 0
def get_insn_count(self) -> int:
'''Get the number of executed instructions.'''
return sum(self.insn_histo.values())
def record_stall(self) -> None:
'''Record a single stall cycle.'''
self.stall_count += 1
def _insn_at_addr(self, addr: int) -> Optional[OTBNInsn]:
'''Get the instruction at a given address.'''
assert addr % 4 == 0
assert addr >= 0
word_addr = addr >> 2
return self.program[word_addr]
def record_insn(self,
insn: OTBNInsn,
state_bc: OTBNState) -> None:
'''Record the execution of an instruction.
insn is the currently executed instruction. state_bc is the state of
OTBN before the instruction is committed.
'''
pc = state_bc.pc
is_jump = isinstance(insn, JAL) or isinstance(insn, JALR)
is_branch = isinstance(insn, BEQ) or isinstance(insn, BNE)
# Instruction histogram
self.insn_histo[insn.insn.mnemonic] += 1
# Function calls
# - Direct function calls: jal x1, <offset>
# - Indirect function calls: jalr x1, <grs1>, 0
if is_jump and insn.grd == 1: # type: ignore
call_stack = state_bc.peek_call_stack()
if call_stack:
caller_func = call_stack[0]
else:
caller_func = state_bc.get_start_addr()
self.func_calls.append({
'call_site': pc,
'caller_func': caller_func,
'callee_func': state_bc.get_next_pc(),
})
# Loops
if isinstance(insn, LOOP) or isinstance(insn, LOOPI):
assert state_bc.in_loop()
iterations = state_bc.loop_stack.stack[-1].loop_count
self.loops.append({
'loop_addr': pc,
'loop_len': insn.bodysize,
'iterations': iterations,
})
is_last_insn_in_loop_body = state_bc.loop_stack.is_last_insn_in_loop_body(pc)
# Basic blocks
# A basic block is a linear sequence of code ending with an instruction
# that can potentially change the control flow: a jump, a branch, the
# last instruction in a loop (LOOP or LOOPI) body, or an ECALL. The
# length of the basic block equals the number of instructions within the
# basic block.
self._current_basic_block_len += 1
if (is_jump or is_branch or is_last_insn_in_loop_body or
isinstance(insn, ECALL)):
self.basic_block_histo[self._current_basic_block_len] += 1
self._current_basic_block_len = 0
# Extended basic blocks
# An extended basic block is a sequence of one or more basic blocks
# which can be statically determined at compile time.
# Extended basic blocks end with a branch, the last instruction in a
# LOOP body, or an ECALL instruction.
# Determine if the current instruction is the last instruction in a LOOP
# body (only LOOP, not LOOPI!).
finishing_loop = False
if is_last_insn_in_loop_body:
loop_insn_addr = state_bc.loop_stack.stack[-1].get_loop_insn_addr()
finishing_loop = isinstance(self._insn_at_addr(loop_insn_addr), LOOP)
self._current_ext_basic_block_len += 1
if is_branch or finishing_loop or isinstance(insn, ECALL):
self.ext_basic_block_histo[self._current_ext_basic_block_len] += 1
self._current_ext_basic_block_len = 0
def _dwarf_decode_file_line(dwarf_info: DWARFInfo,
address: int) -> Optional[Tuple[str, int]]:
# Go over all the line programs in the DWARF information, looking for
# one that describes the given address.
for CU in dwarf_info.iter_CUs():
# First, look at line programs to find the file/line for the address
lineprog = dwarf_info.line_program_for_CU(CU)
prevstate = None
for entry in lineprog.get_entries():
# We're interested in those entries where a new state is assigned
if entry.state is None:
continue
if entry.state.end_sequence:
# if the line number sequence ends, clear prevstate.
prevstate = None
continue
# Looking for a range of addresses in two consecutive states that
# contain the required address.
if prevstate and prevstate.address <= address < entry.state.address:
filename = lineprog['file_entry'][prevstate.file - 1].name.decode('utf-8')
line = prevstate.line
return filename, line
prevstate = entry.state
return None
def _get_addr_symbol_map(elf_file: ELFFile) -> Dict[int, str]:
section = elf_file.get_section_by_name('.symtab')
if not isinstance(section, SymbolTableSection):
return {}
return {sym.entry.st_value: sym.name for sym in section.iter_symbols()}
class ExecutionStatAnalyzer:
# Assumed clock frequency of OTBN, in MHz.
FREQ_MHZ = 100
def __init__(self, stats: ExecutionStats, elf_file_path: str):
self._elf_file = ELFFile(open(elf_file_path, 'rb'))
self._stats = stats
self._addr_symbol_map = _get_addr_symbol_map(self._elf_file)
def _describe_imem_addr(self, address: int) -> str:
symbol_name = None
if address in self._addr_symbol_map:
symbol_name = self._addr_symbol_map[address]
else:
# |func_addr| is the largest possible |sym_addr| which is at most
# |address|.
func_addr = 0
for sym_addr in self._addr_symbol_map.keys():
if sym_addr <= address and sym_addr > func_addr:
func_addr = sym_addr
symbol_name = self._addr_symbol_map[func_addr] + f"+{address - func_addr:#x}"
file_line = None
if self._elf_file.has_dwarf_info():
dwarf_info = self._elf_file.get_dwarf_info()
file_line = _dwarf_decode_file_line(dwarf_info, address)
add_info = []
if symbol_name:
add_info.append(f"{symbol_name}")
if file_line:
add_info.append(f"at {file_line[0]}:{file_line[1]}")
str = f"{address:#x}"
if add_info:
str += ' (' + ' '.join(add_info) + ')'
return str
def dump(self) -> str:
out = ""
out += "\n"
out += "Execution time\n"
out += "--------------\n"
out += self._dump_execution_time()
out += "\n\n"
out += "Instruction frequencies\n"
out += "-----------------------\n"
out += self._dump_insn_histo()
out += "\n\n"
out += "Basic block statistics\n"
out += "----------------------\n"
out += self._dump_basic_block_stats()
out += "\n\n"
out += "Function call statistics\n"
out += "------------------------\n"
out += self._dump_function_call_stats()
out += "\n\n"
out += "Loop statistics\n"
out += "---------------\n"
out += self._dump_loop_stats()
out += "\n"
return out
def _dump_execution_time(self) -> str:
insn_count = self._stats.get_insn_count()
stall_count = self._stats.stall_count
stall_percent = stall_count / insn_count * 100
cycles = insn_count + stall_count
time_ms = cycles / (self.FREQ_MHZ * 1e6) * 1e3
out = f"OTBN executed {insn_count} instructions in {cycles} cycles.\n"
out += f"The execution stalled for {stall_count} cycles "
out += f"({stall_percent:.01f} percent).\n"
out += f"The execution would take {time_ms:.02f} ms "
out += f"@ {self.FREQ_MHZ} MHz.\n"
return out
def _dump_insn_histo(self) -> str:
return tabulate(self._stats.insn_histo.most_common(),
headers=['instruction', 'count']) + "\n"
def _dump_basic_block_stats(self) -> str:
out = []
out += ["", "Number of instructions within a basic block", ""]
out += [tabulate(sorted(self._stats.basic_block_histo.items()),
headers=['# of instr.', 'frequency'])]
out += ['', '']
out += ["Number of instructions within an extended basic block", ""]
out.append(tabulate(sorted(self._stats.ext_basic_block_histo.items()),
headers=['# of instr.', 'frequency']))
return '\n'.join(out)
def _dump_function_call_stats(self) -> str:
'''Dump function call statistics'''
if not self._stats.func_calls:
return "No functions were called.\n"
out = ""
# TODO: Add the start address as function?
# Build function call graphs and a call site index
# caller-indexed == forward, callee-indexed == reverse
# The call graphs are on function granularity; the call sites dictionary
# is indexed by the called function, but uses the call site as value.
callgraph = {} # type: Dict[int, typing.Counter[int]]
rev_callgraph = {} # type: Dict[int, typing.Counter[int]]
rev_callsites = {} # type: Dict[int, typing.Counter[int]]
for c in self._stats.func_calls:
if c['caller_func'] not in callgraph:
callgraph[c['caller_func']] = Counter()
callgraph[c['caller_func']][c['callee_func']] += 1
if c['callee_func'] not in rev_callgraph:
rev_callgraph[c['callee_func']] = Counter()
rev_callgraph[c['callee_func']][c['caller_func']] += 1
if c['callee_func'] not in rev_callsites:
rev_callsites[c['callee_func']] = Counter()
rev_callsites[c['callee_func']][c['call_site']] += 1
total_leaf_calls = 0
total_calls_to_funcs_with_one_callsite = 0
total_func_calls = 0
for rev_callee_func, rev_caller_funcs in rev_callgraph.items():
has_one_callsite = False
func = self._describe_imem_addr(rev_callee_func)
out += f"Function {func}\n"
out += " is called from the following functions\n"
for rev_caller_func, cnt in rev_caller_funcs.most_common():
func = self._describe_imem_addr(rev_caller_func)
out += f" * {cnt} times by function {func}\n"
out += " from the following call sites\n"
for rev_callsite, cnt in rev_callsites[rev_callee_func].most_common():
func = self._describe_imem_addr(rev_callsite)
out += f" * {cnt} times from {func}\n"
has_one_callsite = len(rev_callsites[rev_callee_func]) == 1
out += " calls\n"
if rev_callee_func not in callgraph:
out += " no other function (leaf function).\n"
if not has_one_callsite:
# We don't count it as leaf function call if it has only one
# call site to prevent double-counting these as optimization
# opportunity.
total_leaf_calls += sum(rev_caller_funcs.values())
else:
caller_funcs = callgraph[rev_callee_func]
for caller_func, cnt in caller_funcs.most_common():
func = self._describe_imem_addr(caller_func)
out += f" * {cnt} times function {func}\n"
out += "\n"
if has_one_callsite:
total_calls_to_funcs_with_one_callsite += (
rev_caller_funcs.most_common()[0][1])
total_func_calls += sum(rev_caller_funcs.values())
out += "\n"
# Function call statistics
total_calls_req_call = (total_func_calls - total_leaf_calls -
total_calls_to_funcs_with_one_callsite)
out += f"Of a total of {total_func_calls} function calls, there were\n"
out += (f" {total_calls_to_funcs_with_one_callsite} function calls "
f"to a function with only one call site (call/ret can be "
f"replaced with static jumps)\n")
out += (f" {total_leaf_calls} leaf function calls "
f"(no function prologue/epilogue needed)\n")
out += (f"Overall, {total_calls_req_call} of {total_func_calls} "
f"({(total_calls_req_call / total_func_calls * 100):.02f} "
f"percent) calls need full function call semantics.\n")
return out
def _dump_loop_stats(self) -> str:
loops = self._stats.loops
loop_cnt = len(loops)
out = f"Loops: {loop_cnt}\n"
if loop_cnt != 0:
loop_len_values = [loop['loop_len'] for loop in loops]
loop_len_min = min(loop_len_values)
loop_len_max = max(loop_len_values)
loop_len_avg = sum(loop_len_values) / loop_cnt
loop_iterations_values = [loop['iterations'] for loop in loops]
loop_iterations_min = min(loop_iterations_values)
loop_iterations_max = max(loop_iterations_values)
loop_iterations_avg = sum(loop_iterations_values) / loop_cnt
out += "Loop body length (instructions): "
out += f"min: {loop_len_min}, max: {loop_len_max}, "
out += f"avg: {loop_len_avg:.02f}\n"
out += f"Number of iterations: min: {loop_iterations_min}, "
out += f"max: {loop_iterations_max}, "
out += f"avg: {loop_iterations_avg:.02f}\n"
return out