[otbnsim] Collect execution statistics from ISS
The ISS now has the ability to connect statistics during the program
execution and to analyze those statistics. The analysis includes:
* Execution statistics
* Instruction histograms
* Function call statistics
* Loop statistics
This commit also contains a tiny test to ensure that the statistics we
collect match expectations.
Signed-off-by: Philipp Wagner <phw@lowrisc.org>
diff --git a/hw/ip/otbn/dv/otbnsim/sim/sim.py b/hw/ip/otbn/dv/otbnsim/sim/sim.py
index 5383443..4af4012 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/sim.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/sim.py
@@ -6,6 +6,7 @@
from .isa import OTBNInsn
from .state import OTBNState
+from .stats import ExecutionStats
from .trace import Trace
_TEST_RND_DATA = \
@@ -16,6 +17,7 @@
def __init__(self) -> None:
self.state = OTBNState()
self.program = [] # type: List[OTBNInsn]
+ self.stats = ExecutionStats()
def load_program(self, program: List[OTBNInsn]) -> None:
self.program = program.copy()
@@ -23,7 +25,7 @@
def load_data(self, data: bytes) -> None:
self.state.dmem.load_le_words(data)
- def run(self, verbose: bool) -> int:
+ def run(self, verbose: bool, collect_stats: bool) -> int:
'''Run until ECALL.
Return the number of cycles taken.
@@ -34,7 +36,7 @@
# valid when in free running mode as nothing else will.
self.state.set_urnd_reseed_complete()
while self.state.running:
- self.step(verbose)
+ self.step(verbose, collect_stats)
insn_count += 1
if self.state.wsrs.RND.pending_request:
@@ -44,7 +46,9 @@
return insn_count
- def step(self, verbose: bool) -> Tuple[Optional[OTBNInsn], List[Trace]]:
+ def step(self,
+ verbose: bool,
+ collect_stats: bool) -> Tuple[Optional[OTBNInsn], List[Trace]]:
'''Run a single instruction.
Returns the instruction, together with a list of the architectural
@@ -78,11 +82,17 @@
self.state.commit(sim_stalled=True)
disasm = '(stall)'
changes = []
+
+ if collect_stats:
+ self.stats.record_stall()
else:
self.state.pre_insn(insn.affects_control)
insn.execute(self.state)
self.state.post_insn()
+ if collect_stats:
+ self.stats.record_insn(pc_before, insn, self.state)
+
if self.state.pending_halt:
# Roll back any pending state changes, ensuring that a faulting
# instruction doesn't actually do anything (this also aborts an
diff --git a/hw/ip/otbn/dv/otbnsim/sim/state.py b/hw/ip/otbn/dv/otbnsim/sim/state.py
index db8effe..325991f 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/state.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/state.py
@@ -29,6 +29,8 @@
self.pc = 0
self.pc_next = None # type: Optional[int]
+ self.start_addr = None # type: Optional[int]
+
_, imem_size = get_memory_layout()['IMEM']
self.imem_size = imem_size
@@ -147,6 +149,7 @@
self._urnd_reseed_complete = False
self.pc = addr
+ self.start_addr = addr
# Reset CSRs, WSRs and loop stack
self.csrs = CSRFile()
diff --git a/hw/ip/otbn/dv/otbnsim/sim/stats.py b/hw/ip/otbn/dv/otbnsim/sim/stats.py
new file mode 100644
index 0000000..4481ff3
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/sim/stats.py
@@ -0,0 +1,298 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+from collections import Counter
+import typing
+from typing import Dict, List, Optional, Tuple
+
+from elftools.dwarf.dwarfinfo import DWARFInfo # type: ignore
+from elftools.elf.elffile import ELFFile # type: ignore
+from elftools.elf.sections import SymbolTableSection # type: ignore
+from tabulate import tabulate
+
+from .insn import JAL, JALR, LOOP, LOOPI
+from .isa import OTBNInsn
+from .state import OTBNState
+
+
+class ExecutionStats:
+ def __init__(self) -> None:
+ self.stall_count = 0
+ self.insn_histo = Counter() # type: typing.Counter[str]
+ self.func_calls = [] # type: List[Dict[str, int]]
+ self.loops = [] # type: List[Dict[str, int]]
+
+ @property
+ def insn_count(self) -> int:
+ ''' Get the number of executed instructions. '''
+ return sum(self.insn_histo.values())
+
+ def record_stall(self) -> None:
+ ''' Record a single stall cycle. '''
+ self.stall_count += 1
+
+ def record_insn(self,
+ pc: int,
+ insn: OTBNInsn,
+ state_before_commit: OTBNState) -> None:
+ ''' Record the execution of an instruction. '''
+
+ # Instruction histogram
+ self.insn_histo[insn.insn.mnemonic] += 1
+
+ # Function calls
+ # - Direct function calls: jal x1, <offset>
+ # - Indirect function calls: jalr x1, <grs1>, 0
+ if (isinstance(insn, JAL) or isinstance(insn, JALR)) and insn.grd == 1:
+ call_stack = state_before_commit.peek_call_stack()
+ if call_stack:
+ caller_func = call_stack[0]
+ else:
+ assert state_before_commit.start_addr is not None
+ caller_func = state_before_commit.start_addr
+
+ assert state_before_commit.pc_next is not None
+ self.func_calls.append({
+ 'call_site': pc,
+ 'caller_func': caller_func,
+ 'callee_func': state_before_commit.pc_next,
+ })
+
+ # Loops
+ if isinstance(insn, LOOP) or isinstance(insn, LOOPI):
+ iterations = state_before_commit.loop_stack.stack[-1].loop_count
+ self.loops.append({
+ 'loop_addr': pc,
+ 'loop_len': insn.bodysize,
+ 'iterations': iterations,
+ })
+
+
+def _dwarf_decode_file_line(dwarf_info: DWARFInfo,
+ address: int) -> Optional[Tuple[str, int]]:
+ # Go over all the line programs in the DWARF information, looking for
+ # one that describes the given address.
+ for CU in dwarf_info.iter_CUs():
+ # First, look at line programs to find the file/line for the address
+ lineprog = dwarf_info.line_program_for_CU(CU)
+ prevstate = None
+ for entry in lineprog.get_entries():
+ # We're interested in those entries where a new state is assigned
+ if entry.state is None:
+ continue
+ if entry.state.end_sequence:
+ # if the line number sequence ends, clear prevstate.
+ prevstate = None
+ continue
+ # Looking for a range of addresses in two consecutive states that
+ # contain the required address.
+ if prevstate and prevstate.address <= address < entry.state.address:
+ filename = lineprog['file_entry'][prevstate.file - 1].name.decode('utf-8')
+ line = prevstate.line
+ return filename, line
+ prevstate = entry.state
+ return None
+
+
+def _get_addr_symbol_map(elf_file: ELFFile) -> Dict[int, str]:
+ section = elf_file.get_section_by_name('.symtab')
+
+ if not isinstance(section, SymbolTableSection):
+ return {}
+
+ return {sym.entry.st_value: sym.name for sym in section.iter_symbols()}
+
+
+class ExecutionStatAnalyzer:
+ # Assumed clock frequency of OTBN, in MHz.
+ FREQ_MHZ = 100
+
+ def __init__(self, stats: ExecutionStats, elf_file_path: str):
+ self._elf_file = ELFFile(open(elf_file_path, 'rb'))
+ self._stats = stats
+ self._addr_symbol_map = _get_addr_symbol_map(self._elf_file)
+
+ def _describe_imem_addr(self, address: int) -> str:
+ symbol_name = None
+ if address in self._addr_symbol_map:
+ symbol_name = self._addr_symbol_map[address]
+ else:
+ # |func_addr| is the largest possible |sym_addr| which is at most
+ # |address|.
+ func_addr = 0
+ for sym_addr in self._addr_symbol_map.keys():
+ if sym_addr <= address and sym_addr > func_addr:
+ func_addr = sym_addr
+ symbol_name = self._addr_symbol_map[func_addr] + f"+{address - func_addr:#x}"
+
+ file_line = None
+ if self._elf_file.has_dwarf_info():
+ dwarf_info = self._elf_file.get_dwarf_info()
+ file_line = _dwarf_decode_file_line(dwarf_info, address)
+
+ add_info = []
+ if symbol_name:
+ add_info.append(f"{symbol_name}")
+ if file_line:
+ add_info.append(f"at {file_line[0]}:{file_line[1]}")
+
+ str = f"{address:#x}"
+ if add_info:
+ str += ' (' + ' '.join(add_info) + ')'
+ return str
+
+ def dump(self) -> str:
+ out = ""
+ out += "\n"
+ out += "Execution time\n"
+ out += "--------------\n"
+ out += self._dump_execution_time()
+ out += "\n\n"
+ out += "Instruction frequencies\n"
+ out += "-----------------------\n"
+ out += self._dump_insn_histo()
+ out += "\n\n"
+ out += "Function call statistics\n"
+ out += "------------------------\n"
+ out += self._dump_function_call_stats()
+ out += "\n\n"
+ out += "Loop statistics\n"
+ out += "---------------\n"
+ out += self._dump_loop_stats()
+ out += "\n"
+
+ return out
+
+ def _dump_execution_time(self) -> str:
+ insn_count = self._stats.insn_count
+ stall_count = self._stats.stall_count
+ stall_percent = stall_count / insn_count * 100
+ cycles = insn_count + stall_count
+ time_ms = cycles / (self.FREQ_MHZ * 1e6) * 1e3
+
+ out = f"OTBN executed {insn_count} instructions in {cycles} cycles.\n"
+
+ out += f"The execution stalled for {stall_count} cycles "
+ out += f"({stall_percent:.01f} percent).\n"
+
+ out += f"The execution would take {time_ms:.02f} ms "
+ out += f"@ {self.FREQ_MHZ} MHz.\n"
+ return out
+
+ def _dump_insn_histo(self) -> str:
+ return tabulate(self._stats.insn_histo.most_common(),
+ headers=['instruction', 'count']) + "\n"
+
+ def _dump_function_call_stats(self) -> str:
+ """ Dump function call statistics """
+
+ if not self._stats.func_calls:
+ return "No functions were called.\n"
+
+ out = ""
+
+ # TODO: Add the start address as function?
+
+ # Build function call graphs and a call site index
+ # caller-indexed == forward, callee-indexed == reverse
+ # The call graphs are on function granularity; the call sites dictionary
+ # is indexed by the called function, but uses the call site as value.
+ callgraph = {} # type: Dict[int, typing.Counter[int]]
+ rev_callgraph = {} # type: Dict[int, typing.Counter[int]]
+ rev_callsites = {} # type: Dict[int, typing.Counter[int]]
+ for c in self._stats.func_calls:
+ if c['caller_func'] not in callgraph:
+ callgraph[c['caller_func']] = Counter()
+ callgraph[c['caller_func']][c['callee_func']] += 1
+
+ if c['callee_func'] not in rev_callgraph:
+ rev_callgraph[c['callee_func']] = Counter()
+ rev_callgraph[c['callee_func']][c['caller_func']] += 1
+
+ if c['callee_func'] not in rev_callsites:
+ rev_callsites[c['callee_func']] = Counter()
+ rev_callsites[c['callee_func']][c['call_site']] += 1
+
+ total_leaf_calls = 0
+ total_calls_to_funcs_with_one_callsite = 0
+ total_func_calls = 0
+ for rev_callee_func, rev_caller_funcs in rev_callgraph.items():
+ has_one_callsite = False
+ func = self._describe_imem_addr(rev_callee_func)
+ out += f"Function {func}\n"
+ out += " is called from the following functions\n"
+ for rev_caller_func, cnt in rev_caller_funcs.most_common():
+ func = self._describe_imem_addr(rev_caller_func)
+ out += f" * {cnt} times by function {func}\n"
+ out += " from the following call sites\n"
+ for rev_callsite, cnt in rev_callsites[rev_callee_func].most_common():
+ func = self._describe_imem_addr(rev_callsite)
+ out += f" * {cnt} times from {func}\n"
+
+ has_one_callsite = len(rev_callsites[rev_callee_func]) == 1
+
+ out += " calls\n"
+ if rev_callee_func not in callgraph:
+ out += " no other function (leaf function).\n"
+
+ if not has_one_callsite:
+ # We don't count it as leaf function call if it has only one
+ # call site to prevent double-counting these as optimization
+ # opportunity.
+ total_leaf_calls += sum(rev_caller_funcs.values())
+ else:
+ caller_funcs = callgraph[rev_callee_func]
+ for caller_func, cnt in caller_funcs.most_common():
+ func = self._describe_imem_addr(caller_func)
+ out += f" * {cnt} times function {func}\n"
+ out += "\n"
+
+ if has_one_callsite:
+ total_calls_to_funcs_with_one_callsite += (
+ rev_caller_funcs.most_common()[0][1])
+
+ total_func_calls += sum(rev_caller_funcs.values())
+ out += "\n"
+
+ # Function call statistics
+ total_calls_req_call = (total_func_calls - total_leaf_calls -
+ total_calls_to_funcs_with_one_callsite)
+ out += f"Of a total of {total_func_calls} function calls, there were\n"
+ out += (f" {total_calls_to_funcs_with_one_callsite} function calls "
+ f"to a function with only one call site (call/ret can be "
+ f"replaced with static jumps)\n")
+ out += (f" {total_leaf_calls} leaf function calls "
+ f"(no function prologue/epilogue needed)\n")
+ out += (f"Overall, {total_calls_req_call} of {total_func_calls} "
+ f"({(total_calls_req_call / total_func_calls * 100):.02f} "
+ f"percent) need full function call semantics.\n")
+
+ return out
+
+ def _dump_loop_stats(self) -> str:
+ loops = self._stats.loops
+ loop_cnt = len(loops)
+
+ out = f"Loops: {loop_cnt}\n"
+
+ if loop_cnt != 0:
+ loop_len_values = [loop['loop_len'] for loop in loops]
+ loop_len_min = min(loop_len_values)
+ loop_len_max = max(loop_len_values)
+ loop_len_avg = sum(loop_len_values) / loop_cnt
+
+ loop_iterations_values = [loop['iterations'] for loop in loops]
+ loop_iterations_min = min(loop_iterations_values)
+ loop_iterations_max = max(loop_iterations_values)
+ loop_iterations_avg = sum(loop_iterations_values) / loop_cnt
+
+ out += "Loop body length (instructions): "
+ out += f"min: {loop_len_min}, max: {loop_len_max}, "
+ out += f"avg: {loop_len_avg:.02f}\n"
+
+ out += f"Number of iterations: min: {loop_iterations_min}, "
+ out += f"max: {loop_iterations_max}, "
+ out += f"avg: {loop_iterations_avg:.02f}\n"
+
+ return out
diff --git a/hw/ip/otbn/dv/otbnsim/standalone.py b/hw/ip/otbn/dv/otbnsim/standalone.py
index 88482d9..834a457 100755
--- a/hw/ip/otbn/dv/otbnsim/standalone.py
+++ b/hw/ip/otbn/dv/otbnsim/standalone.py
@@ -8,6 +8,7 @@
from sim.elf import load_elf
from sim.sim import OTBNSim
+from sim.stats import ExecutionStatAnalyzer
def main() -> int:
@@ -28,14 +29,23 @@
help=("after execution, write the GPR and WDR contents to this file. "
"Use '-' to write to STDOUT.")
)
+ parser.add_argument(
+ '--dump-stats',
+ metavar="FILE",
+ type=argparse.FileType('w'),
+ help=("after execution, write execution statistics to this file. "
+ "Use '-' to write to STDOUT.")
+ )
args = parser.parse_args()
+ collect_stats = args.dump_stats is not None
+
sim = OTBNSim()
load_elf(sim, args.elf)
sim.state.start(0)
- sim.run(verbose=args.verbose)
+ sim.run(verbose=args.verbose, collect_stats=collect_stats)
if args.dump_dmem is not None:
args.dump_dmem.write(sim.dump_data())
@@ -46,6 +56,10 @@
for idx, value in enumerate(sim.state.wdrs.peek_unsigned_values()):
args.dump_regs.write(' w{:<2} = 0x{:064x}\n'.format(idx, value))
+ if collect_stats:
+ stat_analyzer = ExecutionStatAnalyzer(sim.stats, args.elf)
+ args.dump_stats.write(stat_analyzer.dump())
+
return 0
diff --git a/hw/ip/otbn/dv/otbnsim/stepped.py b/hw/ip/otbn/dv/otbnsim/stepped.py
index 44edeb3..fa092b2 100755
--- a/hw/ip/otbn/dv/otbnsim/stepped.py
+++ b/hw/ip/otbn/dv/otbnsim/stepped.py
@@ -89,7 +89,7 @@
pc = sim.state.pc
assert 0 == pc & 3
- insn, changes = sim.step(verbose=False)
+ insn, changes = sim.step(verbose=False, collect_stats=False)
if insn is None:
hdr = 'STALL'
@@ -108,7 +108,7 @@
raise ValueError('run expects zero arguments. Got {}.'
.format(args))
- num_cycles = sim.run(verbose=False)
+ num_cycles = sim.run(verbose=False, collect_stats=False)
print(' ran for {} cycles'.format(num_cycles))
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.exp b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.exp
new file mode 100644
index 0000000..0cd9f40
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.exp
@@ -0,0 +1,5 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+x11 = 4
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.s b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.s
new file mode 100644
index 0000000..3ad5182
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/direct-call.s
@@ -0,0 +1,16 @@
+/* Copyright lowRISC contributors. */
+/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Call a subroutine directly. */
+
+main:
+ addi x11, x0, 5
+
+ jal x1, subroutine
+
+ ecall
+
+subroutine:
+ addi x11, x11, -1
+ jalr x0, x1, 0
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.exp b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.exp
new file mode 100644
index 0000000..96898e5
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.exp
@@ -0,0 +1,6 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+x10 = 0x10
+x11 = 4
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.s b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.s
new file mode 100644
index 0000000..588bd36
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/subroutines/indirect-call.s
@@ -0,0 +1,17 @@
+/* Copyright lowRISC contributors. */
+/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Call a subroutine through a function pointer. */
+
+main:
+ addi x11, x0, 5
+
+ addi x10, x0, 0x10
+ jalr x1, x10, 0
+
+ ecall
+
+subroutine:
+ addi x11, x11, -1
+ jalr x0, x1, 0
diff --git a/hw/ip/otbn/dv/otbnsim/test/stats_test.py b/hw/ip/otbn/dv/otbnsim/test/stats_test.py
new file mode 100644
index 0000000..501fdde
--- /dev/null
+++ b/hw/ip/otbn/dv/otbnsim/test/stats_test.py
@@ -0,0 +1,75 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import py
+import os
+
+from sim.sim import OTBNSim
+from sim.stats import ExecutionStats
+from sim.elf import load_elf
+import testutil
+
+
+def _run_sim_for_stats(asm_file: str, tmpdir: str) -> ExecutionStats:
+ """ Run the OTBN simulator, collect statistics, and return them. """
+
+ assert os.path.exists(asm_file)
+ elf_file = testutil.asm_and_link_one_file(asm_file, tmpdir)
+
+ sim = OTBNSim()
+ load_elf(sim, elf_file)
+
+ sim.state.start(0)
+ sim.run(verbose=False, collect_stats=True)
+
+ return sim.stats
+
+
+def test_general_and_loop(tmpdir: py.path.local) -> None:
+ """ Test the collection of general statistics as well as loop stats. """
+
+ asm_file = os.path.join(os.path.dirname(__file__),
+ 'simple', 'loops', 'loops.s')
+ stats = _run_sim_for_stats(asm_file, str(tmpdir))
+
+ # General statistics
+ assert stats.stall_count == 2
+ assert stats.insn_count == 28
+ assert stats.insn_histo == {'addi': 22, 'loop': 4, 'loopi': 1, 'ecall': 1}
+ assert stats.func_calls == []
+
+ # Loop statistics.
+ exp = [
+ # Outer LOOPI
+ {'iterations': 4, 'loop_addr': 8, 'loop_len': 4},
+
+ # Inner LOOP
+ {'iterations': 3, 'loop_addr': 16, 'loop_len': 1},
+ {'iterations': 3, 'loop_addr': 16, 'loop_len': 1},
+ {'iterations': 3, 'loop_addr': 16, 'loop_len': 1},
+ {'iterations': 3, 'loop_addr': 16, 'loop_len': 1}
+ ]
+ assert stats.loops == exp
+
+
+def test_func_call_direct(tmpdir: py.path.local) -> None:
+ """ Test the collection of statistics related to loops. """
+
+ asm_file = os.path.join(os.path.dirname(__file__),
+ 'simple', 'subroutines', 'direct-call.s')
+ stats = _run_sim_for_stats(asm_file, str(tmpdir))
+
+ exp = [{'call_site': 4, 'callee_func': 12, 'caller_func': 0}]
+ assert stats.func_calls == exp
+
+
+def test_func_call_indirect(tmpdir: py.path.local) -> None:
+ """ Test the collection of statistics related to loops. """
+
+ asm_file = os.path.join(os.path.dirname(__file__),
+ 'simple', 'subroutines', 'indirect-call.s')
+ stats = _run_sim_for_stats(asm_file, str(tmpdir))
+
+ exp = [{'call_site': 8, 'callee_func': 16, 'caller_func': 0}]
+ assert stats.func_calls == exp