hw/ip/otbn/dv/otbnsim/sim/stats.py - 3p/lowrisc/opentitan - Git at Google

 # Copyright lowRISC contributors.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0

 from collections import Counter
 import typing
 from typing import Dict, List, Optional, Tuple

 from elftools.dwarf.dwarfinfo import DWARFInfo  # type: ignore
 from elftools.elf.elffile import ELFFile  # type: ignore
 from elftools.elf.sections import SymbolTableSection  # type: ignore
 from tabulate import tabulate

 from .insn import BEQ, BNE, ECALL, JAL, JALR, LOOP, LOOPI
 from .isa import OTBNInsn
 from .state import OTBNState


 class ExecutionStats:
     def __init__(self, program: List[OTBNInsn]) -> None:
         # Executed program (the contents of the instruction memory).
         self.program = program

         self.stall_count = 0
         self.insn_histo = Counter()  # type: typing.Counter[str]
         self.func_calls = []  # type: List[Dict[str, int]]
         self.loops = []  # type: List[Dict[str, int]]

         # Histogram indexed by the length of the (extended) basic block.
         self.basic_block_histo = Counter()  # type: typing.Counter[int]
         self.ext_basic_block_histo = Counter()  # type: typing.Counter[int]

         self._current_basic_block_len = 0
         self._current_ext_basic_block_len = 0

     def get_insn_count(self) -> int:
         '''Get the number of executed instructions.'''
         return sum(self.insn_histo.values())

     def record_stall(self) -> None:
         '''Record a single stall cycle.'''
         self.stall_count += 1

     def _insn_at_addr(self, addr: int) -> Optional[OTBNInsn]:
         '''Get the instruction at a given address.'''
         assert addr % 4 == 0
         assert addr >= 0
         word_addr = addr >> 2
         return self.program[word_addr]

     def record_insn(self,
                     insn: OTBNInsn,
                     state_bc: OTBNState) -> None:
         '''Record the execution of an instruction.

         insn is the currently executed instruction. state_bc is the state of
         OTBN before the instruction is committed.

         '''
         pc = state_bc.pc

         is_jump = isinstance(insn, JAL) or isinstance(insn, JALR)
         is_branch = isinstance(insn, BEQ) or isinstance(insn, BNE)

         # Instruction histogram
         self.insn_histo[insn.insn.mnemonic] += 1

         # Function calls
         # - Direct function calls: jal x1, <offset>
         # - Indirect function calls: jalr x1, <grs1>, 0
         if is_jump and insn.grd == 1:  # type: ignore
             call_stack = state_bc.peek_call_stack()
             if call_stack:
                 caller_func = call_stack[0]
             else:
                 caller_func = state_bc.get_start_addr()

             self.func_calls.append({
                 'call_site': pc,
                 'caller_func': caller_func,
                 'callee_func': state_bc.get_next_pc(),
             })

         # Loops
         if isinstance(insn, LOOP) or isinstance(insn, LOOPI):
             assert state_bc.in_loop()
             iterations = state_bc.loop_stack.stack[-1].loop_count
             self.loops.append({
                 'loop_addr': pc,
                 'loop_len': insn.bodysize,
                 'iterations': iterations,
             })

         is_last_insn_in_loop_body = state_bc.loop_stack.is_last_insn_in_loop_body(pc)

         # Basic blocks
         # A basic block is a linear sequence of code ending with an instruction
         # that can potentially change the control flow: a jump, a branch, the
         # last instruction in a loop (LOOP or LOOPI) body, or an ECALL. The
         # length of the basic block equals the number of instructions within the
         # basic block.
         self._current_basic_block_len += 1
         if (is_jump or is_branch or is_last_insn_in_loop_body or
                 isinstance(insn, ECALL)):

             self.basic_block_histo[self._current_basic_block_len] += 1
             self._current_basic_block_len = 0

         # Extended basic blocks
         # An extended basic block is a sequence of one or more basic blocks
         # which can be statically determined at compile time.
         # Extended basic blocks end with a branch, the last instruction in a
         # LOOP body, or an ECALL instruction.

         # Determine if the current instruction is the last instruction in a LOOP
         # body (only LOOP, not LOOPI!).
         finishing_loop = False
         if is_last_insn_in_loop_body:
             loop_insn_addr = state_bc.loop_stack.stack[-1].get_loop_insn_addr()
             finishing_loop = isinstance(self._insn_at_addr(loop_insn_addr), LOOP)

         self._current_ext_basic_block_len += 1
         if is_branch or finishing_loop or isinstance(insn, ECALL):
             self.ext_basic_block_histo[self._current_ext_basic_block_len] += 1
             self._current_ext_basic_block_len = 0


 def _dwarf_decode_file_line(dwarf_info: DWARFInfo,
                             address: int) -> Optional[Tuple[str, int]]:
     # Go over all the line programs in the DWARF information, looking for
     # one that describes the given address.
     for CU in dwarf_info.iter_CUs():
         # First, look at line programs to find the file/line for the address
         lineprog = dwarf_info.line_program_for_CU(CU)
         prevstate = None
         for entry in lineprog.get_entries():
             # We're interested in those entries where a new state is assigned
             if entry.state is None:
                 continue
             if entry.state.end_sequence:
                 # if the line number sequence ends, clear prevstate.
                 prevstate = None
                 continue
             # Looking for a range of addresses in two consecutive states that
             # contain the required address.
             if prevstate and prevstate.address <= address < entry.state.address:
                 filename = lineprog['file_entry'][prevstate.file - 1].name.decode('utf-8')
                 line = prevstate.line
                 return filename, line
             prevstate = entry.state
     return None


 def _get_addr_symbol_map(elf_file: ELFFile) -> Dict[int, str]:
     section = elf_file.get_section_by_name('.symtab')

     if not isinstance(section, SymbolTableSection):
         return {}

     return {sym.entry.st_value: sym.name for sym in section.iter_symbols()}


 class ExecutionStatAnalyzer:
     # Assumed clock frequency of OTBN, in MHz.
     FREQ_MHZ = 100

     def __init__(self, stats: ExecutionStats, elf_file_path: str):
         self._elf_file = ELFFile(open(elf_file_path, 'rb'))
         self._stats = stats
         self._addr_symbol_map = _get_addr_symbol_map(self._elf_file)

     def _describe_imem_addr(self, address: int) -> str:
         symbol_name = None
         if address in self._addr_symbol_map:
             symbol_name = self._addr_symbol_map[address]
         else:
             # |func_addr| is the largest possible |sym_addr| which is at most
             # |address|.
             func_addr = 0
             for sym_addr in self._addr_symbol_map.keys():
                 if sym_addr <= address and sym_addr > func_addr:
                     func_addr = sym_addr
             symbol_name = self._addr_symbol_map[func_addr] + f"+{address - func_addr:#x}"

         file_line = None
         if self._elf_file.has_dwarf_info():
             dwarf_info = self._elf_file.get_dwarf_info()
             file_line = _dwarf_decode_file_line(dwarf_info, address)

         add_info = []
         if symbol_name:
             add_info.append(f"{symbol_name}")
         if file_line:
             add_info.append(f"at {file_line[0]}:{file_line[1]}")

         str = f"{address:#x}"
         if add_info:
             str += ' (' + ' '.join(add_info) + ')'
         return str

     def dump(self) -> str:
         out = ""
         out += "\n"
         out += "Execution time\n"
         out += "--------------\n"
         out += self._dump_execution_time()
         out += "\n\n"
         out += "Instruction frequencies\n"
         out += "-----------------------\n"
         out += self._dump_insn_histo()
         out += "\n\n"
         out += "Basic block statistics\n"
         out += "----------------------\n"
         out += self._dump_basic_block_stats()
         out += "\n\n"
         out += "Function call statistics\n"
         out += "------------------------\n"
         out += self._dump_function_call_stats()
         out += "\n\n"
         out += "Loop statistics\n"
         out += "---------------\n"
         out += self._dump_loop_stats()
         out += "\n"

         return out

     def _dump_execution_time(self) -> str:
         insn_count = self._stats.get_insn_count()
         stall_count = self._stats.stall_count
         stall_percent = stall_count / insn_count * 100
         cycles = insn_count + stall_count
         time_ms = cycles / (self.FREQ_MHZ * 1e6) * 1e3

         out = f"OTBN executed {insn_count} instructions in {cycles} cycles.\n"

         out += f"The execution stalled for {stall_count} cycles "
         out += f"({stall_percent:.01f} percent).\n"

         out += f"The execution would take {time_ms:.02f} ms "
         out += f"@ {self.FREQ_MHZ} MHz.\n"
         return out

     def _dump_insn_histo(self) -> str:
         return tabulate(self._stats.insn_histo.most_common(),
                         headers=['instruction', 'count']) + "\n"

     def _dump_basic_block_stats(self) -> str:
         out = []
         out += ["", "Number of instructions within a basic block", ""]
         out += [tabulate(sorted(self._stats.basic_block_histo.items()),
                          headers=['# of instr.', 'frequency'])]
         out += ['', '']
         out += ["Number of instructions within an extended basic block", ""]
         out.append(tabulate(sorted(self._stats.ext_basic_block_histo.items()),
                    headers=['# of instr.', 'frequency']))
         return '\n'.join(out)

     def _dump_function_call_stats(self) -> str:
         '''Dump function call statistics'''

         if not self._stats.func_calls:
             return "No functions were called.\n"

         out = ""

         # TODO: Add the start address as function?

         # Build function call graphs and a call site index
         # caller-indexed == forward, callee-indexed == reverse
         # The call graphs are on function granularity; the call sites dictionary
         # is indexed by the called function, but uses the call site as value.
         callgraph = {}  # type: Dict[int, typing.Counter[int]]
         rev_callgraph = {}  # type: Dict[int, typing.Counter[int]]
         rev_callsites = {}  # type: Dict[int, typing.Counter[int]]
         for c in self._stats.func_calls:
             if c['caller_func'] not in callgraph:
                 callgraph[c['caller_func']] = Counter()
             callgraph[c['caller_func']][c['callee_func']] += 1

             if c['callee_func'] not in rev_callgraph:
                 rev_callgraph[c['callee_func']] = Counter()
             rev_callgraph[c['callee_func']][c['caller_func']] += 1

             if c['callee_func'] not in rev_callsites:
                 rev_callsites[c['callee_func']] = Counter()
             rev_callsites[c['callee_func']][c['call_site']] += 1

         total_leaf_calls = 0
         total_calls_to_funcs_with_one_callsite = 0
         total_func_calls = 0
         for rev_callee_func, rev_caller_funcs in rev_callgraph.items():
             has_one_callsite = False
             func = self._describe_imem_addr(rev_callee_func)
             out += f"Function {func}\n"
             out += "  is called from the following functions\n"
             for rev_caller_func, cnt in rev_caller_funcs.most_common():
                 func = self._describe_imem_addr(rev_caller_func)
                 out += f"    * {cnt} times by function {func}\n"
             out += "  from the following call sites\n"
             for rev_callsite, cnt in rev_callsites[rev_callee_func].most_common():
                 func = self._describe_imem_addr(rev_callsite)
                 out += f"    * {cnt} times from {func}\n"

             has_one_callsite = len(rev_callsites[rev_callee_func]) == 1

             out += "  calls\n"
             if rev_callee_func not in callgraph:
                 out += "    no other function (leaf function).\n"

                 if not has_one_callsite:
                     # We don't count it as leaf function call if it has only one
                     # call site to prevent double-counting these as optimization
                     # opportunity.
                     total_leaf_calls += sum(rev_caller_funcs.values())
             else:
                 caller_funcs = callgraph[rev_callee_func]
                 for caller_func, cnt in caller_funcs.most_common():
                     func = self._describe_imem_addr(caller_func)
                     out += f"    * {cnt} times function {func}\n"
             out += "\n"

             if has_one_callsite:
                 total_calls_to_funcs_with_one_callsite += (
                     rev_caller_funcs.most_common()[0][1])

             total_func_calls += sum(rev_caller_funcs.values())
         out += "\n"

         # Function call statistics
         total_calls_req_call = (total_func_calls - total_leaf_calls -
                                 total_calls_to_funcs_with_one_callsite)
         out += f"Of a total of {total_func_calls} function calls, there were\n"
         out += (f"  {total_calls_to_funcs_with_one_callsite} function calls "
                 f"to a function with only one call site (call/ret can be "
                 f"replaced with static jumps)\n")
         out += (f"  {total_leaf_calls} leaf function calls "
                 f"(no function prologue/epilogue needed)\n")
         out += (f"Overall, {total_calls_req_call} of {total_func_calls} "
                 f"({(total_calls_req_call / total_func_calls * 100):.02f} "
                 f"percent) calls need full function call semantics.\n")

         return out

     def _dump_loop_stats(self) -> str:
         loops = self._stats.loops
         loop_cnt = len(loops)

         out = f"Loops: {loop_cnt}\n"

         if loop_cnt != 0:
             loop_len_values = [loop['loop_len'] for loop in loops]
             loop_len_min = min(loop_len_values)
             loop_len_max = max(loop_len_values)
             loop_len_avg = sum(loop_len_values) / loop_cnt

             loop_iterations_values = [loop['iterations'] for loop in loops]
             loop_iterations_min = min(loop_iterations_values)
             loop_iterations_max = max(loop_iterations_values)
             loop_iterations_avg = sum(loop_iterations_values) / loop_cnt

             out += "Loop body length (instructions): "
             out += f"min: {loop_len_min}, max: {loop_len_max}, "
             out += f"avg: {loop_len_avg:.02f}\n"

             out += f"Number of iterations: min: {loop_iterations_min}, "
             out += f"max: {loop_iterations_max}, "
             out += f"avg: {loop_iterations_avg:.02f}\n"

         return out
	# Copyright lowRISC contributors.
	# Licensed under the Apache License, Version 2.0, see LICENSE for details.
	# SPDX-License-Identifier: Apache-2.0

	from collections import Counter
	import typing
	from typing import Dict, List, Optional, Tuple

	from elftools.dwarf.dwarfinfo import DWARFInfo # type: ignore
	from elftools.elf.elffile import ELFFile # type: ignore
	from elftools.elf.sections import SymbolTableSection # type: ignore
	from tabulate import tabulate

	from .insn import BEQ, BNE, ECALL, JAL, JALR, LOOP, LOOPI
	from .isa import OTBNInsn
	from .state import OTBNState


	class ExecutionStats:
	def __init__(self, program: List[OTBNInsn]) -> None:
	# Executed program (the contents of the instruction memory).
	self.program = program

	self.stall_count = 0
	self.insn_histo = Counter() # type: typing.Counter[str]
	self.func_calls = [] # type: List[Dict[str, int]]
	self.loops = [] # type: List[Dict[str, int]]

	# Histogram indexed by the length of the (extended) basic block.
	self.basic_block_histo = Counter() # type: typing.Counter[int]
	self.ext_basic_block_histo = Counter() # type: typing.Counter[int]

	self._current_basic_block_len = 0
	self._current_ext_basic_block_len = 0

	def get_insn_count(self) -> int:
	'''Get the number of executed instructions.'''
	return sum(self.insn_histo.values())

	def record_stall(self) -> None:
	'''Record a single stall cycle.'''
	self.stall_count += 1

	def _insn_at_addr(self, addr: int) -> Optional[OTBNInsn]:
	'''Get the instruction at a given address.'''
	assert addr % 4 == 0
	assert addr >= 0
	word_addr = addr >> 2
	return self.program[word_addr]

	def record_insn(self,
	insn: OTBNInsn,
	state_bc: OTBNState) -> None:
	'''Record the execution of an instruction.

	insn is the currently executed instruction. state_bc is the state of
	OTBN before the instruction is committed.

	'''
	pc = state_bc.pc

	is_jump = isinstance(insn, JAL) or isinstance(insn, JALR)
	is_branch = isinstance(insn, BEQ) or isinstance(insn, BNE)

	# Instruction histogram
	self.insn_histo[insn.insn.mnemonic] += 1

	# Function calls
	# - Direct function calls: jal x1, <offset>
	# - Indirect function calls: jalr x1, <grs1>, 0
	if is_jump and insn.grd == 1: # type: ignore
	call_stack = state_bc.peek_call_stack()
	if call_stack:
	caller_func = call_stack[0]
	else:
	caller_func = state_bc.get_start_addr()

	self.func_calls.append({
	'call_site': pc,
	'caller_func': caller_func,
	'callee_func': state_bc.get_next_pc(),
	})

	# Loops
	if isinstance(insn, LOOP) or isinstance(insn, LOOPI):
	assert state_bc.in_loop()
	iterations = state_bc.loop_stack.stack[-1].loop_count
	self.loops.append({
	'loop_addr': pc,
	'loop_len': insn.bodysize,
	'iterations': iterations,
	})

	is_last_insn_in_loop_body = state_bc.loop_stack.is_last_insn_in_loop_body(pc)

	# Basic blocks
	# A basic block is a linear sequence of code ending with an instruction
	# that can potentially change the control flow: a jump, a branch, the
	# last instruction in a loop (LOOP or LOOPI) body, or an ECALL. The
	# length of the basic block equals the number of instructions within the
	# basic block.
	self._current_basic_block_len += 1
	if (is_jump or is_branch or is_last_insn_in_loop_body or
	isinstance(insn, ECALL)):

	self.basic_block_histo[self._current_basic_block_len] += 1
	self._current_basic_block_len = 0

	# Extended basic blocks
	# An extended basic block is a sequence of one or more basic blocks
	# which can be statically determined at compile time.
	# Extended basic blocks end with a branch, the last instruction in a
	# LOOP body, or an ECALL instruction.

	# Determine if the current instruction is the last instruction in a LOOP
	# body (only LOOP, not LOOPI!).
	finishing_loop = False
	if is_last_insn_in_loop_body:
	loop_insn_addr = state_bc.loop_stack.stack[-1].get_loop_insn_addr()
	finishing_loop = isinstance(self._insn_at_addr(loop_insn_addr), LOOP)

	self._current_ext_basic_block_len += 1
	if is_branch or finishing_loop or isinstance(insn, ECALL):
	self.ext_basic_block_histo[self._current_ext_basic_block_len] += 1
	self._current_ext_basic_block_len = 0


	def _dwarf_decode_file_line(dwarf_info: DWARFInfo,
	address: int) -> Optional[Tuple[str, int]]:
	# Go over all the line programs in the DWARF information, looking for
	# one that describes the given address.
	for CU in dwarf_info.iter_CUs():
	# First, look at line programs to find the file/line for the address
	lineprog = dwarf_info.line_program_for_CU(CU)
	prevstate = None
	for entry in lineprog.get_entries():
	# We're interested in those entries where a new state is assigned
	if entry.state is None:
	continue
	if entry.state.end_sequence:
	# if the line number sequence ends, clear prevstate.
	prevstate = None
	continue
	# Looking for a range of addresses in two consecutive states that
	# contain the required address.
	if prevstate and prevstate.address <= address < entry.state.address:
	filename = lineprog['file_entry'][prevstate.file - 1].name.decode('utf-8')
	line = prevstate.line
	return filename, line
	prevstate = entry.state
	return None


	def _get_addr_symbol_map(elf_file: ELFFile) -> Dict[int, str]:
	section = elf_file.get_section_by_name('.symtab')

	if not isinstance(section, SymbolTableSection):
	return {}

	return {sym.entry.st_value: sym.name for sym in section.iter_symbols()}


	class ExecutionStatAnalyzer:
	# Assumed clock frequency of OTBN, in MHz.
	FREQ_MHZ = 100

	def __init__(self, stats: ExecutionStats, elf_file_path: str):
	self._elf_file = ELFFile(open(elf_file_path, 'rb'))
	self._stats = stats
	self._addr_symbol_map = _get_addr_symbol_map(self._elf_file)

	def _describe_imem_addr(self, address: int) -> str:
	symbol_name = None
	if address in self._addr_symbol_map:
	symbol_name = self._addr_symbol_map[address]
	else:
	# \|func_addr\| is the largest possible \|sym_addr\| which is at most
	# \|address\|.
	func_addr = 0
	for sym_addr in self._addr_symbol_map.keys():
	if sym_addr <= address and sym_addr > func_addr:
	func_addr = sym_addr
	symbol_name = self._addr_symbol_map[func_addr] + f"+{address - func_addr:#x}"

	file_line = None
	if self._elf_file.has_dwarf_info():
	dwarf_info = self._elf_file.get_dwarf_info()
	file_line = _dwarf_decode_file_line(dwarf_info, address)

	add_info = []
	if symbol_name:
	add_info.append(f"{symbol_name}")
	if file_line:
	add_info.append(f"at {file_line[0]}:{file_line[1]}")

	str = f"{address:#x}"
	if add_info:
	str += ' (' + ' '.join(add_info) + ')'
	return str

	def dump(self) -> str:
	out = ""
	out += "\n"
	out += "Execution time\n"
	out += "--------------\n"
	out += self._dump_execution_time()
	out += "\n\n"
	out += "Instruction frequencies\n"
	out += "-----------------------\n"
	out += self._dump_insn_histo()
	out += "\n\n"
	out += "Basic block statistics\n"
	out += "----------------------\n"
	out += self._dump_basic_block_stats()
	out += "\n\n"
	out += "Function call statistics\n"
	out += "------------------------\n"
	out += self._dump_function_call_stats()
	out += "\n\n"
	out += "Loop statistics\n"
	out += "---------------\n"
	out += self._dump_loop_stats()
	out += "\n"

	return out

	def _dump_execution_time(self) -> str:
	insn_count = self._stats.get_insn_count()
	stall_count = self._stats.stall_count
	stall_percent = stall_count / insn_count * 100
	cycles = insn_count + stall_count
	time_ms = cycles / (self.FREQ_MHZ * 1e6) * 1e3

	out = f"OTBN executed {insn_count} instructions in {cycles} cycles.\n"

	out += f"The execution stalled for {stall_count} cycles "
	out += f"({stall_percent:.01f} percent).\n"

	out += f"The execution would take {time_ms:.02f} ms "
	out += f"@ {self.FREQ_MHZ} MHz.\n"
	return out

	def _dump_insn_histo(self) -> str:
	return tabulate(self._stats.insn_histo.most_common(),
	headers=['instruction', 'count']) + "\n"

	def _dump_basic_block_stats(self) -> str:
	out = []
	out += ["", "Number of instructions within a basic block", ""]
	out += [tabulate(sorted(self._stats.basic_block_histo.items()),
	headers=['# of instr.', 'frequency'])]
	out += ['', '']
	out += ["Number of instructions within an extended basic block", ""]
	out.append(tabulate(sorted(self._stats.ext_basic_block_histo.items()),
	headers=['# of instr.', 'frequency']))
	return '\n'.join(out)

	def _dump_function_call_stats(self) -> str:
	'''Dump function call statistics'''

	if not self._stats.func_calls:
	return "No functions were called.\n"

	out = ""

	# TODO: Add the start address as function?

	# Build function call graphs and a call site index
	# caller-indexed == forward, callee-indexed == reverse
	# The call graphs are on function granularity; the call sites dictionary
	# is indexed by the called function, but uses the call site as value.
	callgraph = {} # type: Dict[int, typing.Counter[int]]
	rev_callgraph = {} # type: Dict[int, typing.Counter[int]]
	rev_callsites = {} # type: Dict[int, typing.Counter[int]]
	for c in self._stats.func_calls:
	if c['caller_func'] not in callgraph:
	callgraph[c['caller_func']] = Counter()
	callgraph[c['caller_func']][c['callee_func']] += 1

	if c['callee_func'] not in rev_callgraph:
	rev_callgraph[c['callee_func']] = Counter()
	rev_callgraph[c['callee_func']][c['caller_func']] += 1

	if c['callee_func'] not in rev_callsites:
	rev_callsites[c['callee_func']] = Counter()
	rev_callsites[c['callee_func']][c['call_site']] += 1

	total_leaf_calls = 0
	total_calls_to_funcs_with_one_callsite = 0
	total_func_calls = 0
	for rev_callee_func, rev_caller_funcs in rev_callgraph.items():
	has_one_callsite = False
	func = self._describe_imem_addr(rev_callee_func)
	out += f"Function {func}\n"
	out += " is called from the following functions\n"
	for rev_caller_func, cnt in rev_caller_funcs.most_common():
	func = self._describe_imem_addr(rev_caller_func)
	out += f" * {cnt} times by function {func}\n"
	out += " from the following call sites\n"
	for rev_callsite, cnt in rev_callsites[rev_callee_func].most_common():
	func = self._describe_imem_addr(rev_callsite)
	out += f" * {cnt} times from {func}\n"

	has_one_callsite = len(rev_callsites[rev_callee_func]) == 1

	out += " calls\n"
	if rev_callee_func not in callgraph:
	out += " no other function (leaf function).\n"

	if not has_one_callsite:
	# We don't count it as leaf function call if it has only one
	# call site to prevent double-counting these as optimization
	# opportunity.
	total_leaf_calls += sum(rev_caller_funcs.values())
	else:
	caller_funcs = callgraph[rev_callee_func]
	for caller_func, cnt in caller_funcs.most_common():
	func = self._describe_imem_addr(caller_func)
	out += f" * {cnt} times function {func}\n"
	out += "\n"

	if has_one_callsite:
	total_calls_to_funcs_with_one_callsite += (
	rev_caller_funcs.most_common()[0][1])

	total_func_calls += sum(rev_caller_funcs.values())
	out += "\n"

	# Function call statistics
	total_calls_req_call = (total_func_calls - total_leaf_calls -
	total_calls_to_funcs_with_one_callsite)
	out += f"Of a total of {total_func_calls} function calls, there were\n"
	out += (f" {total_calls_to_funcs_with_one_callsite} function calls "
	f"to a function with only one call site (call/ret can be "
	f"replaced with static jumps)\n")
	out += (f" {total_leaf_calls} leaf function calls "
	f"(no function prologue/epilogue needed)\n")
	out += (f"Overall, {total_calls_req_call} of {total_func_calls} "
	f"({(total_calls_req_call / total_func_calls * 100):.02f} "
	f"percent) calls need full function call semantics.\n")

	return out

	def _dump_loop_stats(self) -> str:
	loops = self._stats.loops
	loop_cnt = len(loops)

	out = f"Loops: {loop_cnt}\n"

	if loop_cnt != 0:
	loop_len_values = [loop['loop_len'] for loop in loops]
	loop_len_min = min(loop_len_values)
	loop_len_max = max(loop_len_values)
	loop_len_avg = sum(loop_len_values) / loop_cnt

	loop_iterations_values = [loop['iterations'] for loop in loops]
	loop_iterations_min = min(loop_iterations_values)
	loop_iterations_max = max(loop_iterations_values)
	loop_iterations_avg = sum(loop_iterations_values) / loop_cnt

	out += "Loop body length (instructions): "
	out += f"min: {loop_len_min}, max: {loop_len_max}, "
	out += f"avg: {loop_len_avg:.02f}\n"

	out += f"Number of iterations: min: {loop_iterations_min}, "
	out += f"max: {loop_iterations_max}, "
	out += f"avg: {loop_iterations_avg:.02f}\n"

	return out