blob: 3dc27c610effd3a2dcd74fc11db800d81404e088 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Set, Tuple
from .decode import OTBNProgram
from .insn_yaml import Insn
from .section import CodeSection
class ControlLoc:
'''Base class for control-flow locations.
Represents a normal PC that a branch or jump instruction could link to.
'''
def __init__(self, pc: int) -> None:
self.pc = pc
@classmethod
def is_special(cls) -> bool:
return False
def pretty(self) -> str:
return '{:#x}'.format(self.pc)
class ImemEnd(ControlLoc):
'''Represents the end of instruction memory.'''
def __init__(self) -> None:
super().__init__(-1)
@classmethod
def is_special(cls) -> bool:
return True
def pretty(self) -> str:
return '<imem end>'
class Ret(ControlLoc):
'''Represents the location at the top of the call stack.'''
def __init__(self) -> None:
super().__init__(-1)
@classmethod
def is_special(cls) -> bool:
return True
def pretty(self) -> str:
return 'RET'
class Ecall(ControlLoc):
'''Represents program termination as a result of an ecall.'''
def __init__(self) -> None:
super().__init__(-1)
@classmethod
def is_special(cls) -> bool:
return True
def pretty(self) -> str:
return 'ECALL'
class LoopStart(ControlLoc):
'''Represents the start of a loop.
The `loop_start_pc` is the first PC of the loop body, and the `loop_end_pc`
is the last PC of the loop body.
'''
def __init__(self, loop_start_pc: int, loop_end_pc: int):
super().__init__(loop_start_pc)
self.loop_start_pc = loop_start_pc
self.loop_end_pc = loop_end_pc
@classmethod
def is_special(cls) -> bool:
return True
def pretty(self) -> str:
return '<loop from {:#x}-{:#x}>'.format(self.loop_start_pc,
self.loop_end_pc)
class Cycle(ControlLoc):
'''Represents a control flow that loops back to a previous PC.
This is not represented as a normal ControlLoc because it's convenient to
catch these cyclic control flows rather than follow the jump/branch and
potentially cause an infinite loop; with this structure, the control-flow
graph remains a DAG.
'''
@classmethod
def is_special(cls) -> bool:
return True
def pretty(self) -> str:
return '<cycle: back to {:#x}>'.format(self.pc)
class LoopEnd(Cycle):
'''Represents the end of a loop (looping back to the start).
Contains the LoopStart instance we're looping back to.
'''
def __init__(self, loop_start: LoopStart):
super().__init__(loop_start.loop_start_pc)
self.loop_start = loop_start
def pretty(self) -> str:
return '<loop end: back to {:#x}>'.format(
self.loop_start.loop_start_pc)
class ControlGraph:
'''Represents the control flow graph of (part of) an OTBN program.
The `start` PC is the entrypoint of the control-flow graph.
The `graph` dictionary maps PCs to 2-item tuples. Not all PCs are in
`graph`; only ones that are jumped, branched, or looped to in the control
flow starting at `start`. The 2-item tuple for each PC is as follows:
- 0: the CodeSection that starts at the PC. It is guaranteed to be 0
or more straightline instructions, followed by a final
non-straightline instruction.
- 1: a list of the various control-flow locations that can be reached
*after* the CodeSection has run. For example, a CodeSection ending
in a BNE instruction would have two ControlLocs in its list, and a
CodeSection ending in `ret` would have a single Ret() instance in its
list.
'''
def __init__(self, start: int,
graph: Dict[int, Tuple[CodeSection,List[ControlLoc]]]):
for pc, (sec, _) in graph.items():
assert sec.start == pc
self.start = start
self.graph = graph
def get_entry(self, pc: int) -> Tuple[CodeSection, List[ControlLoc]]:
'''Returns the entry in the graph for the given PC.
Raises a ValueError if the PC is not in the graph.
'''
try:
return self.graph[pc]
except KeyError:
graph_pcs = [hex(pc) for pc in self.graph.keys()]
raise ValueError(
'PC {:#x} not found in control graph (PCs in graph are: {})'.
format(pc, graph_pcs)) from None
def get_section(self, pc: int) -> CodeSection:
'''Returns the CodeSection for the given PC.
Raises a ValueError if the PC is not in the graph.
'''
return self.get_entry(pc)[0]
def get_edges(self, pc: int) -> List[ControlLoc]:
'''Returns the edges for the section starting at pc.
Raises a ValueError if the PC is not in the graph.
'''
return self.get_entry(pc)[1]
def get_cycle_starts(self) -> Set[int]:
'''Returns start PCs of all marked cycles in the graph.'''
out = set()
for pc, entry in self.graph.items():
for edge in entry[1]:
if isinstance(edge, Cycle):
out.add(edge.pc)
return out
def _pretty_lines(self,
program: OTBNProgram,
entry_pc: int,
concise: bool,
already_printed: Set[int],
call_stack: List[int] = []) -> List[Tuple[int, str]]:
'''Returns lines for pretty-printing.
Returns each line as an (int,str) pair, where the int is the indent
level.
'''
out = []
sec, edges = self.get_entry(entry_pc)
symbols = [s for s in program.get_symbols_for_pc(entry_pc) if s != '']
pcs_to_print = list(range(sec.start, sec.end + 4, 4))
if entry_pc in already_printed and not concise:
if len(symbols) > 0:
out.append((0, '<{}> (see above)'.format(', '.join(symbols))))
else:
out.append((0, '{:#x}..{:#x} (see above)'.format(entry_pc, sec.end)))
pcs_to_print = []
elif len(symbols) > 0:
out.append((0, '<{}>'.format(', '.join(symbols))))
if concise:
# only print last insn
out.append((0, '...'))
pcs_to_print = [sec.end]
for pc in pcs_to_print:
insn = program.get_insn(pc)
operands = program.get_operands(pc)
disassembly = insn.disassemble(pc, operands)
out.append((0, '{:#x}: {}'.format(pc, disassembly)))
already_printed.add(sec.start)
# Indent if we have more than 1 non-special edge, excluding LoopEnds
# (i.e. if this is a branch)
non_special_edges = [
loc for loc in edges
if not (loc.is_special() or isinstance(loc, LoopEnd))
]
child_indent = 0 if len(non_special_edges) <= 1 else 2
if any(map(lambda loc: isinstance(loc, LoopEnd), edges)):
child_indent = -2
for loc in edges:
if isinstance(loc, LoopEnd):
out.append((0, loc.pretty()))
continue
if len(non_special_edges) > 1:
last_insn = program.get_insn(sec.end)
out.append((0, '-> (branch from {:#x}: {})'.format(sec.end, last_insn.mnemonic)))
if loc.is_special():
out.append((child_indent, loc.pretty()))
if isinstance(loc, Ret) and len(call_stack) > 0:
# pop the call stack and print flow after return to caller
out += [(indent - 2, line)
for indent, line in self._pretty_lines(
program, call_stack[0], concise,
already_printed, call_stack[1:])]
if isinstance(loc, LoopStart):
out += [(indent + child_indent + 2, line)
for indent, line in self._pretty_lines(
program, loc.loop_start_pc, concise,
already_printed, call_stack)]
else:
insn = program.get_insn(sec.end)
operands = program.get_operands(sec.end)
if insn.mnemonic == 'jal' and operands['grd'] == 1:
# push to to the call stack
call_stack = [sec.end + 4] + call_stack
child_indent += 2
out += [
(indent + child_indent, line)
for indent, line in self._pretty_lines(
program, loc.pc, concise, already_printed, call_stack)
]
return out
def pretty(self,
program: OTBNProgram,
indent: int = 0,
concise: bool = False) -> str:
'''Returns string for pretty-printing.'''
lines = self._pretty_lines(program, self.start, concise, set())
lines = [(indent + lindent, line) for (lindent, line) in lines]
line_strings = [
'{}{}'.format(' ' * lindent, line) for (lindent, line) in lines
]
return '\n'.join(line_strings)
def _clip_to_32(value: int) -> int:
'''Truncates a number to 32 bits.'''
return value & ((1 << 32) - 1)
def _get_next_control_locations(insn: Insn, operands: Dict[str, int],
pc: int) -> List[ControlLoc]:
'''Given a control-flow instruction, returns the possible destinations.
Raises a RuntimeError if the instruction is not a recognized control-flow
instruction, or if some unsupported construct is encountered.
'''
if insn.mnemonic == 'beq' or insn.mnemonic == 'bne':
branch_dst = _clip_to_32(operands['offset'])
# Branch can either go to branch_dst or next PC
return [ControlLoc(branch_dst), ControlLoc(pc + 4)]
elif insn.mnemonic == 'jal':
jump_dst = _clip_to_32(operands['offset'])
return [ControlLoc(jump_dst)]
elif insn.mnemonic == 'jalr':
if operands['grs1'] != 1:
# This is a function call through a pointer e.g. `jal x1,
# <addr>, 0`; not currently supported
raise RuntimeError(
'Cannot create control graph because of a function '
'call from a pointer at PC {:#x}: {}\nThis is '
'permitted by OTBN but not supported by this check.'.format(
pc, insn.disassemble(pc, operands)))
if operands['offset'] != 0:
raise RuntimeError(
'Cannot create control graph because of a nonzero offset for '
'JALR at PC {:#x}: {}.\nThis is permitted by OTBN but not '
'supported by this check.'.format(
pc, insn.disassemble(pc, operands)))
# This jump returns to whatever's on top of the call stack
return [Ret()]
elif insn.mnemonic == 'loopi' or insn.mnemonic == 'loop':
loop_end_pc = pc + (operands['bodysize'] * 4)
return [LoopStart(pc + 4, loop_end_pc)]
elif insn.mnemonic == 'ecall':
return [Ecall()]
raise RuntimeError(
'Unrecognized control flow instruction (straight-line=false) at PC {:#x}: {}'
.format(pc, insn.disassemble(pc, operands)))
def _populate_control_graph(graph: ControlGraph, program: OTBNProgram,
start_pc: int,
loop_stack: List[LoopStart]) -> None:
'''Populates input control flow graph starting from start_pc.
The `loop_stack` argument holds the LoopStart instances for all the loops
that enclose the start_pc, with the innermost loop first. This routine
checks only for the innermost loop's end PC; if the loops are incorrectly
nested, it will not warn. However, it will raise a RuntimeError if the loop
stack size (8) is exceeded.
This function will not record cycles in the control graph with the Cycle
instance; run _fix_cycles afterward.
'''
LOOP_STACK_SIZE = 8
if start_pc in graph.graph:
# Nothing to do; this section has already been populated.
return
# Top element of the loop stack, if any:
loop = loop_stack[0] if len(loop_stack) > 0 else None
# Follow straightline code until we get to a control flow instruction, the
# end of the loop body, or the end of imem
for pc in range(start_pc, program.max_pc() + 4, 4):
# Special handling for reaching the end of a loop body
if loop is not None and pc == loop.loop_end_pc:
# Finished the loop; add the current section and connect it to a
# LoopEnd instance as well as the next pc, then recurse on the next
# PC.
sec = CodeSection(start_pc, pc)
graph.graph[start_pc] = (sec, [LoopEnd(loop), ControlLoc(pc + 4)])
_populate_control_graph(graph, program, pc + 4, loop_stack[1:])
return
insn = program.get_insn(pc)
if insn.straight_line:
continue
operands = program.get_operands(pc)
section = CodeSection(start_pc, pc)
locs = _get_next_control_locations(insn, operands, pc)
if start_pc in graph.graph:
raise RuntimeError
graph.graph[start_pc] = (section, locs)
# Recurse on next control-flow locations.
for loc in locs:
if not loc.is_special():
# Destination is just a normal PC
_populate_control_graph(graph, program, loc.pc, loop_stack)
elif isinstance(loc, LoopStart):
if len(loop_stack) >= LOOP_STACK_SIZE - 1:
loop_start_pcs = [
loop.loop_start_pc for loop in [loc] + loop_stack
]
raise RuntimeError(
'Loop stack overflow at PC {} (loop stack includes '
'loops starting at the following PCs: {})'.format(
pc, loop_start_pcs))
_populate_control_graph(graph, program, loc.loop_start_pc,
[loc] + loop_stack)
elif isinstance(loc, ImemEnd) or isinstance(
loc, Ecall) or isinstance(loc, Ret):
# Nothing more to do
pass
else:
raise ValueError(
'Control-flow location of type {} is special '
'but does not have a recognized special type.'.format(
type(loc)))
# If this was a jump that pushed to the call stack, we need to also
# populate the control graph for what happens after we return from the
# jump
if insn.mnemonic == 'jal' and operands['grd'] == 1:
_populate_control_graph(graph, program, pc + 4, loop_stack)
return
# If we get here, we've reached the max PC
graph.graph[start_pc] = (CodeSection(start_pc,
program.max_pc()), [ImemEnd()])
return
def _label_cycles(program: OTBNProgram, graph: ControlGraph, start_pc: int, visited_pcs: Set[int]) -> None:
'''Creates Cycle edges to remove cyclic control flow from the graph.
Modifies graph in place. Works by replacing edges that loop back to an
already-traversed part of the control flow with a Cycle instance.
'''
sec, edges = graph.get_entry(start_pc)
for pc in sec:
visited_pcs.add(pc)
for i in range(len(edges)):
edge = edges[i]
if isinstance(edge, Ret) or isinstance(edge, ImemEnd) or isinstance(edge, Ecall):
# Cannot possibly loop back
continue
elif isinstance(edge, Cycle):
# Done; no need to traverse or replace
continue
if edge.pc in visited_pcs:
# Replace with Cycle instance and do not traverse
edges[i] = Cycle(edge.pc)
else:
_label_cycles(program, graph, edge.pc, visited_pcs.copy())
def _fix_cycles(program: OTBNProgram, graph: ControlGraph) -> None:
'''Labels cycles and splits them from other CodeSections in the graph.
Modifies graph in place.
'''
_label_cycles(program, graph, graph.start, set())
cycle_start_pcs = graph.get_cycle_starts()
new_entries: Dict[int,Tuple[CodeSection,List[ControlLoc]]] = {}
for start_pc in graph.graph:
sec, edges = graph.get_entry(start_pc)
for pc in sec:
if pc in cycle_start_pcs and pc != sec.start:
# Split this section and create a new edge leading to the cycle
new_entries[start_pc] = (CodeSection(start_pc, pc), [ControlLoc(pc)])
break
graph.graph.update(new_entries)
def _make_control_graph(program: OTBNProgram, start_pc: int) -> ControlGraph:
'''Constructs a control flow graph with start_pc as the entrypoint.
Assumes the loop stack is empty at start_pc.
'''
graph = ControlGraph(start_pc, {})
_populate_control_graph(graph, program, start_pc, [])
_fix_cycles(program, graph)
return graph
def program_control_graph(program: OTBNProgram) -> ControlGraph:
'''Constructs a control flow graph representing an entire program.'''
return _make_control_graph(program, program.min_pc())
def subroutine_control_graph(program: OTBNProgram,
subroutine: str) -> ControlGraph:
'''Control flow graph from the given symbol to the first unmatched `ret`.
The control flow graph finishes when either an `ecall` instruction is
reached or a `ret` returns to the caller of the subroutine.
'''
start_pc = program.get_pc_at_symbol(subroutine)
return _make_control_graph(program, start_pc)