CPU: the CPU model, which controls the simulation

The CPU references all the main units and runs the simulation one cycle at
a time.

The commit also includes the Counter module, which holds several performance
counters, and interfaces.py, which defines the internal API of TBM.
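
A rough usage sketch (illustrative only; building the pipe_map, the
scoreboards, the memory system, the config and the functional trace is left
to the top-level driver and is not part of this commit):

    cpu = CPU(pipe_map, rf_scoreboards, mem_sys, config, trace)
    # Runs the tick/tock phases of all units, one cycle at a time, and
    # prints (or saves) the counter report when the simulation ends.
    cpu.simulate()
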
Change-Id: I9efb0a82301554759be33d6192bbfdcca16e825a
diff --git a/counter.py b/counter.py
new file mode 100644
index 0000000..2a43bf1
--- /dev/null
+++ b/counter.py
@@ -0,0 +1,124 @@
+"""Counter module."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+import sys
+from typing import Optional
+
+
+@dataclass(slots=True)
+class Utilization:
+ size: Optional[int] = None
+ count: int = 0
+ occupied: int = 0
+
+ def utilization(self, cycles: int) -> float:
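+        """Return occupancy as a percentage of the available capacity.
+
+        The capacity over `cycles` cycles is `cycles * size` when `size` is
+        known, and simply `cycles` otherwise.
+        """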
+ if self.size is not None:
+ return self.occupied * 100 / (cycles * self.size)
+
+ return self.occupied * 100 / cycles
+
+ # Overload +=
+ def __iadd__(self, other: Utilization) -> Utilization:
+ assert self.size == other.size
+
+ self.count += other.count
+ self.occupied += other.occupied
+
+ return self
+
+
+@dataclass(slots=True)
+class Counter:
+ cycles: int = 0
+
+ retired_instruction_count: int = 0
+
+ branch_count: int = 0
+
+ stalls: dict[str, int] = field(default_factory=dict)
+
+ utilizations: dict[str, Utilization] = field(default_factory=dict)
+
+ scalar_load_store: int = 0
+ scalar_load_store_stall: int = 0
+
+ vector_load_store: int = 0
+ vector_load_store_stall: int = 0
+
+ # Overload +=
+ def __iadd__(self, other: Counter) -> Counter:
+ self.cycles += other.cycles
+
+ self.retired_instruction_count += other.retired_instruction_count
+
+ self.branch_count += other.branch_count
+
+ # The assertion holds because the reset() functions assign 0 to all
+ # keys.
+ assert self.stalls.keys() == other.stalls.keys()
+ for key, val in other.stalls.items():
+ self.stalls[key] += val
+
+        # The assertion holds because the reset() functions assign 0 to all
+        # keys.
+        assert self.utilizations.keys() == other.utilizations.keys()
+        for key, val in other.utilizations.items():
+            self.utilizations[key] += val
+
+ self.scalar_load_store += other.scalar_load_store
+ self.scalar_load_store_stall += other.scalar_load_store_stall
+
+ self.vector_load_store += other.vector_load_store
+ self.vector_load_store_stall += other.vector_load_store_stall
+
+ return self
+
+ def print(self, file=sys.stdout) -> None:
+ print(f"*** cycles: {self.cycles}", file=file)
+ if self.cycles == 0:
+ return
+
+ # pylint: disable=consider-using-f-string
+ print("*** retired instructions per cycle: %.2f (%d)" %
+ (self.retired_instruction_count / self.cycles,
+ self.retired_instruction_count),
+ file=file)
+
+ print("*** retired / fetched instructions: %.2f" %
+ (self.retired_instruction_count / self.utilizations["FE"].count),
+ file=file)
+
+ print("*** branch count: " + str(self.branch_count), file=file)
+
+ if self.scalar_load_store:
+ print(
+ "*** scalar load/store stall rate:"
+ f" {self.scalar_load_store_stall / self.scalar_load_store:.2f}"
+ " stalls per-instruction",
+ file=file)
+
+ if self.vector_load_store:
+ print(
+ "*** vector load/store stall rate:"
+ f" {self.vector_load_store_stall / self.vector_load_store:.2f}"
+ " stalls per-instruction",
+ file=file)
+
+ print(file=file)
+ print("*** stall cycles:", file=file)
+ for name, stall in self.stalls.items():
+ val = stall * 100 // self.cycles
+ print(f" {name}: {val}% ({stall})", file=file)
+
+ print(file=file)
+ print("*** instructions per cycle:", file=file)
+ for name, util in self.utilizations.items():
+ val = util.count / self.cycles
+ print(f" {name}: {val:.2f} ({util.count})", file=file)
+
+ print(file=file)
+ print("*** utilization:", file=file)
+ for name, util in self.utilizations.items():
+ val = util.utilization(self.cycles)
+ print(f" {name}: {val:.0f}% ({util.count})", file=file)
diff --git a/cpu.py b/cpu.py
new file mode 100644
index 0000000..a636ca9
--- /dev/null
+++ b/cpu.py
@@ -0,0 +1,193 @@
+"""CPU module."""
+
+import collections
+import itertools
+import logging
+import pickle
+import sys
+from typing import Any, Dict
+
+from counter import Counter
+from exec_unit import ExecUnit
+from fetch_unit import FetchUnit
+from functional_trace import FunctionalTrace
+from memory_system import MemorySystem
+from sched_unit import SchedUnit
+import tbm_options
+import utilities
+
+
+logger = logging.getLogger(__name__)
+
+
+class CPU:
+ """Top level core model."""
+
+ def __init__(self, pipe_map: Dict[str, str], rf_scoreboards: Dict[str, Any],
+ mem_sys: MemorySystem, config: Dict[str, Any],
+ trace: FunctionalTrace) -> None:
+ """Construct a CPU object."""
+
+ self._print_header_cycle = None
+
+        # counters
+ self.counter = Counter()
+
+ # Units
+ self.fetch_unit = FetchUnit(config, trace)
+ self.sched_unit = SchedUnit(config)
+ self.exec_unit = ExecUnit(config, pipe_map, rf_scoreboards)
+ self.mem_sys = mem_sys
+
+ # Connect the units to each other.
+ self.sched_unit.connect(self.fetch_unit,
+ self.exec_unit)
+ self.exec_unit.connect(self.fetch_unit,
+ self.sched_unit)
+
+        # The order of this list is significant: it is the order in which the
+        # tick/tock phases are executed, and a different order would give
+        # different results. Units that work in lockstep should be listed in
+        # an order that runs counter to the instruction flow.
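+        # Here the flow is fetch_unit -> sched_unit -> exec_unit -> mem_sys,
+        # hence the reversed order of the list below.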
+ self.units = [
+ self.mem_sys,
+ self.exec_unit,
+ self.sched_unit,
+ self.fetch_unit,
+ ]
+
+ def log(self, message: str) -> None:
+ if tbm_options.args.print_from_cycle <= self.counter.cycles:
+ logger.info("[CPU:%d] %s", self.counter.cycles, message)
+
+ def simulate(self) -> None:
+ """Run the simulation."""
+
+        # For debugging: if self.counter.retired_instruction_count doesn't
+        # change for deadlock_threshold cycles, we suspect TBM is deadlocked
+        # and terminate the execution.
+ prev_ret_insts = 0
+ maybe_deadlock_count = 0
+ deadlock_threshold = 100
+
+ for unit in self.units:
+ unit.reset(self.counter)
+
+ with utilities.CallEvery(30,
+ lambda: logger.info("%s retired instructions",
+ self.counter.retired_instruction_count)):
+ # simulation's main loop
+ while (not self.fetch_unit.eof() or
+ any(u.pending() for u in self.units)):
+
+ if (tbm_options.args.print_cycles is not None and
+ self.counter.cycles >= tbm_options.args.print_cycles):
+ break
+
+ self.counter.cycles += 1
+
+ self.log("start tick")
+ for unit in self.units:
+ unit.tick(self.counter)
+
+ self.log("start tock")
+ for unit in self.units:
+ unit.tock(self.counter)
+
+ if tbm_options.args.print_trace:
+ self.print_state(tbm_options.args.print_trace)
+
+ # Stop the simulation if we suspect a deadlock.
+ if prev_ret_insts == self.counter.retired_instruction_count:
+ maybe_deadlock_count += 1
+ if maybe_deadlock_count > deadlock_threshold:
+ self.print_state_detailed(file=sys.stderr)
+ logger.error("(cycle %d) retired instruction count has"
+ " not changed for %d cycles, this is"
+ " probably a TBM bug.",
+ self.counter.cycles, deadlock_threshold)
+ sys.exit(1)
+ else:
+ prev_ret_insts = self.counter.retired_instruction_count
+ maybe_deadlock_count = 0
+
+ if tbm_options.args.save_counters:
+ with open(tbm_options.args.save_counters, "wb") as out:
+ pickle.dump(self.counter, out, pickle.HIGHEST_PROTOCOL)
+
+ if tbm_options.args.report:
+ # Save report to file
+ with open(tbm_options.args.report,
+ "w" if tbm_options.args.report_dont_include_cfg else "a",
+ encoding="ascii") as out:
+ self.print_report(out)
+ else:
+ # Or print report to stdout
+ self.print_report()
+
+ def print_report(self, file=sys.stdout) -> None:
+ self.counter.print(file)
+
+ for unit in self.units:
+ pending = unit.pending()
+ if pending:
+ print(f"*** Warning: pending instructions in {unit.name}:"
+ f" {pending}", file=file)
+
+ def print_state(self, print_trace: str, file=sys.stdout) -> None:
+ """Dump the current snapshot."""
+ if not tbm_options.args.print_from_cycle <= self.counter.cycles:
+ return
+
+ if print_trace == "detailed":
+ self.print_state_detailed(file=file)
+ else:
+ assert print_trace == "three-valued"
+ self.print_state_three_valued(file=file)
+
+ def print_state_detailed(self, file=sys.stdout) -> None:
+ """Dump a detailed snapshot."""
+ print(file=file)
+ for unit in self.units:
+ unit.print_state_detailed(file)
+
+ def print_state_three_valued(self, file=sys.stdout) -> None:
+ """Dump a three-valued snapshot."""
+
+ pp_vals = ["-", "P", "F"]
+
+ values = collections.deque([str(self.counter.cycles)])
+ for unit in self.units:
+ values.extend(unit.get_state_three_valued(pp_vals))
+
+ # Print the header lines the first time we get here, and then every 100
+ # cycles.
+ if self._print_header_cycle is None:
+            # Record the remainder the first time we print a line, and then
+            # print the header every time the cycle count hits that remainder.
+ self._print_header_cycle = self.counter.cycles % 100
+
+ if self._print_header_cycle == self.counter.cycles % 100:
+ headers = collections.deque(["cycle"])
+ for unit in self.units:
+ headers.extend(unit.get_state_three_valued_header())
+
+ # Transpose the headers (i.e. print them vertically)
+ height = max(len(h) for h in headers)
+ lines = [collections.deque() for _ in range(height)]
+ for header, val in zip(headers, values):
+                # Because lines was constructed to match the longest header,
+                # we know that in the zip_longest below it is the header that
+                # will be padded with fillvalue to match the length of lines.
+ assert len(header) <= len(lines)
+ for c, line in itertools.zip_longest(reversed(header),
+ lines,
+ fillvalue=" "):
+ line.append(f"{c:{len(val)}}")
+
+ print(file=file)
+ for line in reversed(lines):
+ print("|".join(line), file=file)
+ print("+".join("-" * len(val) for val in values), file=file)
+
+ print("|".join(values), file=file)
diff --git a/interfaces.py b/interfaces.py
new file mode 100644
index 0000000..f03d0b2
--- /dev/null
+++ b/interfaces.py
@@ -0,0 +1,223 @@
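+"""Internal API of TBM: abstract interfaces implemented by the units."""
+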
+import abc
+import enum
+import collections
+import logging
+from typing import Generic, Iterable, Optional, Sequence, TypeVar
+
+from counter import Counter
+from instruction import Instruction
+import tbm_options
+
+class CyclePhase(enum.Enum):
+ TICK = enum.auto()
+ TOCK = enum.auto()
+
+
+# Declare type variable
+T = TypeVar('T')
+
+
+class ConsumableQueue(Generic[T], Iterable[T]):
+ """A queue that can be consumed by another (not the owner) unit.
+
+ The Iterable iterates over all the visible objects in the queue, oldest to
+ newest.
+ """
+
+ @property
+ @abc.abstractmethod
+ def size(self) -> Optional[int]:
+ """Total size of the queue."""
+
+ @abc.abstractmethod
+ def __len__(self) -> int:
+ """Number of elements in the queue."""
+
+ @abc.abstractmethod
+ def full(self) -> bool:
+ """Check if the queue is full."""
+
+ @abc.abstractmethod
+ def dequeue(self) -> Optional[T]:
+ """Remove the oldest element in the queue and return it."""
+
+ @abc.abstractmethod
+ def peek(self) -> Optional[T]:
+ """Return the oldest element in the queue."""
+
+
+class Module(abc.ABC):
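+    """Base class of all simulated units.
+
+    The CPU drives each unit through reset() once, before the simulation
+    starts, and then tick() followed by tock() on every cycle. Subclasses
+    are expected to call the super() implementations to keep the cycle/phase
+    bookkeeping consistent.
+    """
+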
+ def __init__(self, name: str):
+ self._name = name
+ self._cycle = None
+ self._phase = None
+ self.logger = logging.getLogger(name)
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def cycle(self) -> int:
+ assert self._cycle is not None
+ return self._cycle
+
+ @property
+ def phase(self) -> CyclePhase:
+ assert self._phase is not None
+ return self._phase
+
+ def log(self, message: str) -> None:
+ if self._cycle is None:
+ self.logger.info("[%s:init] %s", self.name, message)
+ elif tbm_options.args.print_from_cycle <= self.cycle:
+ assert self.phase is not None
+ self.logger.info("[%s:%d:%s] %s",
+ self.name, self.cycle, self.phase.name, message)
+
+ @abc.abstractmethod
+ def reset(self, cntr: Counter) -> None:
+ # TODO(sflur): implement proper reset for all the subclasses. For now
+ # I'm just using this to init the cntr.
+ self._cycle = None
+ self._phase = None
+
+ @abc.abstractmethod
+ def tick(self, cntr: Counter) -> None:
+ assert self._cycle is None or self.cycle + 1 == cntr.cycles
+ assert self._phase is None or self.phase == CyclePhase.TOCK
+ self._cycle = cntr.cycles
+ self._phase = CyclePhase.TICK
+
+ @abc.abstractmethod
+ def tock(self, cntr: Counter) -> None:
+ assert self.phase == CyclePhase.TICK
+ self._phase = CyclePhase.TOCK
+
+ @abc.abstractmethod
+ def pending(self) -> int:
+ """Number of pending instructions."""
+
+ @abc.abstractmethod
+ def print_state_detailed(self, file) -> None:
+ pass
+
+ @abc.abstractmethod
+ def get_state_three_valued_header(self) -> Sequence[str]:
+ pass
+
+ @abc.abstractmethod
+ def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
+ pass
+
+class FetchUnit(Module):
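+    """Abstract fetch unit: exposes a queue of fetched instructions."""
+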
+ @property
+ @abc.abstractmethod
+ def queue(self) -> ConsumableQueue[Instruction]:
+ pass
+
+ @abc.abstractmethod
+ def eof(self) -> bool:
+ pass
+
+ @abc.abstractmethod
+ def branch_resolved(self) -> None:
+ pass
+
+
+class SchedUnit(Module):
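+    """Abstract scheduling unit: exposes the issue queues."""
+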
+ @property
+ @abc.abstractmethod
+ def queues(self) -> Iterable[ConsumableQueue[Instruction]]:
+ pass
+
+ @abc.abstractmethod
+ def branch_resolved(self) -> None:
+ pass
+
+class ExecUnit(Module):
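+    """Abstract execution unit: maps instructions to issue queues."""
+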
+ @abc.abstractmethod
+ def get_issue_queue_id(self, instr: Instruction) -> str:
+ pass
+
+
+class ExecPipeline(Module):
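+    """Abstract execution pipeline.
+
+    A staged pipeline of the given depth, fed from the issue queue named by
+    issue_queue_id.
+    """
+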
+ def __init__(self, name: str, kind: str, issue_queue_id: str,
+ depth: int) -> None:
+ super().__init__(name)
+
+ self._kind = kind
+ self._issue_queue_id = issue_queue_id
+ self._depth = depth
+
+ # Instructions that were retired in the current cycle.
+ self._retired_instrs = collections.deque()
+
+ @property
+ def kind(self) -> str:
+ return self._kind
+
+ @property
+ def issue_queue_id(self) -> str:
+ return self._issue_queue_id
+
+ @property
+ def depth(self) -> int:
+ """Pipeline depth (excluding other parts of the unit)."""
+ return self._depth
+
+ @property
+ def retired_instrs(self) -> Sequence[Instruction]:
+ """Instructions that retired in the current phase."""
+ return self._retired_instrs
+
+ @abc.abstractmethod
+ def try_dispatch(self, instr: Instruction, cntr: Counter) -> bool:
+ pass
+
+
+class Scoreboard(Module):
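+    """Register-file scoreboard interface.
+
+    The expected call sequence per instruction is roughly: insert_accesses()
+    first, then can_issue()/issue(), can_read()/read() before operands are
+    read, and can_write() followed by buff_write() and/or write() when
+    results are produced.
+    """
+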
+ def insert_accesses(self, instr: Instruction, *,
+ # keyword-only args:
+ reg_reads: Sequence[str],
+ reg_writes: Sequence[str]) -> None:
+ """Record the reg accesses instr intends to execute."""
+
+ def can_read(self, instr: Instruction, regs: Sequence[str]) -> bool:
+ """True iff instr can execute the reg reads in the next cycle.
+
+        regs must be a subset of the reg_reads of a previous call to
+        insert_accesses.
+ """
+
+ def read(self, instr: Instruction, regs: Sequence[str]) -> None:
+ """Record that instr is executing the reg reads in the next cycle."""
+
+ def can_write(self, instr: Instruction, regs: Sequence[str]) -> bool:
+ """True iff instr can execute the reg writes in the next cycle.
+
+        regs must be a subset of the reg_writes of a previous call to
+        insert_accesses.
+ """
+
+ def buff_write(self, instr: Instruction, regs: Sequence[str]) -> None:
+        """Record that the reg writes become available in the writeback queue
+        in the next cycle.
+ """
+
+ def write(self, instr: Instruction, regs: Sequence[str]) -> None:
+        """Record that the writes become available in the reg-file in the
+        next cycle.
+ """
+
+ def can_issue(self, instr: Instruction) -> bool:
+ """True iff instr can be issued in the next cycle.
+
+        This is to prevent deadlocks due to dependency cycles. Note that two
+        instructions issued to the same staged pipeline are additionally
+        ordered (on top of the rw/ww/wr dependencies) by the pipeline itself.
+ """
+
+ def issue(self, instr: Instruction) -> None:
+ """Record that instr is issued in the next cycle."""