CPU: the CPU model, which controls the simulation

The CPU references all the main units and runs the simulation one cycle at a
time.

The commit also includes the Counter module which has several performance
counters. interfaces.py includes the internal API of TBM.

Change-Id: I9efb0a82301554759be33d6192bbfdcca16e825a
diff --git a/counter.py b/counter.py
new file mode 100644
index 0000000..2a43bf1
--- /dev/null
+++ b/counter.py
@@ -0,0 +1,124 @@
+"""Counter module."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+import sys
+from typing import Optional
+
+
+@dataclass(slots=True)
+class Utilization:
+    size: Optional[int] = None  # when set, scales utilization()'s denominator
+    count: int = 0
+    occupied: int = 0
+
+    def utilization(self, cycles: int) -> float:
+        if self.size is not None:  # normalise by size as well as by cycles
+            return self.occupied * 100 / (cycles * self.size)
+
+        return self.occupied * 100 / cycles  # occupied as a percent of cycles
+
+    # Overload +=: accumulate other into self in place; sizes must match.
+    def __iadd__(self, other: Utilization) -> Utilization:
+        assert self.size == other.size
+
+        self.count += other.count
+        self.occupied += other.occupied
+
+        return self
+
+
+@dataclass(slots=True)
+class Counter:
+    cycles: int = 0
+
+    retired_instruction_count: int = 0
+
+    branch_count: int = 0
+
+    stalls: dict[str, int] = field(default_factory=dict)
+
+    utilizations: dict[str, Utilization] = field(default_factory=dict)
+
+    scalar_load_store: int = 0
+    scalar_load_store_stall: int = 0
+
+    vector_load_store: int = 0
+    vector_load_store_stall: int = 0
+
+    # Overload +=: fold other's totals into self in place and return self.
+    def __iadd__(self, other: Counter) -> Counter:
+        self.cycles += other.cycles
+
+        self.retired_instruction_count += other.retired_instruction_count
+
+        self.branch_count += other.branch_count
+
+        # The assertion holds because the reset() functions assign 0 to all
+        # keys.
+        assert self.stalls.keys() == other.stalls.keys()
+        for key, val in other.stalls.items():
+            self.stalls[key] += val
+
+        # Same invariant as for stalls: both sides must have the same keys.
+        assert self.utilizations.keys() == other.utilizations.keys()
+        for key, val in other.utilizations.items():
+            self.utilizations[key] += val
+
+        self.scalar_load_store += other.scalar_load_store
+        self.scalar_load_store_stall += other.scalar_load_store_stall
+
+        self.vector_load_store += other.vector_load_store
+        self.vector_load_store_stall += other.vector_load_store_stall
+
+        return self
+
+    def print(self, file=sys.stdout) -> None:
+        print(f"*** cycles: {self.cycles}", file=file)
+        if self.cycles == 0:
+            return  # the per-cycle figures below would divide by zero
+
+        # pylint: disable=consider-using-f-string
+        print("*** retired instructions per cycle: %.2f (%d)" %
+              (self.retired_instruction_count / self.cycles,
+               self.retired_instruction_count),
+              file=file)
+
+        print("*** retired / fetched instructions: %.2f" %
+              (self.retired_instruction_count / self.utilizations["FE"].count),
+              file=file)  # NOTE(review): assumes "FE" exists with count > 0
+
+        print("*** branch count: " + str(self.branch_count), file=file)
+
+        if self.scalar_load_store:
+            print(
+                "*** scalar load/store stall rate:"
+                f" {self.scalar_load_store_stall / self.scalar_load_store:.2f}"
+                " stalls per-instruction",
+                file=file)
+
+        if self.vector_load_store:
+            print(
+                "*** vector load/store stall rate:"
+                f" {self.vector_load_store_stall / self.vector_load_store:.2f}"
+                " stalls per-instruction",
+                file=file)
+
+        print(file=file)
+        print("*** stall cycles:", file=file)
+        for name, stall in self.stalls.items():
+            val = stall * 100 // self.cycles  # whole percent, rounded down
+            print(f"  {name}: {val}% ({stall})", file=file)
+
+        print(file=file)
+        print("*** instructions per cycle:", file=file)
+        for name, util in self.utilizations.items():
+            val = util.count / self.cycles
+            print(f"  {name}: {val:.2f} ({util.count})", file=file)
+
+        print(file=file)
+        print("*** utilization:", file=file)
+        for name, util in self.utilizations.items():
+            val = util.utilization(self.cycles)
+            print(f"  {name}: {val:.0f}% ({util.count})", file=file)
diff --git a/cpu.py b/cpu.py
new file mode 100644
index 0000000..a636ca9
--- /dev/null
+++ b/cpu.py
@@ -0,0 +1,193 @@
+"""CPU module."""
+
+import collections
+import itertools
+import logging
+import pickle
+import sys
+from typing import Any, Dict
+
+from counter import Counter
+from exec_unit import ExecUnit
+from fetch_unit import FetchUnit
+from functional_trace import FunctionalTrace
+from memory_system import MemorySystem
+from sched_unit import SchedUnit
+import tbm_options
+import utilities
+
+
+logger = logging.getLogger(__name__)
+
+
+class CPU:
+    """Top level core model."""
+
+    def __init__(self, pipe_map: Dict[str, str], rf_scoreboards: Dict[str, Any],
+                 mem_sys: MemorySystem, config: Dict[str, Any],
+                 trace: FunctionalTrace) -> None:
+        """Construct a CPU object: build the units and connect them."""
+
+        self._print_header_cycle = None  # set on first three-valued print
+
+        # Counters
+        self.counter = Counter()
+
+        # Units
+        self.fetch_unit = FetchUnit(config, trace)
+        self.sched_unit = SchedUnit(config)
+        self.exec_unit = ExecUnit(config, pipe_map, rf_scoreboards)
+        self.mem_sys = mem_sys
+
+        # Connect the units to each other.
+        self.sched_unit.connect(self.fetch_unit,
+                                self.exec_unit)
+        self.exec_unit.connect(self.fetch_unit,
+                               self.sched_unit)
+
+        # The order of this list is significant: it is the order in which the
+        # tick/tock phases will be executed, and a different order will give
+        # different results. Units that work in lockstep should be listed in
+        # an order that is counter to the instruction flow order.
+        self.units = [
+            self.mem_sys,
+            self.exec_unit,
+            self.sched_unit,
+            self.fetch_unit,
+        ]
+
+    def log(self, message: str) -> None:
+        if tbm_options.args.print_from_cycle <= self.counter.cycles:
+            logger.info("[CPU:%d] %s", self.counter.cycles, message)
+
+    def simulate(self) -> None:
+        """Run the simulation, then save and/or report the counters."""
+
+        # For debugging! If self.counter.retired_instruction_count doesn't
+        # change for deadlock_threshold cycles, we suspect TBM is in a
+        # deadlock, and terminate the execution.
+        prev_ret_insts = 0
+        maybe_deadlock_count = 0
+        deadlock_threshold = 100  # cycles
+
+        for unit in self.units:
+            unit.reset(self.counter)
+
+        with utilities.CallEvery(30,
+                lambda: logger.info("%s retired instructions",
+                                    self.counter.retired_instruction_count)):
+            # Main loop: one cycle per iteration, until eof and nothing pending.
+            while (not self.fetch_unit.eof() or
+                   any(u.pending() for u in self.units)):
+
+                if (tbm_options.args.print_cycles is not None and
+                    self.counter.cycles >= tbm_options.args.print_cycles):
+                    break  # print_cycles limit reached
+
+                self.counter.cycles += 1
+
+                self.log("start tick")
+                for unit in self.units:
+                    unit.tick(self.counter)
+
+                self.log("start tock")
+                for unit in self.units:
+                    unit.tock(self.counter)
+
+                if tbm_options.args.print_trace:
+                    self.print_state(tbm_options.args.print_trace)
+
+                # Stop the simulation if we suspect a deadlock.
+                if prev_ret_insts == self.counter.retired_instruction_count:
+                    maybe_deadlock_count += 1
+                    if maybe_deadlock_count > deadlock_threshold:
+                        self.print_state_detailed(file=sys.stderr)
+                        logger.error("(cycle %d) retired instruction count has"
+                                     " not changed for %d cycles, this is"
+                                     " probably a TBM bug.",
+                                     self.counter.cycles, deadlock_threshold)
+                        sys.exit(1)
+                else:
+                    prev_ret_insts = self.counter.retired_instruction_count
+                    maybe_deadlock_count = 0
+
+        if tbm_options.args.save_counters:
+            with open(tbm_options.args.save_counters, "wb") as out:
+                pickle.dump(self.counter, out, pickle.HIGHEST_PROTOCOL)
+
+        if tbm_options.args.report:
+            # Save report to file; append unless report_dont_include_cfg.
+            with open(tbm_options.args.report,
+                      "w" if tbm_options.args.report_dont_include_cfg else "a",
+                      encoding="ascii") as out:
+                self.print_report(out)
+        else:
+            # Or print report to stdout
+            self.print_report()
+
+    def print_report(self, file=sys.stdout) -> None:
+        self.counter.print(file)
+
+        for unit in self.units:
+            pending = unit.pending()
+            if pending:
+                print(f"*** Warning: pending instructions in {unit.name}:"
+                      f" {pending}", file=file)
+
+    def print_state(self, print_trace: str, file=sys.stdout) -> None:
+        """Dump the current snapshot."""
+        if not tbm_options.args.print_from_cycle <= self.counter.cycles:
+            return
+
+        if print_trace == "detailed":
+            self.print_state_detailed(file=file)
+        else:
+            assert print_trace == "three-valued"
+            self.print_state_three_valued(file=file)
+
+    def print_state_detailed(self, file=sys.stdout) -> None:
+        """Dump a detailed snapshot."""
+        print(file=file)
+        for unit in self.units:
+            unit.print_state_detailed(file)
+
+    def print_state_three_valued(self, file=sys.stdout) -> None:
+        """Dump a three-valued snapshot as one '|'-separated row."""
+
+        pp_vals = ["-", "P", "F"]
+
+        values = collections.deque([str(self.counter.cycles)])
+        for unit in self.units:
+            values.extend(unit.get_state_three_valued(pp_vals))
+
+        # Print the header lines the first time we get here, and then every 100
+        # cycles.
+        if self._print_header_cycle is None:
+            # Record the remainder the first time we print a line, and then
+            # print the header every time we see it.
+            self._print_header_cycle = self.counter.cycles % 100
+
+        if self._print_header_cycle == self.counter.cycles % 100:
+            headers = collections.deque(["cycle"])
+            for unit in self.units:
+                headers.extend(unit.get_state_three_valued_header())
+
+            # Transpose the headers (i.e. print them vertically, bottom-aligned)
+            height = max(len(h) for h in headers)
+            lines = [collections.deque() for _ in range(height)]
+            for header, val in zip(headers, values):
+                # Because lines was constructed to match the longest header we
+                # know that in the zip_longest below it's the header that will
+                # be filled with fillvalue to match lines' length.
+                assert len(header) <= len(lines)
+                for c, line in itertools.zip_longest(reversed(header),
+                                                     lines,
+                                                     fillvalue=" "):
+                    line.append(f"{c:{len(val)}}")  # pad to value width
+
+            print(file=file)
+            for line in reversed(lines):
+                print("|".join(line), file=file)
+            print("+".join("-" * len(val) for val in values), file=file)
+
+        print("|".join(values), file=file)
diff --git a/interfaces.py b/interfaces.py
new file mode 100644
index 0000000..f03d0b2
--- /dev/null
+++ b/interfaces.py
@@ -0,0 +1,223 @@
+import abc
+import enum
+import collections
+import logging
+from typing import Generic, Iterable, Optional, Sequence, TypeVar
+
+from counter import Counter
+from instruction import Instruction
+import tbm_options
+
+class CyclePhase(enum.Enum):
+    TICK = enum.auto()  # phase recorded by Module.tick()
+    TOCK = enum.auto()  # phase recorded by Module.tock()
+
+
+# Declare type variable
+T = TypeVar('T')
+
+
+class ConsumableQueue(Generic[T], Iterable[T]):
+    """A queue that can be consumed by another (not the owner) unit.
+
+    The Iterable iterates over all the visible objects in the queue, oldest to
+    newest.
+    """
+
+    @property
+    @abc.abstractmethod
+    def size(self) -> Optional[int]:
+        """Total size of the queue."""
+
+    @abc.abstractmethod
+    def __len__(self) -> int:
+        """Number of elements in the queue."""
+
+    @abc.abstractmethod
+    def full(self) -> bool:
+        """Check if the queue is full."""
+
+    @abc.abstractmethod
+    def dequeue(self) -> Optional[T]:
+        """Remove the oldest element in the queue and return it."""
+
+    @abc.abstractmethod
+    def peek(self) -> Optional[T]:
+        """Return the oldest element in the queue."""
+
+
+class Module(abc.ABC):
+    def __init__(self, name: str):
+        self._name = name
+        self._cycle = None
+        self._phase = None
+        self.logger = logging.getLogger(name)
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def cycle(self) -> int:
+        assert self._cycle is not None
+        return self._cycle
+
+    @property
+    def phase(self) -> CyclePhase:
+        assert self._phase is not None
+        return self._phase
+
+    def log(self, message: str) -> None:
+        if self._cycle is None:
+            self.logger.info("[%s:init] %s", self.name, message)
+        elif tbm_options.args.print_from_cycle <= self.cycle:
+            assert self.phase is not None
+            self.logger.info("[%s:%d:%s] %s",
+                             self.name, self.cycle, self.phase.name, message)
+
+    @abc.abstractmethod
+    def reset(self, cntr: Counter) -> None:
+        # TODO(sflur): implement proper reset for all the subclasses. For now
+        # I'm just using this to init the cntr.
+        self._cycle = None
+        self._phase = None
+
+    @abc.abstractmethod
+    def tick(self, cntr: Counter) -> None:
+        assert self._cycle is None or self.cycle + 1 == cntr.cycles
+        assert self._phase is None or self.phase == CyclePhase.TOCK
+        self._cycle = cntr.cycles
+        self._phase = CyclePhase.TICK
+
+    @abc.abstractmethod
+    def tock(self, cntr: Counter) -> None:
+        assert self.phase == CyclePhase.TICK
+        self._phase = CyclePhase.TOCK
+
+    @abc.abstractmethod
+    def pending(self) -> int:
+        """Number of pending instructions."""
+
+    @abc.abstractmethod
+    def print_state_detailed(self, file) -> None:
+        pass
+
+    @abc.abstractmethod
+    def get_state_three_valued_header(self) -> Sequence[str]:
+        pass
+
+    @abc.abstractmethod
+    def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
+        pass
+
+class FetchUnit(Module):
+    @property
+    @abc.abstractmethod
+    def queue(self) -> ConsumableQueue[Instruction]:
+        pass
+
+    @abc.abstractmethod
+    def eof(self) -> bool:
+        pass
+
+    @abc.abstractmethod
+    def branch_resolved(self) -> None:
+        pass
+
+
+class SchedUnit(Module):
+    @property
+    @abc.abstractmethod
+    def queues(self) -> Iterable[ConsumableQueue[Instruction]]:
+        pass
+
+    @abc.abstractmethod
+    def branch_resolved(self) -> None:
+        pass
+
+class ExecUnit(Module):
+    @abc.abstractmethod
+    def get_issue_queue_id(self, instr: Instruction) -> str:
+        pass
+
+
+class ExecPipeline(Module):
+    def __init__(self, name: str, kind: str, issue_queue_id: str,
+                 depth: int) -> None:
+        super().__init__(name)
+
+        self._kind = kind
+        self._issue_queue_id = issue_queue_id
+        self._depth = depth
+
+        # Instructions that were retired in the current cycle.
+        self._retired_instrs = collections.deque()
+
+    @property
+    def kind(self) -> str:
+        return self._kind
+
+    @property
+    def issue_queue_id(self) -> str:
+        return self._issue_queue_id
+
+    @property
+    def depth(self) -> int:
+        """Pipeline depth (excluding other parts of the unit)."""
+        return self._depth
+
+    @property
+    def retired_instrs(self) -> Sequence[Instruction]:
+        """Instructions that retired in the current phase."""
+        return self._retired_instrs
+
+    @abc.abstractmethod
+    def try_dispatch(self, instr: Instruction, cntr: Counter) -> bool:
+        pass
+
+
+class Scoreboard(Module):
+    def insert_accesses(self, instr: Instruction, *,
+                        # keyword-only args:
+                        reg_reads: Sequence[str],
+                        reg_writes: Sequence[str]) -> None:
+        """Record the reg accesses instr intends to execute."""
+
+    def can_read(self, instr: Instruction, regs: Sequence[str]) -> bool:
+        """True iff instr can execute the reg reads in the next cycle.
+
+        regs must be a subset of reg_reads of a previous call to
+        insert_accesses.
+        """
+
+    def read(self, instr: Instruction, regs: Sequence[str]) -> None:
+        """Record that instr is executing the reg reads in the next cycle."""
+
+    def can_write(self, instr: Instruction, regs: Sequence[str]) -> bool:
+        """True iff instr can execute the reg writes in the next cycle.
+
+        regs must be a subset of reg_writes of a previous call to
+        insert_accesses.
+        """
+
+    def buff_write(self, instr: Instruction, regs: Sequence[str]) -> None:
+        """Record that the reg writes become available in the writeback queue in
+        the next cycle.
+        """
+
+    def write(self, instr: Instruction, regs: Sequence[str]) -> None:
+        """Record that the writes become available in the reg-file in the
+        next cycle.
+        """
+
+    def can_issue(self, instr: Instruction) -> bool:
+        """True iff instr can be issued in the next cycle.
+
+        This is to prevent deadlocks due to dependency cycles. Note that two
+        instructions that are issued to the same staged pipeline have an
+        additional order (on top of the rw/ww/wr-dependencies), enforced by the
+        pipeline.
+        """
+
+    def issue(self, instr: Instruction) -> None:
+        """Record that instr is issued in the next cycle."""