execution unit: model of an execution unit. This includes an EIQ (execution issue queue), a pipeline (scalar or vector) and a writeback queue. The commit also includes scoreboard models for scalar register files and vector register files. Change-Id: I6f4f116dd06d45ab76182dc09b35ce6aa69afc0d
diff --git a/exec_unit.py b/exec_unit.py new file mode 100644 index 0000000..2d4addc --- /dev/null +++ b/exec_unit.py
"""ExecUnit module."""

import collections
import sys
from typing import Any, Dict, Sequence, Union

from counter import Counter
from instruction import Instruction
import interfaces
import scoreboard


class ExecUnit(interfaces.ExecUnit):
    """Execution unit model.

    Owns the execution pipelines, grouped by functional-unit kind, and feeds
    them with instructions taken from the scheduling unit's dispatch queues.
    """

    def __init__(
        self, config: Dict[str, Any], pipe_map: Dict[str, str],
        rf_scoreboards: Dict[str, Union[scoreboard.Preemptive,
                                        scoreboard.VecPreemptive]]
    ):
        super().__init__("EX")

        self._branch_prediction = config["branch_prediction"]

        # Maps an instruction mnemonic to the kind of pipe that executes it.
        self._pipe_map = pipe_map

        # Neighbouring units, wired up later by `connect`.
        self._fetch_unit = None
        self._sched_unit = None

        # State
        self._rf_scoreboards = rf_scoreboards
        self._pipes = {}
        self._retired_instructions = collections.deque()

    def _all_pipes(self):
        """Yield every registered pipe, kind by kind, in registration order."""
        for group in self._pipes.values():
            yield from group

    def add_pipe(self, kind: str,
                 pipes: Sequence[interfaces.ExecPipeline]) -> None:
        """Register the (non-empty) group of pipes that serve `kind`."""
        assert pipes
        assert kind not in self._pipes
        self._pipes[kind] = pipes

    def connect(self, fetch_unit: interfaces.FetchUnit,
                sched_unit: interfaces.SchedUnit) -> None:
        """Attach the fetch and scheduling units this unit talks to."""
        self._sched_unit = sched_unit
        self._fetch_unit = fetch_unit

    # Implements interfaces.ExecUnit
    def pending(self) -> int:
        """Return the sum of `pending()` over all pipes."""
        return sum(pipe.pending() for pipe in self._all_pipes())

    # Implements interfaces.ExecUnit
    def get_issue_queue_id(self, instr: Instruction) -> str:
        """Return the issue queue id of the first pipe that serves `instr`."""
        first_pipe = self._pipes[self.get_functional_unit(instr)][0]
        return first_pipe.issue_queue_id

    def get_functional_unit(self, instr: Instruction) -> str:
        """Return the functional unit kind the instruction will execute in.

        Logs an error and exits the process (status 1) when the mnemonic has
        no entry in the pipe map.
        """
        mnemonic = instr.mnemonic
        try:
            kind = self._pipe_map[mnemonic]
        except KeyError:
            self.logger.error("unknown pipe for instruction '%s'",
                              instr.mnemonic)
            sys.exit(1)
        return kind

    # Implements interfaces.ExecUnit
    def reset(self, cntr: Counter) -> None:
        """Reset this unit and every pipe it owns."""
        super().reset(cntr)
        # TODO(sflur): implement proper reset
        for pipe in self._all_pipes():
            pipe.reset(cntr)

    # Implements interfaces.ExecUnit
    def tick(self, cntr: Counter) -> None:
        """Advance one cycle and dispatch from sched_unit's queues.

        Elements are processed against the direction of instruction flow
        (scoreboards and pipes first, dispatch last) so that instructions
        can move in lockstep when possible.
        """
        super().tick(cntr)

        retired = self._retired_instructions
        retired.clear()

        for sb in self._rf_scoreboards.values():
            sb.tick(cntr)

        for pipe in self._all_pipes():
            pipe.tick(cntr)
            retired.extend(pipe.retired_instrs)

        # Without branch prediction, a retired branch is reported (once per
        # tick) to the scheduling and fetch units.
        if self._branch_prediction == "none":
            if any(instr.is_branch for instr in retired):
                self._sched_unit.branch_resolved()
                self._fetch_unit.branch_resolved()

        # Drain each dispatch queue from its head until a dispatch fails.
        for dq in self._sched_unit.queues:
            while dq and self.dispatch_instruction(dq[0], cntr):
                dq.popleft()

        # Update retired instruction count.
        cntr.retired_instruction_count += len(retired)

    # Implements interfaces.ExecUnit
    def tock(self, cntr: Counter) -> None:
        """Run the tock phase of the scoreboards and pipes."""
        super().tock(cntr)

        retired = self._retired_instructions
        retired.clear()

        for sb in self._rf_scoreboards.values():
            sb.tock(cntr)

        for pipe in self._all_pipes():
            pipe.tock(cntr)
            retired.extend(pipe.retired_instrs)

        # Update retired instruction count.
        cntr.retired_instruction_count += len(retired)

    def dispatch_instruction(self, instr: Instruction, cntr: Counter):
        """Offer `instr` to the pipes of its kind; True if one accepted it."""
        # TODO(sflur): use other policies to choose pipe, instead of the first
        # free one.
        candidates = self._pipes[self.get_functional_unit(instr)]
        return any(pipe.try_dispatch(instr, cntr) for pipe in candidates)

    # Implements interfaces.ExecUnit
    def print_state_detailed(self, file) -> None:
        """Print each pipe's state, then the retired instructions, to `file`."""
        for pipe in self._all_pipes():
            pipe.print_state_detailed(file)

        retired_str = ", ".join(str(i) for i in self._retired_instructions)
        print("[re] " + retired_str, file=file)

    # Implements interfaces.ExecUnit
    def get_state_three_valued_header(self) -> Sequence[str]:
        """Collect the three-valued state header of every pipe."""
        return [pipe.get_state_three_valued_header()
                for pipe in self._all_pipes()]

    # Implements interfaces.ExecUnit
    def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
        """Collect the three-valued state of every pipe."""
        return [pipe.get_state_three_valued(vals)
                for pipe in self._all_pipes()]
diff --git a/scalar_pipe.py b/scalar_pipe.py new file mode 100644 index 0000000..1cb3586 --- /dev/null +++ b/scalar_pipe.py
"""ScalarPipe module."""

import collections
from typing import Any, Dict, Sequence, Union

from buffered_queue import BufferedQueue
import counter
from counter import Counter
from instruction import Instruction
import interfaces
import scoreboard


class ScalarPipe(interfaces.ExecPipeline):
    """Scalar pipe model.

    Instructions enter through the execution issue queue (EIQ), advance
    through a fixed number of pipeline stages, and instructions with register
    outputs then wait in the writeback queue until the register-file
    scoreboards allow the write.
    """

    def __init__(self, name: str, kind: str, desc: Dict[str, Any], mem_sys,
                 rf_scoreboards: Dict[str, Union[scoreboard.Preemptive,
                                                 scoreboard.VecPreemptive]]
                 ) -> None:
        super().__init__(name, kind, desc["issue_queue"], desc["depth"])

        # Execution Issue Queues
        self._eiq = BufferedQueue(desc.get("eiq_size"))
        self._can_skip_eiq = desc["can_skip_eiq"]

        # The pipeline
        self._pipelined = desc["pipelined"]
        self._stage = collections.deque([None] * self.depth)

        # The writeback buffer
        self._writebackq = BufferedQueue(desc.get("writeback_buff_size"))

        # Interface to memory
        self._mem = (mem_sys.elements[desc["memory_interface"]]
                     if "memory_interface" in desc else None)

        # `_stalling_loads[(instr, load)]` is `None` once the load was issued,
        # `True` while the pipe waits for its reply and `False` once the reply
        # was taken. `_stalling_stores` follows the same convention.
        self._load_stage = desc.get("load_stage")
        self._fixed_load_latency = desc.get("fixed_load_latency")
        self._stalling_loads = {}

        self._store_stage = desc.get("store_stage")
        self._fixed_store_latency = desc.get("fixed_store_latency")
        self._stalling_stores = {}

        self._rf_scoreboards = rf_scoreboards

    def _occupied_stages(self) -> int:
        """Return the number of pipeline stages that hold an instruction."""
        return sum(1 for i in self._stage if i)

    def reg_read_stall(self, instr: Instruction) -> bool:
        """Return True if some scoreboard does not allow `instr`'s reads."""
        return any(not self._rf_scoreboards[rf].can_read(instr, regs)
                   for rf, regs in instr.inputs_by_type().items())

    def reg_write_stall(self, instr: Instruction) -> bool:
        """Return True if some scoreboard does not allow `instr`'s writes."""
        return any(not self._rf_scoreboards[rf].can_write(instr, regs)
                   for rf, regs in instr.outputs_by_type().items())

    def sb_reg_read(self, instr: Instruction) -> None:
        """Record `instr`'s register reads in the scoreboards."""
        for rf, regs in instr.inputs_by_type().items():
            self._rf_scoreboards[rf].read(instr, regs)

    def sb_buff_reg_write(self, instr: Instruction) -> None:
        """Record `instr`'s register writes as buffered in the scoreboards."""
        for rf, regs in instr.outputs_by_type().items():
            self._rf_scoreboards[rf].buff_write(instr, regs)

    def sb_reg_write(self, instr: Instruction) -> None:
        """Record `instr`'s register writes in the scoreboards."""
        for rf, regs in instr.outputs_by_type().items():
            self._rf_scoreboards[rf].write(instr, regs)

    def do_reg_writeback(self) -> None:
        """Write back, and retire, the head of the writeback queue if allowed."""
        if self._writebackq:
            if not self.reg_write_stall(self._writebackq[0]):
                instr = self._writebackq.popleft()
                self.sb_reg_write(instr)
                self.retired_instrs.append(instr)

    def stall(self, cntr: Counter) -> bool:
        """Return True if the pipeline stages must not advance this tick."""
        # Check if last stage needs to do reg writes, and the writeback buffer
        # is full.
        if (self._stage[-1] and self._stage[-1].outputs_by_type() and
                self._writebackq.is_buffer_full()):
            return True

        # Check if memory accesses are waiting for reply.
        if (any(self._stalling_loads.values()) or
                any(self._stalling_stores.values())):
            cntr.scalar_load_store_stall += 1
            return True

        return False

    def do_load(self) -> None:
        """Issue loads at the load stage and collect replies at the reply stage.

        Does nothing when the pipe has no load stage.
        """
        if self._load_stage is None:
            return

        if self._stage[self._load_stage]:
            inst = self._stage[self._load_stage]
            # TODO(sflur): handle multiple loads?
            assert len(inst.loads) <= 1
            for load in inst.loads:
                # Issue each load only once, even if the stage is stalled.
                if (inst, load) not in self._stalling_loads:
                    self._mem.issue_load(inst, load)
                    self._stalling_loads[(inst, load)] = None

        if self._stage[self._load_stage + self._fixed_load_latency]:
            inst = self._stage[self._load_stage + self._fixed_load_latency]
            for load in inst.loads:
                # The reply is due now; stall until it is taken below.
                if self._stalling_loads[(inst, load)] is None:
                    self._stalling_loads[(inst, load)] = True
            for load in self._mem.take_load_replys(inst):
                self._stalling_loads[(inst, load)] = False

    def do_store(self) -> None:
        """Issue stores at the store stage and collect replies at the reply stage.

        Does nothing when the pipe has no store stage.
        """
        if self._store_stage is None:
            return

        if self._stage[self._store_stage]:
            inst = self._stage[self._store_stage]
            # TODO(sflur): handle multiple stores?
            assert len(inst.stores) <= 1
            for store in inst.stores:
                # Issue each store only once, even if the stage is stalled.
                if (inst, store) not in self._stalling_stores:
                    self._mem.issue_store(inst, store)
                    self._stalling_stores[(inst, store)] = None

        if self._stage[self._store_stage + self._fixed_store_latency]:
            inst = self._stage[self._store_stage + self._fixed_store_latency]
            for store in inst.stores:
                # The reply is due now; stall until it is taken below.
                if self._stalling_stores[(inst, store)] is None:
                    self._stalling_stores[(inst, store)] = True
            for store in self._mem.take_store_replys(inst):
                self._stalling_stores[(inst, store)] = False

    # Implements interfaces.ExecPipeline
    def reset(self, cntr: Counter) -> None:
        """Reset the pipe and register its utilization counters in `cntr`."""
        super().reset(cntr)
        # TODO(sflur): implement proper reset
        cntr.utilizations[f"{self.name}.eiq"] = counter.Utilization(
            self._eiq.size)
        cntr.utilizations[f"{self.name}.pipe"] = counter.Utilization(
            len(self._stage))
        cntr.utilizations[f"{self.name}.wbq"] = counter.Utilization(
            self._writebackq.size)

    # Implements interfaces.ExecPipeline
    def tick(self, cntr: Counter) -> None:
        """Move instructions from EIQ to pipeline, to WBQ, to RF.

        Instructions move in lockstep when possible. To achieve lockstep we
        process the elements counter to instruction flow direction.
        """
        super().tick(cntr)

        self.retired_instrs.clear()

        self.do_reg_writeback()

        if not self.stall(cntr):
            # Cleanup self._stalling_loads
            if (self._load_stage is not None and
                    self._stage[self._load_stage + self._fixed_load_latency]):
                inst = self._stage[self._load_stage + self._fixed_load_latency]
                for load in inst.loads:
                    # The assertion holds because self.stall() above returned
                    # False, i.e. no load is still waiting for a reply.
                    assert not self._stalling_loads.get((inst, load), False)
                    del self._stalling_loads[(inst, load)]

            # Cleanup self._stalling_stores
            if (self._store_stage is not None and
                    self._stage[self._store_stage + self._fixed_store_latency]):
                inst = self._stage[self._store_stage +
                                   self._fixed_store_latency]
                for store in inst.stores:
                    # The assertion holds because self.stall() above returned
                    # False, i.e. no store is still waiting for a reply.
                    assert not self._stalling_stores.get((inst, store), False)
                    del self._stalling_stores[(inst, store)]

            # Shift stages
            instr = self._stage.pop()
            if instr:
                if instr.outputs_by_type():
                    self._writebackq.buffer(instr)
                    cntr.utilizations[f"{self.name}.wbq"].count += 1
                    self.sb_buff_reg_write(instr)
                else:
                    # Nothing to write back: the instruction retires here.
                    self.retired_instrs.append(instr)
            self._stage.appendleft(None)

        # Memory accesses are processed even when the stages are stalled,
        # because taking the replies is what clears the stall.
        self.do_load()
        self.do_store()

        # Try to issue instructions from eiq to pipeline, until one succeeds.
        # NOTE(review): instructions that fail to issue are re-appended, so
        # when a non-head instruction issues, the skipped ones presumably end
        # up behind the younger ones - confirm this reordering is intended.
        if self.is_ready():
            for _ in range(len(self._eiq)):
                instr = self._eiq.popleft()
                if self.try_issue(instr, cntr):
                    break

                self._eiq.append(instr)

    # Implements interfaces.ExecPipeline
    def tock(self, cntr: Counter) -> None:
        """Flush the queues and update the occupancy counters."""
        super().tock(cntr)

        self.retired_instrs.clear()

        cntr.utilizations[f"{self.name}.pipe"].occupied += (
            self._occupied_stages())

        self._eiq.flush()
        cntr.utilizations[f"{self.name}.eiq"].occupied += len(self._eiq)

        self._writebackq.flush()
        cntr.utilizations[f"{self.name}.wbq"].occupied += len(self._writebackq)

    # Implements interfaces.ExecPipeline
    def pending(self) -> int:
        """Return the number of instructions in the EIQ, stages and WBQ."""
        eiq_count = sum(1 for _ in self._eiq.chain())
        pipe_count = self._occupied_stages()
        wbq_count = sum(1 for _ in self._writebackq.chain())
        return eiq_count + pipe_count + wbq_count

    # Implements interfaces.ExecPipeline
    def try_dispatch(self, instr: Instruction, cntr: Counter) -> bool:
        """Accept `instr` into this pipe, unless the EIQ buffer is full.

        The instruction's register accesses are inserted into the
        scoreboards, and the instruction is either issued directly (when the
        pipe may skip the EIQ and is ready) or buffered in the EIQ.

        Returns:
            True if the instruction was accepted, False otherwise.
        """
        if self._eiq.is_buffer_full():
            return False

        inputs = instr.inputs_by_type()
        outputs = instr.outputs_by_type()

        for rf in inputs.keys() | outputs.keys():
            reads = inputs.get(rf, [])
            writes = outputs.get(rf, [])
            self._rf_scoreboards[rf].insert_accesses(instr, reg_reads=reads,
                                                     reg_writes=writes)

        if not (self._can_skip_eiq and self.is_ready() and
                self.try_issue(instr, cntr)):
            self._eiq.buffer(instr)
            cntr.utilizations[f"{self.name}.eiq"].count += 1

        if instr.loads or instr.stores:
            cntr.scalar_load_store += 1

        return True

    def is_ready(self) -> bool:
        """Check if the pipe is ready to accept a new instruction."""
        if self._pipelined:
            return self._stage[0] is None

        # A non-pipelined unit holds a single instruction at a time.
        return all(s is None for s in self._stage)

    def try_issue(self, instr: Instruction, cntr: Counter) -> bool:
        """Issue `instr` into the first stage, if the scoreboards allow it.

        The caller must ensure the first stage is free (see `is_ready`).

        Returns:
            True if the instruction was issued, False otherwise.
        """
        if not all(sb.can_issue(instr) for sb in self._rf_scoreboards.values()):
            return False

        if self.reg_read_stall(instr):
            return False

        assert self._stage[0] is None
        self._stage[0] = instr
        cntr.utilizations[f"{self.name}.pipe"].count += 1

        for sb in self._rf_scoreboards.values():
            sb.issue(instr)

        self.sb_reg_read(instr)

        return True

    # Implements interfaces.ExecPipeline
    def print_state_detailed(self, file) -> None:
        """Print one `[name] eiq > stages > wbq` line to `file`."""
        eiq_str = ", ".join(str(i) for i in reversed(list(self._eiq.chain())))
        stages = ", ".join(str(i) if i else "-" for i in self._stage)
        wbq_str = ", ".join(
            str(i) for i in reversed(list(self._writebackq.chain())))

        pipe_str = (f"{eiq_str if eiq_str else '-'}"
                    f" > {stages}"
                    f" > {wbq_str if wbq_str else '-'}")

        print(f"[{self.name}] {pipe_str}", file=file)

    # Implements interfaces.ExecPipeline
    def get_state_three_valued_header(self) -> Sequence[str]:
        """Return the column names matching `get_state_three_valued`."""
        return ["eiq", self.kind, "wbq"]

    # Implements interfaces.ExecPipeline
    def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
        """Return the empty/partial/full state of the EIQ, stages and WBQ.

        Args:
            vals: the strings to use for empty (`vals[0]`), partial
                (`vals[1]`) and full (`vals[2]`).
        """
        if all(self._stage):
            # Full
            pipe_str = vals[2]
        elif any(self._stage):
            # Partial
            pipe_str = vals[1]
        else:
            # Empty
            pipe_str = vals[0]

        return [self._eiq.pp_three_valued(vals),
                pipe_str,
                self._writebackq.pp_three_valued(vals)]
diff --git a/scoreboard.py b/scoreboard.py new file mode 100644 index 0000000..95351ac --- /dev/null +++ b/scoreboard.py
"""scoreboard module."""

import collections
import sys
from typing import Any, Dict, Sequence, Tuple

from counter import Counter
from instruction import Instruction
import interfaces


class Preemptive(interfaces.Scoreboard):
    """Scoreboard that stalls functional units."""

    def __init__(self, uid: str, desc: Dict[str, Any]) -> None:
        super().__init__(uid)

        # `None` means unrestricted
        self.read_ports = desc.get("read_ports")
        self.dedicated_read_ports = set(desc.get("dedicated_read_ports", []))

        # `None` means unrestricted
        self.write_ports = desc.get("write_ports")
        self.dedicated_write_ports = set(desc.get("dedicated_write_ports", []))

        # `self.rw_deps[instr][reg]` is the instruction from which `instr`
        # reads `reg`'s value, if that instruction is still in-flight, and
        # `None` otherwise.
        self.rw_deps = {}

        # `self.ww_deps[instr][reg]` is the instruction that writes to `reg`
        # just before `instr` writes to it.
        self.ww_deps = {}

        # `self.wr_deps[instr][reg]` is the set of instructions that must do
        # their reads from `reg` before `instr` does its write to `reg`.
        self.wr_deps = collections.defaultdict(dict)

        # `self.writes[reg]` is the last instruction, so far, that intends to
        # write to `reg`, if that instruction is in-flight, and `None`
        # otherwise.
        self.writes = collections.defaultdict(lambda: None)

        # `self.reads[reg]` is the set of instructions that follow
        # `self.writes[reg]`, and read from `reg`.
        self.reads = collections.defaultdict(set)

        # The set of instructions that have been issued to a functional unit.
        # This is a coarse way of preventing deadlocks.
        self.issued = set()

        # `self.write_buff[instr]` is the set of registers for which `instr`
        # has already computed a write value that can be used in a bypass.
        self.write_buff = collections.defaultdict(set)

        # The number of register reads/writes done so far in the current tick.
        self.used_read_ports = 0
        self.used_write_ports = 0

    def dump(self, file=sys.stdout) -> None:
        """Print the port configuration and the dependency maps to `file`."""
        print(f"-- Scoreboard {self.name}: --", file=file)

        print(f"read ports: {self.read_ports}", file=file)
        print(f"dedicated read ports: {self.dedicated_read_ports}", file=file)
        print(f"write ports: {self.write_ports}", file=file)
        print(f"dedicated write ports: {self.dedicated_write_ports}", file=file)

        print(f"issued instructions: {', '.join(str(i) for i in self.issued)}",
              file=file)

        def pp_instr(i) -> str:
            if i is None:
                return "None"

            if isinstance(i, Instruction):
                return str(i)

            if isinstance(i, tuple):
                return f"{pp_instr(i[0])} ({i[1]})"

            return "???"

        for i, deps in self.rw_deps.items():
            print(f"rw {pp_instr(i)}: " +
                  ", ".join(f"({r}: {pp_instr(d)})" for r, d in deps.items()),
                  file=file)

        for i, deps in self.ww_deps.items():
            print(f"ww {pp_instr(i)}: " +
                  ", ".join(f"({r}: {pp_instr(d)})" for r, d in deps.items()),
                  file=file)

        for i, deps in self.wr_deps.items():
            print(f"wr {pp_instr(i)}: " +
                  ", ".join(f"({r}: " + "; ".join(pp_instr(d)
                                                  for d in ds) + ")"
                            for r, ds in deps.items()),
                  file=file)

    # Implements interfaces.Scoreboard
    def insert_accesses(self, instr: Instruction, *,
                        # keyword-only args:
                        reg_reads: Sequence[str],
                        reg_writes: Sequence[str]) -> None:
        """Record the dependencies of `instr`'s register accesses.

        Args:
            instr: the instruction performing the accesses.
            reg_reads: the registers `instr` reads.
            reg_writes: the registers `instr` writes.
        """
        for reg in reg_reads:
            # We assume instructions never read their own writes
            assert self.writes[reg] != instr

            self.rw_deps.setdefault(
                instr, {}
            )[reg] = self.writes[reg]
            self.reads[reg].add(instr)

        for reg in reg_writes:
            # Can instructions write twice to the same reg?
            assert self.writes[reg] != instr

            self.ww_deps.setdefault(
                instr, {}
            )[reg] = self.writes[reg]
            # `instr` must wait for the pending readers of the previous value
            # (other than itself).
            self.wr_deps.setdefault(instr, {}).setdefault(
                reg, set()).update(self.reads[reg] - {instr})

            self.writes[reg] = instr
            self.reads[reg].clear()

    def read_port_regs(self, instr, regs):
        """Return the regs that need to use a non-dedicated read port."""
        return [
            r for r in regs if
            (r not in self.dedicated_read_ports
             # rw_deps which are not None will be read from
             # the write-buffer.
             # TODO(sflur): what are the restrictions on the
             # write-buffer?
             and self.rw_deps[instr][r] is None)
        ]

    def check_read_ports(self, instr, regs) -> bool:
        """Return True if enough read ports are left for `instr`'s reads."""
        if self.read_ports is None:
            return True

        return (self.used_read_ports + len(self.read_port_regs(instr, regs)) <=
                self.read_ports)

    # Implements interfaces.Scoreboard
    def can_read(self, instr, regs) -> bool:
        """Return True if `instr` can read all of `regs` now.

        That requires enough read ports, and that every in-flight producer of
        a register has already buffered its value.
        """
        if not self.check_read_ports(instr, regs):
            return False

        for reg in regs:
            dep = self.rw_deps[instr][reg]
            if dep and reg not in self.write_buff[dep]:
                return False
        return True

    def write_port_regs(self, instr, regs):
        """Return the regs that need to use a non-dedicated write port."""
        # `instr` is not used here, but it is used in the Vec case below.
        del instr
        return [r for r in regs if r not in self.dedicated_write_ports]

    def check_write_ports(self, instr, regs) -> bool:
        """Return True if enough write ports are left for `instr`'s writes."""
        if self.write_ports is None:
            return True

        return (self.used_write_ports + len(self.write_port_regs(instr, regs))
                <= self.write_ports)

    # Implements interfaces.Scoreboard
    def can_write(self, instr, regs) -> bool:
        """Return True if `instr` can write all of `regs` now.

        That requires enough write ports, and no outstanding write-after-write
        or write-after-read dependency on any of `regs`.
        """
        if not self.check_write_ports(instr, regs):
            return False

        return not (any(self.ww_deps[instr][reg] for reg in regs) or
                    any(self.wr_deps[instr][reg] for reg in regs))

    def update_used_read_ports(self, instr, regs) -> None:
        """Account for the read ports used by `instr` reading `regs`."""
        self.used_read_ports += len(self.read_port_regs(instr, regs))

    # Implements interfaces.Scoreboard
    def read(self, instr, regs) -> None:
        """Record that `instr` has read `regs`, releasing waiting writers."""
        self.update_used_read_ports(instr, regs)

        for reg in regs:
            # TODO(sflur): In RVV vec reg groups must be a multiple of the
            # LMUL. Hence, `vadd.vv v0, v1, v2` with LMUL=2 is not a valid
            # instruction (it's actually "reserved"), because v1 is not
            # multiple of 2. If we want to support architectures where this is
            # allowed, consider a valid instruction like `vadd.vv v0, v1, v2`
            # with LMUL=2, and 2 slices uArch. Slice v2.0 is read twice, first
            # as part of the group starting from v2, and second as part of the
            # group starting from v1. The `del` below will be executed for the
            # first read and then the second read will fail in `can_read` where
            # we assume it's still in the map.
            del self.rw_deps[instr][reg]

            # Writers of `reg` no longer need to wait for this read.
            for pending_reads in self.wr_deps.values():
                readers = pending_reads.get(reg)
                if readers:
                    readers.discard(instr)

            self.reads[reg].discard(instr)

        # Test for emptiness directly; `any()` on a dict would test the
        # truthiness of its keys instead.
        if not self.rw_deps[instr]:
            del self.rw_deps[instr]
            if instr not in self.ww_deps:
                self.issued.remove(instr)

    # Implements interfaces.Scoreboard
    def buff_write(self, instr, regs) -> None:
        """Record that `instr`'s values for `regs` are available for bypass."""
        self.write_buff[instr].update(regs)

    def update_used_write_ports(self, instr, regs) -> None:
        """Account for the write ports used by `instr` writing `regs`."""
        self.used_write_ports += len(self.write_port_regs(instr, regs))

    # Implements interfaces.Scoreboard
    def write(self, instr, regs) -> None:
        """Record that `instr` has written `regs`, releasing its dependants."""
        self.update_used_write_ports(instr, regs)

        for reg in regs:
            del self.ww_deps[instr][reg]
            del self.wr_deps[instr][reg]

            # Readers and later writers of `reg` no longer depend on `instr`.
            for rdeps in self.rw_deps.values():
                if rdeps.get(reg) == instr:
                    rdeps[reg] = None

            for rdeps in self.ww_deps.values():
                if rdeps.get(reg) == instr:
                    rdeps[reg] = None

            if self.writes[reg] == instr:
                self.writes[reg] = None

        # Test for emptiness directly; `any()` on a dict would test the
        # truthiness of its keys instead.
        if not self.ww_deps[instr]:
            del self.ww_deps[instr]
            del self.wr_deps[instr]
            if instr not in self.rw_deps:
                self.issued.remove(instr)

        self.write_buff.pop(instr, None)

    # Implements interfaces.Scoreboard
    def can_issue(self, instr) -> bool:
        """Return True if every instruction `instr` depends on was issued."""
        if (instr not in self.rw_deps and instr not in self.ww_deps and
                instr not in self.wr_deps):
            return True

        if any(d and d not in self.issued
               for d in self.rw_deps.get(instr, {}).values()):
            return False

        if any(d and d not in self.issued
               for d in self.ww_deps.get(instr, {}).values()):
            return False

        if any(d not in self.issued
               for ds in self.wr_deps.get(instr, {}).values()
               for d in ds):
            return False

        return True

    # Implements interfaces.Scoreboard
    def issue(self, instr) -> None:
        """Mark `instr` as issued, if it has any recorded register access."""
        if (instr not in self.rw_deps and instr not in self.ww_deps and
                instr not in self.wr_deps):
            return

        self.issued.add(instr)

    def clear_used_ports(self) -> None:
        """Reset the per-tick count of used read and write ports."""
        self.used_read_ports = 0
        self.used_write_ports = 0

    # Implements interfaces.Scoreboard
    # pylint: disable-next=useless-parent-delegation
    def reset(self, cntr: Counter) -> None:
        super().reset(cntr)

    # Implements interfaces.Scoreboard
    # pylint: disable-next=useless-parent-delegation
    def tick(self, cntr: Counter) -> None:
        super().tick(cntr)

    # Implements interfaces.Scoreboard
    def tock(self, cntr: Counter) -> None:
        """End the cycle: all ports become available again."""
        super().tock(cntr)
        self.clear_used_ports()

    # The methods below are not implemented. They raise `AssertionError`
    # explicitly, instead of using `assert False`, so that they also fail
    # when Python runs with assertions disabled (`-O`).

    # Implements interfaces.Scoreboard
    def pending(self) -> int:
        # TODO(sflur): implement?
        raise AssertionError("Scoreboard.pending is not implemented")

    # Implements interfaces.Scoreboard
    def print_state_detailed(self, file) -> None:
        # TODO(sflur): implement?
        raise AssertionError(
            "Scoreboard.print_state_detailed is not implemented")

    # Implements interfaces.Scoreboard
    def get_state_three_valued_header(self) -> Sequence[str]:
        # TODO(sflur): implement?
        raise AssertionError(
            "Scoreboard.get_state_three_valued_header is not implemented")

    # Implements interfaces.Scoreboard
    def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
        # TODO(sflur): implement?
        raise AssertionError(
            "Scoreboard.get_state_three_valued is not implemented")


class VecPreemptive(Preemptive):
    """Scoreboard for vector registers.

    Each register is sliced to multiple slices. Registers are named
    `<reg>.<slice>`, and the read/write ports are counted per slice.
    """

    def __init__(self, uid: str, desc: Dict[str, Any], slices: int) -> None:
        super().__init__(uid, desc)

        self.slices = slices

        # Per-slice count of the ports used so far in the current tick.
        self.used_read_ports = [0] * slices
        self.used_write_ports = [0] * slices

    def read_port_regs(self, instr,
                       regs: Sequence[str]) -> Dict[int, Sequence[str]]:
        """Return the regs that need to use a non-dedicated read port.

        The result maps a slice index to the registers read from that slice.
        """

        res = {}

        for rs in regs:
            # Split `<reg>.<slice>` at the last dot.
            r, _, s = rs.rpartition(".")
            if (r not in self.dedicated_read_ports
                    # rw_deps which are not None will be read from
                    # the write-buffer.
                    # TODO(sflur): what are the restrictions on the
                    # write-buffer?
                    and self.rw_deps[instr][rs] is None):
                res.setdefault(int(s), collections.deque()).append(r)

        return res

    def check_read_ports(self, instr, regs) -> bool:
        """Return True if every slice has enough read ports left."""
        if self.read_ports is None:
            return True

        regs = self.read_port_regs(instr, regs)

        return all(self.used_read_ports[s] + len(rs) <= self.read_ports
                   for s, rs in regs.items())

    def write_port_regs(self, instr: Instruction,
                        regs: Sequence[str]) -> Dict[int, Sequence[str]]:
        """Return the regs that need to use a non-dedicated write port.

        The result maps a slice index to the registers written in that slice.
        """

        res = {}

        for rs in regs:
            # Split `<reg>.<slice>` at the last dot.
            r, _, s = rs.rpartition(".")
            if r not in self.dedicated_write_ports:
                res.setdefault(int(s), collections.deque()).append(r)

        return res

    def check_write_ports(self, instr, regs) -> bool:
        """Return True if every slice has enough write ports left."""
        if self.write_ports is None:
            return True

        regs = self.write_port_regs(instr, regs)

        return all(self.used_write_ports[s] + len(rs) <= self.write_ports
                   for s, rs in regs.items())

    def update_used_read_ports(self, instr, regs) -> None:
        """Account, per slice, for the read ports used by `instr`."""
        for s, rs in self.read_port_regs(instr, regs).items():
            self.used_read_ports[s] += len(rs)

    def update_used_write_ports(self, instr, regs) -> None:
        """Account, per slice, for the write ports used by `instr`."""
        for s, rs in self.write_port_regs(instr, regs).items():
            self.used_write_ports[s] += len(rs)

    def clear_used_ports(self) -> None:
        """Reset the per-tick count of used ports of every slice."""
        for s in range(self.slices):
            self.used_read_ports[s] = 0
            self.used_write_ports[s] = 0
diff --git a/vector_pipe.py b/vector_pipe.py new file mode 100644 index 0000000..c06c7bb --- /dev/null +++ b/vector_pipe.py
"""VectorPipe module."""

import collections
import math
from typing import Any, Dict, Sequence, Optional, Tuple, Union

from buffered_queue import BufferedQueue
import counter
from counter import Counter
import instruction
from instruction import Instruction
import interfaces
import scoreboard
import utilities


class VectorPipe(interfaces.ExecPipeline):
    """Vector pipe model.

    This pipeline supports flexible chaining and tailgating.

    That is
    - The vector register is split into a number of slices.
    - Once a vector instruction starts producing result slices,
      they are written to the register file in order at one slice per cycle.
    - (See ScoreboardVector for more on issuing vector instructions.)

    Scalar input registers are read at the start of the instruction (first
    cycle of first slice).

    Scalar output registers are written to at the end of the instruction
    (last cycle of the last slice).

    Internally an instruction flows through three structures:
    - the execution issue queue (`_eiq`),
    - the pipeline stages (`_stage`), a fixed-length deque whose entries are
      either None or an `(instr, s)` tuple, `s` being the slice index,
    - the writeback queue (`_writebackq`), holding `(instr, s)` tuples whose
      register writes have not yet been performed.
    """

    def __init__(self, name: str, kind: str, desc: Dict[str, Any], slices: int,
                 mem_sys,
                 rf_scoreboards: Dict[str, Union[scoreboard.Preemptive,
                                                 scoreboard.VecPreemptive]]
                 ) -> None:
        """Build the pipe from its configuration.

        Args:
          name: passed to the base class (used here as a prefix for the
            utilization counters).
          kind: passed to the base class.
          desc: pipe configuration. Required keys: "issue_queue", "depth",
            "can_skip_eiq", "pipelined". Optional keys: "eiq_size",
            "writeback_buff_size", "memory_interface", "load_stage",
            "fixed_load_latency", "store_stage", "fixed_store_latency".
          slices: the number of slices a vector register is split into.
          mem_sys: the memory system; only `mem_sys.elements` is accessed,
            and only when `desc` has a "memory_interface" key.
          rf_scoreboards: map from register file to its scoreboard.
        """
        super().__init__(name, kind, desc["issue_queue"], desc["depth"])

        # Execution Issue Queues
        self._eiq = BufferedQueue(desc.get("eiq_size"))
        self._can_skip_eiq = desc["can_skip_eiq"]

        # The pipeline
        self._slices = slices
        self._pipelined = desc["pipelined"]
        # NOTE(review): `self.depth` is presumably set by the base class from
        # desc["depth"] -- confirm in interfaces.ExecPipeline.
        self._stage = collections.deque([None] * self.depth)
        # The multi-slice instruction currently being fed into the pipeline
        # (None when there is none), and the index of its next slice.
        self._inflight_instr = None
        self._inflight_next_slice = 0

        # The writeback buffer
        self._writebackq = BufferedQueue(desc.get("writeback_buff_size"))

        # Interface to memory
        self._mem = (mem_sys.elements[desc["memory_interface"]]
                     if "memory_interface" in desc else None)

        # `_stalling_loads` / `_stalling_stores` map (instr, slice, address)
        # to one of:
        #   None  - access issued, slice has not reached the stage at which
        #           the reply is required,
        #   True  - slice reached that stage, reply still missing (stall),
        #   False - reply received.
        self._load_stage = desc.get("load_stage")
        self._fixed_load_latency = desc.get("fixed_load_latency")
        self._stalling_loads = {}

        self._store_stage = desc.get("store_stage")
        self._fixed_store_latency = desc.get("fixed_store_latency")
        self._stalling_stores = {}

        self._rf_scoreboards = rf_scoreboards

    def eslices(self, instr: Instruction) -> int:
        """The number of slices required to execute `instr`."""
        return math.ceil(instr.max_emul() * self._slices)

    def slice(self, accesses: Sequence[int], index: int,
              eslices: int) -> Tuple[int, int]:
        """The memory access location and size, of a given slice.

        Args:
          accesses: a sequence of memory locations, one for each vector
            element.
          index: the index of the required slice.
          eslices: the total number of slices `accesses` should be split to.

        Returns:
          (access, count), where access is the first byte of the memory
          location that should be accessed, and count is the number of
          elements that should be accessed.
        """
        # TODO(sflur): return the access size in bytes, instead of `count`.
        # NOTE(review): with floor division, when len(accesses) is not a
        # multiple of `eslices` the trailing elements belong to no slice, and
        # the `min` below never clamps for index < eslices -- confirm this is
        # intended.
        alen = len(accesses)
        slen = alen // eslices
        start = index * slen
        slen = min(slen, alen - start)
        return (accesses[start], slen)

    def reg_read_stall(self, instr: Instruction, s: int) -> bool:
        """True if any scoreboard refuses the register reads of slice `s`."""
        return any(not self._rf_scoreboards[rf].can_read(instr, seq[s])
                   for rf, seq in self.input_seq_by_type(instr).items()
                   if len(seq) > s)

    def reg_write_stall(self, instr: Instruction, s: int) -> bool:
        """True if any scoreboard refuses the register writes of slice `s`."""
        return any(not self._rf_scoreboards[rf].can_write(instr, seq[s])
                   for rf, seq in self.output_seq_by_type(instr).items()
                   if len(seq) > s)

    def sb_reg_read(self, instr: Instruction, s: int) -> None:
        """Report the (non-empty) register reads of slice `s` to scoreboards."""
        for rf, seq in self.input_seq_by_type(instr).items():
            if len(seq) > s and seq[s]:
                self._rf_scoreboards[rf].read(instr, seq[s])

    def sb_buff_reg_write(self, instr: Instruction, s: int) -> None:
        """Call `buff_write` on the scoreboards for the writes of slice `s`."""
        for rf, seq in self.output_seq_by_type(instr).items():
            if len(seq) > s:
                self._rf_scoreboards[rf].buff_write(instr, seq[s])

    def sb_reg_write(self, instr: Instruction, s: int) -> None:
        """Call `write` on the scoreboards for the writes of slice `s`."""
        for rf, seq in self.output_seq_by_type(instr).items():
            if len(seq) > s:
                self._rf_scoreboards[rf].write(instr, seq[s])

    def do_reg_writeback(self) -> None:
        """Perform the register writes of the oldest writeback-queue entry.

        At most one entry is handled per call. If the scoreboards do not
        allow the write yet, the entry stays at the head of the queue. When
        the written slice is the last slice of its instruction, the
        instruction is appended to `retired_instrs`.
        """
        if self._writebackq:
            instr, s = self._writebackq[0]
            if not self.reg_write_stall(instr, s):
                self.sb_reg_write(instr, s)
                self._writebackq.popleft()
                if s + 1 == self.eslices(instr):
                    self.retired_instrs.append(instr)

    def _slice_has_output(self, instr: Instruction, s: int) -> bool:
        """True if slice `s` of `instr` writes at least one register."""
        return any(len(seq) > s and seq[s]
                   for seq in self.output_seq_by_type(instr).values())

    def stall(self, cntr: Counter) -> bool:
        """Check whether the pipeline stages must not advance this cycle.

        The pipeline stalls when the slice in the last stage has register
        writes but the writeback buffer is full, or when a memory access is
        still waiting for its reply. In the latter case
        `cntr.vector_load_store_stall` is incremented.
        """
        # Check if last stage needs to do reg writes, and the writeback buffer
        # is full.
        last = self._stage[-1]
        if (last and self._slice_has_output(*last) and
                self._writebackq.is_buffer_full()):
            return True

        # Check if memory accesses are waiting for reply.
        if (any(self._stalling_loads.values()) or
                any(self._stalling_stores.values())):
            cntr.vector_load_store_stall += 1
            return True

        return False

    def _access_key(self, stage_idx: int,
                    is_load: bool) -> Optional[Tuple[Instruction, int, int]]:
        """Key of the memory access performed by the slice in a given stage.

        Args:
          stage_idx: index into the pipeline stages.
          is_load: select the loads (True) or the stores (False) of the
            instruction.

        Returns:
          (instr, s, address), the key used in `_stalling_loads` /
          `_stalling_stores`, or None if the stage is empty or its
          instruction has no accesses of the selected kind.
        """
        st = self._stage[stage_idx]
        if not st:
            return None

        instr, s = st
        accesses = instr.loads if is_load else instr.stores
        if not accesses:
            return None

        addr, _count = self.slice(accesses, s, self.eslices(instr))
        return (instr, s, addr)

    def do_load(self) -> None:
        """Issue loads to memory and collect their replies.

        The slice in stage `load_stage` issues its load (once). When the
        slice reaches stage `load_stage + fixed_load_latency` it is marked as
        waiting, and stays so until the memory reply has been taken.
        Does nothing if the pipe has no load stage.
        """
        if self._load_stage is None:
            return

        key = self._access_key(self._load_stage, is_load=True)
        if key is not None and key not in self._stalling_loads:
            instr, s, addr = key
            # TODO(sflur): pass size to issue_load
            self._mem.issue_load((instr, s), addr)
            self._stalling_loads[key] = None

        key = self._access_key(self._load_stage + self._fixed_load_latency,
                               is_load=True)
        if key is not None:
            instr, s, _ = key
            if self._stalling_loads[key] is None:
                self._stalling_loads[key] = True
            # Replies are polled on every call (not only the first one), so a
            # late reply eventually releases the stall.
            for addr in self._mem.take_load_replys((instr, s)):
                self._stalling_loads[(instr, s, addr)] = False

    def do_store(self) -> None:
        """Issue stores to memory and collect their replies.

        The slice in stage `store_stage` issues its store (once). When the
        slice reaches stage `store_stage + fixed_store_latency` it is marked
        as waiting, and stays so until the memory reply has been taken.
        Does nothing if the pipe has no store stage.
        """
        if self._store_stage is None:
            return

        key = self._access_key(self._store_stage, is_load=False)
        if key is not None and key not in self._stalling_stores:
            instr, s, addr = key
            # TODO(sflur): pass size to issue_store
            self._mem.issue_store((instr, s), addr)
            self._stalling_stores[key] = None

        key = self._access_key(self._store_stage + self._fixed_store_latency,
                               is_load=False)
        if key is not None:
            instr, s, _ = key
            if self._stalling_stores[key] is None:
                self._stalling_stores[key] = True
            # Replies are polled on every call (not only the first one), so a
            # late reply eventually releases the stall.
            for addr in self._mem.take_store_replys((instr, s)):
                self._stalling_stores[(instr, s, addr)] = False

    # Implements interfaces.ExecPipeline
    def reset(self, cntr: Counter) -> None:
        """Register the utilization counters of the pipe in `cntr`."""
        super().reset(cntr)
        # TODO(sflur): implement proper reset
        cntr.utilizations[f"{self.name}.eiq"] = counter.Utilization(
            self._eiq.size)
        cntr.utilizations[f"{self.name}.pipe"] = counter.Utilization(
            len(self._stage))
        cntr.utilizations[f"{self.name}.wbq"] = counter.Utilization(
            self._writebackq.size)

    # Implements interfaces.ExecPipeline
    def tick(self, cntr: Counter) -> None:
        """Advance the pipe by one cycle.

        In order: write back one slice; unless stalled, shift the pipeline
        stages by one (moving the slice leaving the last stage to the
        writeback queue, or retiring it) and feed the next slice of the
        in-flight instruction; issue/collect memory accesses; finally try to
        issue a new instruction from the issue queue.
        """
        super().tick(cntr)

        self.retired_instrs.clear()

        self.do_reg_writeback()

        if not self.stall(cntr):
            # Cleanup self.stalling_loads
            if self._load_stage is not None:
                key = self._access_key(
                    self._load_stage + self._fixed_load_latency, is_load=True)
                if key is not None:
                    # The assertion holds because self.stall() returned False
                    # above, i.e. no access is still waiting for its reply.
                    assert not self._stalling_loads.get(key, False)
                    del self._stalling_loads[key]

            # Cleanup self.stalling_stores
            if self._store_stage is not None:
                key = self._access_key(
                    self._store_stage + self._fixed_store_latency,
                    is_load=False)
                if key is not None:
                    # The assertion holds because self.stall() returned False
                    # above, i.e. no access is still waiting for its reply.
                    assert not self._stalling_stores.get(key, False)
                    del self._stalling_stores[key]

            # Shift stages
            st = self._stage.pop()
            if st:
                instr, s = st
                if self._slice_has_output(instr, s):
                    self._writebackq.buffer((instr, s))
                    cntr.utilizations[f"{self.name}.wbq"].count += 1
                    self.sb_buff_reg_write(instr, s)
                elif s + 1 == self.eslices(instr):
                    # Last slice, nothing to write back: retire right away.
                    self.retired_instrs.append(instr)

            # Issue the next slice into the pipeline.
            if (self._inflight_instr and not self.reg_read_stall(
                    self._inflight_instr, self._inflight_next_slice)):
                self.sb_reg_read(self._inflight_instr,
                                 self._inflight_next_slice)

                self._stage.appendleft(
                    (self._inflight_instr, self._inflight_next_slice))
                cntr.utilizations[f"{self.name}.pipe"].count += 1
                self._inflight_next_slice += 1
                if self._inflight_next_slice == self.eslices(
                        self._inflight_instr):
                    self._inflight_instr = None
            else:
                # Keep the number of stages constant.
                self._stage.appendleft(None)

        # Memory accesses are handled even when stalled, otherwise the
        # replies that release the stall would never be collected.
        self.do_load()
        self.do_store()

        # Try to issue each of the instructions in `eiq`.
        # NOTE(review): instructions that fail to issue are rotated to the
        # back of the queue, so when a later instruction issues, the ones
        # tried before it end up behind the untried ones -- confirm this
        # reordering is intended.
        if self.is_ready():
            for _ in range(len(self._eiq)):
                instr = self._eiq.popleft()
                if self.try_issue(instr, cntr):
                    break

                self._eiq.append(instr)

    # Implements interfaces.ExecPipeline
    def tock(self, cntr: Counter) -> None:
        """Flush the buffered queues and sample the occupancy counters."""
        super().tock(cntr)

        self.retired_instrs.clear()

        cntr.utilizations[f"{self.name}.pipe"].occupied += sum(
            1 for st in self._stage if st)

        self._eiq.flush()
        cntr.utilizations[f"{self.name}.eiq"].occupied += len(self._eiq)

        self._writebackq.flush()
        cntr.utilizations[f"{self.name}.wbq"].occupied += len(self._writebackq)

    # Implements interfaces.ExecPipeline
    def pending(self) -> int:
        """Count the entries held in the issue queue, stages and writeback."""
        eiq_count = sum(1 for _ in self._eiq.chain())
        pipe_count = sum(1 for st in self._stage if st)
        wbq_count = sum(1 for _ in self._writebackq.chain())
        return eiq_count + pipe_count + wbq_count

    # Implements interfaces.ExecPipeline
    def try_dispatch(self, instr: Instruction, cntr: Counter) -> bool:
        """Accept `instr` into the pipe, if the issue queue has room.

        The register accesses of the instruction are registered with the
        scoreboards. The instruction is then either issued directly into the
        pipeline (when the pipe may skip the issue queue, is ready, and the
        issue succeeds) or buffered in the issue queue.

        Returns:
          False if the issue-queue buffer is full (nothing is changed), True
          otherwise.
        """
        if self._eiq.is_buffer_full():
            return False

        inputs = self.input_seq_by_type(instr)
        outputs = self.output_seq_by_type(instr)

        for rf in inputs.keys() | outputs.keys():
            reads = utilities.flatten(inputs.get(rf, []))
            writes = utilities.flatten(outputs.get(rf, []))
            self._rf_scoreboards[rf].insert_accesses(instr, reg_reads=reads,
                                                     reg_writes=writes)

        if not (self._can_skip_eiq and self.is_ready() and
                self.try_issue(instr, cntr)):
            self._eiq.buffer(instr)
            cntr.utilizations[f"{self.name}.eiq"].count += 1

        if instr.loads or instr.stores:
            cntr.vector_load_store += 1

        return True

    def is_ready(self) -> bool:
        """Check if the pipe is ready to accept a new instruction."""
        if self._pipelined:
            return self._inflight_instr is None and self._stage[0] is None

        # A non-pipelined pipe must be completely empty.
        return self._inflight_instr is None and all(
            s is None for s in self._stage)

    def try_issue(self, instr: Instruction, cntr: Counter) -> bool:
        """Issue the first slice of `instr` into the first pipeline stage.

        The caller is expected to have checked `is_ready()` (asserted here).

        Returns:
          False, with no state changed, if a scoreboard does not allow the
          issue or the register reads of slice 0; True otherwise.
        """
        if not all(sb.can_issue(instr)
                   for sb in self._rf_scoreboards.values()):
            return False

        if self.reg_read_stall(instr, 0):
            return False

        assert self._stage[0] is None
        self._stage[0] = (instr, 0)
        cntr.utilizations[f"{self.name}.pipe"].count += 1

        assert self._inflight_instr is None
        if 1 < self.eslices(instr):
            # Remaining slices are fed into the pipeline by tick().
            self._inflight_instr = instr
            self._inflight_next_slice = 1

        for sb in self._rf_scoreboards.values():
            sb.issue(instr)

        self.sb_reg_read(instr, 0)

        return True

    def vec_reg_seq(self, reg: str, input_reg: bool, emul: Union[int, float],
                    max_emul: Union[int, float]) -> Sequence[Optional[str]]:
        """The register slice accessed by each slice of an instruction.

        Args:
          reg: the (base) vector register: one character followed by the
            register number.
          input_reg: whether the register is read (True) or written (False).
          emul: the EMUL of this operand.
          max_emul: the largest EMUL among the operands of the instruction.

        Returns:
          A sequence of register-slice names ("<reg>.<slice>"). When `emul`
          is half of `max_emul` the names are interleaved with None: an input
          register is accessed on the even positions, an output register on
          the odd ones.
        """
        base = int(reg[1:])
        if emul < 1:
            seq = [f"{reg}.{s}" for s in range(math.ceil(emul * self._slices))]
        else:
            emul = int(emul)
            seq = [
                f"{reg[0]}{base + g}.{s}" for g in range(emul)
                for s in range(self._slices)
            ]

        if (emul == max_emul or (emul < 1 and self._slices < 1 / emul)):
            return seq

        assert int(max_emul / emul) == 2

        # Interleave Nones with `seq`
        if input_reg:
            seq = zip(seq, [None] * len(seq))
        else:
            seq = zip([None] * len(seq), seq)
        return utilities.flatten(seq)

    def input_seq(self, instr: Instruction,
                  reg: str) -> Sequence[Optional[str]]:
        """What is read of input register `reg` by each slice of `instr`.

        A non-vector register is read by the first slice only.
        """
        if instruction.is_vector_register(reg):
            assert instr.lmul is not None

            # TODO(sflur): anymore cases of input widening?
            if (instr.mnemonic.endswith((".wv", ".wx", ".wf", ".wi")) and
                    instr.operands[1] == reg):
                emul = 2 * instr.lmul
            else:
                emul = instr.lmul

            return self.vec_reg_seq(reg, True, emul, instr.max_emul())

        return [reg]

    def output_seq(self, instr: Instruction,
                   reg: str) -> Sequence[Optional[str]]:
        """What is written of output register `reg` by each slice of `instr`.

        A non-vector register is written by the last slice only.
        """
        if instruction.is_vector_register(reg):
            assert instr.lmul is not None

            # TODO(sflur): anymore cases of output widening?
            if (instr.mnemonic.startswith(("vw", "vfw")) and
                    instr.operands[0] == reg):
                emul = 2 * instr.lmul
            else:
                emul = instr.lmul

            return self.vec_reg_seq(reg, False, emul, instr.max_emul())

        res = [None] * self.eslices(instr)
        res[-1] = reg
        return res

    def _seq_by_type(self, instr: Instruction,
                     inputs: bool) -> Dict[str, Sequence[Sequence[str]]]:
        """Shared implementation of input_seq_by_type / output_seq_by_type.

        Args:
          instr: the instruction.
          inputs: collect the input registers (True) or the output registers
            (False).
        """
        if inputs:
            regs_by_type = instr.inputs_by_type()
            reg_seq = self.input_seq
        else:
            regs_by_type = instr.outputs_by_type()
            reg_seq = self.output_seq

        res = {}

        for ty, regs in regs_by_type.items():
            per_slice = [
                collections.deque() for _ in range(self.eslices(instr))
            ]

            for reg in regs:
                for i, r in enumerate(reg_seq(instr, reg)):
                    # None marks a slice that does not access `reg`.
                    if r:
                        per_slice[i].append(r)

            res[ty] = per_slice

        return res

    def input_seq_by_type(
            self, instr: Instruction) -> Dict[str, Sequence[Sequence[str]]]:
        """Compute a map from register-files to sequences of input register
        sets.

        A register file is mapped to a sequence of sets, where set i is the set
        of registers that will be read from by slice i.
        """
        return self._seq_by_type(instr, True)

    def output_seq_by_type(
            self, instr: Instruction) -> Dict[str, Sequence[Sequence[str]]]:
        """Compute a map from register-files to sequences of output register
        sets.

        A register file is mapped to a sequence of sets, where set i is the set
        of registers that will be written to by slice i.
        """
        return self._seq_by_type(instr, False)

    # Implements interfaces.ExecPipeline
    def print_state_detailed(self, file) -> None:
        """Print "[name] <issue queue> > <stages> > <writeback queue>"."""
        eiq_str = ", ".join(str(i) for i in reversed(list(self._eiq.chain())))
        stages = ", ".join(f"{i[0]} ({i[1]})" if i else "-"
                           for i in self._stage)
        wbq_str = ", ".join(f"{i[0]} ({i[1]})"
                            for i in reversed(list(self._writebackq.chain())))

        pipe_str = (f"{eiq_str if eiq_str else '-'}"
                    f" > {stages}"
                    f" > {wbq_str if wbq_str else '-'}")

        print(f"[{self.name}] {pipe_str}", file=file)

    # Implements interfaces.ExecPipeline
    def get_state_three_valued_header(self) -> Sequence[str]:
        """Column headers matching `get_state_three_valued`."""
        return ["eiq", self.kind, "wbq"]

    # Implements interfaces.ExecPipeline
    def get_state_three_valued(self, vals: Sequence[str]) -> Sequence[str]:
        """Summarize issue queue, stages and writeback queue occupancy.

        Args:
          vals: the strings to use for empty (index 0), partially full
            (index 1) and full (index 2).
        """
        if all(self._stage):
            # Full
            pipe_str = vals[2]
        elif any(self._stage):
            # Partial
            pipe_str = vals[1]
        else:
            # Empty
            pipe_str = vals[0]

        return [self._eiq.pp_three_valued(vals),
                pipe_str,
                self._writebackq.pp_three_valued(vals)]