Initial commit of cheriot ibex into hw/matcha
- Sync to commit 31dbab1
Bypass-Presubmit-Reason: failed test related to test environment change
Change-Id: I28699fb4cd29b805c60549251b4980c96f2c177b
diff --git a/hw/ip/cheriot-ibex/cheriot_core.core b/hw/ip/cheriot-ibex/cheriot_core.core
new file mode 100644
index 0000000..31ac2b7
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_core.core
@@ -0,0 +1,186 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_core:0.1"
+description: "Ibex CPU Core Components"
+
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:prim:assert
+ - lowrisc:prim:clock_gating
+ - lowrisc:prim:lfsr
+ - lowrisc:ibex:cheriot_pkg
+ - lowrisc:ibex:cheriot_icache
+ - lowrisc:dv:dv_fcov_macros
+ files:
+ - rtl/cheri_decoder.sv
+ - rtl/cheri_ex.sv
+ - rtl/cheri_tbre.sv
+ - rtl/cheri_stkz.sv
+ - rtl/cheri_tbre_wrapper.sv
+ - rtl/cheri_trvk_stage.sv
+ - rtl/cheriot_alu.sv
+ - rtl/cheriot_branch_predict.sv
+ - rtl/cheriot_compressed_decoder.sv
+ - rtl/cheriot_controller.sv
+ - rtl/cheriot_cs_registers.sv
+ - rtl/cheriot_csr.sv
+ - rtl/cheriot_counter.sv
+ - rtl/cheriot_decoder.sv
+ - rtl/cheriot_ex_block.sv
+ - rtl/cheriot_fetch_fifo.sv
+ - rtl/cheriot_id_stage.sv
+ - rtl/cheriot_if_stage.sv
+ - rtl/cheriot_load_store_unit.sv
+ - rtl/cheriot_multdiv_fast.sv
+ - rtl/cheriot_multdiv_slow.sv
+ - rtl/cheriot_prefetch_buffer.sv
+ - rtl/cheriot_pmp.sv
+ - rtl/cheriot_wb_stage.sv
+ - rtl/cheriot_dummy_instr.sv
+ - rtl/cheriot_core.sv
+ - rtl/cheriot_pmp_reset_default.svh: {is_include_file: true}
+ file_type: systemVerilogSource
+
+ files_lint_verilator:
+ files:
+ - lint/verilator_waiver.vlt: {file_type: vlt}
+
+ files_lint_verible:
+ files:
+ - lint/verible_waiver.vbw: {file_type: veribleLintWaiver}
+
+ files_check_tool_requirements:
+ depend:
+ - lowrisc:tool:check_tool_requirements
+
+parameters:
+ RVFI:
+ datatype: bool
+ paramtype: vlogdefine
+
+ SYNTHESIS:
+ datatype: bool
+ paramtype: vlogdefine
+
+ FPGA_XILINX:
+ datatype: bool
+ description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters.
+ default: false
+ paramtype: vlogdefine
+
+ RV32E:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+
+ RV32M:
+ datatype: str
+ default: cheriot_pkg::RV32MFast
+ paramtype: vlogdefine
+ description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values."
+
+ RV32B:
+ datatype: str
+ default: cheriot_pkg::RV32BNone
+ paramtype: vlogdefine
+ description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values."
+
+ RegFile:
+ datatype: str
+ default: cheriot_pkg::RegFileFF
+ paramtype: vlogdefine
+ description: "Register file implementation parameter enum. See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values."
+
+ ICache:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable instruction cache"
+
+ ICacheECC:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable ECC protection in instruction cache"
+
+ BranchTargetALU:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]"
+
+ WritebackStage:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]"
+
+ BranchPredictor:
+ datatype: int
+ paramtype: vlogparam
+ default: 0
+ description: "Enables static branch prediction (EXPERIMENTAL)"
+
+ SecureIbex:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables security hardening features (EXPERIMENTAL) [0/1]"
+
+ PMPEnable:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable PMP"
+
+ PMPGranularity:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc"
+
+ PMPNumRegions:
+ datatype: int
+ default: 4
+ paramtype: vlogparam
+ description: "Number of PMP regions"
+
+targets:
+ default: &default_target
+ filesets:
+ - tool_verilator ? (files_lint_verilator)
+ - tool_veriblelint ? (files_lint_verible)
+ - files_rtl
+ - files_check_tool_requirements
+ toplevel: cheriot_core
+ parameters:
+ - tool_vivado ? (FPGA_XILINX=true)
+ lint:
+ <<: *default_target
+ parameters:
+ - SYNTHESIS=true
+ - RVFI=true
+ default_tool: verilator
+ tools:
+ verilator:
+ mode: lint-only
+ verilator_options:
+ - "-Wall"
+ # RAM primitives wider than 64bit (required for ECC) fail to build in
+ # Verilator without increasing the unroll count (see Verilator#1266)
+ - "--unroll-count 72"
+ format:
+ filesets:
+ - files_rtl
+ parameters:
+ - SYNTHESIS=true
+ - RVFI=true
+ default_tool: veribleformat
+ toplevel: cheriot_core
+ tools:
+ veribleformat:
+ verible_format_args:
+ - "--inplace"
diff --git a/hw/ip/cheriot-ibex/cheriot_icache.core b/hw/ip/cheriot-ibex/cheriot_icache.core
new file mode 100644
index 0000000..6f963c5
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_icache.core
@@ -0,0 +1,22 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_icache:0.1"
+description: "Ibex instruction cache"
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:prim:secded
+ - lowrisc:prim:assert
+ - lowrisc:ibex:cheriot_pkg
+ files:
+ - rtl/cheriot_icache.sv
+ file_type: systemVerilogSource
+
+targets:
+ default: &default_target
+ filesets:
+ - files_rtl
+ toplevel: cheriot_icache
+ default_tool: vcs
diff --git a/hw/ip/cheriot-ibex/cheriot_multdiv.core b/hw/ip/cheriot-ibex/cheriot_multdiv.core
new file mode 100644
index 0000000..6898853
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_multdiv.core
@@ -0,0 +1,28 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:ibex_multdiv:0.1"
+description: "Multiplier and divider"
+
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:prim:assert
+ - lowrisc:ibex:cheriot_pkg
+ files:
+ - rtl/cheriot_multdiv_fast.sv
+ - rtl/cheriot_multdiv_slow.sv
+ file_type: systemVerilogSource
+
+parameters:
+ RV32M:
+ datatype: int
+ default: 2
+ paramtype: vlogparam
+ description: "Selection of multiplication implementation. Switch to enable single cycle multiplications."
+
+targets:
+ default: &default_target
+ filesets:
+ - files_rtl
diff --git a/hw/ip/cheriot-ibex/cheriot_pkg.core b/hw/ip/cheriot-ibex/cheriot_pkg.core
new file mode 100644
index 0000000..4c60a18
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_pkg.core
@@ -0,0 +1,18 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_pkg:0.1"
+description: "Header package for Ibex"
+
+filesets:
+ files_rtl:
+ files:
+ - rtl/cheriot_pkg.sv
+ - rtl/cheri_pkg.sv
+ file_type: systemVerilogSource
+
+targets:
+ default:
+ filesets:
+ - files_rtl
diff --git a/hw/ip/cheriot-ibex/cheriot_top.core b/hw/ip/cheriot-ibex/cheriot_top.core
new file mode 100644
index 0000000..5d08123
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_top.core
@@ -0,0 +1,175 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_top:0.1"
+description: "Ibex, a small RV32 CPU core"
+
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:ibex:cheriot_pkg
+ - lowrisc:ibex:cheriot_core
+ - lowrisc:prim:buf
+ - lowrisc:prim:clock_mux2
+ - lowrisc:prim:flop
+ - lowrisc:prim:ram_1p_scr
+ files:
+ - rtl/cheriot_register_file_ff.sv # generic FF-based
+ - rtl/cheriot_register_file_fpga.sv # FPGA
+ - rtl/cheriot_register_file_latch.sv # ASIC
+ - rtl/cheri_regfile.sv # generic FF-based
+ - rtl/cheriot_lockstep.sv
+ - rtl/cheriot_top.sv
+ file_type: systemVerilogSource
+
+ files_lint_verilator:
+ files:
+ - lint/verilator_waiver.vlt: {file_type: vlt}
+
+ files_lint_verible:
+ files:
+ - lint/verible_waiver.vbw: {file_type: veribleLintWaiver}
+
+ files_check_tool_requirements:
+ depend:
+ - lowrisc:tool:check_tool_requirements
+
+parameters:
+ RVFI:
+ datatype: bool
+ paramtype: vlogdefine
+
+ SYNTHESIS:
+ datatype: bool
+ paramtype: vlogdefine
+
+ FPGA_XILINX:
+ datatype: bool
+ description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters.
+ default: false
+ paramtype: vlogdefine
+
+ RV32E:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+
+ RV32M:
+ datatype: str
+ default: cheriot_pkg::RV32MFast
+ paramtype: vlogdefine
+ description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values."
+
+ RV32B:
+ datatype: str
+ default: cheriot_pkg::RV32BNone
+ paramtype: vlogdefine
+ description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values."
+
+ RegFile:
+ datatype: str
+ default: cheriot_pkg::RegFileFF
+ paramtype: vlogdefine
+ description: "Register file implementation parameter enum. See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values."
+
+ ICache:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable instruction cache"
+
+ ICacheECC:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable ECC protection in instruction cache"
+
+ BranchTargetALU:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]"
+
+ WritebackStage:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]"
+
+ BranchPredictor:
+ datatype: int
+ paramtype: vlogparam
+ default: 0
+ description: "Enables static branch prediction (EXPERIMENTAL)"
+
+ SecureIbex:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables security hardening features (EXPERIMENTAL) [0/1]"
+
+ ICacheScramble:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables ICache scrambling feature (EXPERIMENTAL) [0/1]"
+
+ PMPEnable:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable PMP"
+
+ PMPGranularity:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc"
+
+ PMPNumRegions:
+ datatype: int
+ default: 4
+ paramtype: vlogparam
+ description: "Number of PMP regions"
+
+targets:
+ default: &default_target
+ filesets:
+ - tool_verilator ? (files_lint_verilator)
+ - tool_veriblelint ? (files_lint_verible)
+ - files_rtl
+ - files_check_tool_requirements
+ toplevel: cheriot_top
+ parameters:
+ - tool_vivado ? (FPGA_XILINX=true)
+ lint:
+ <<: *default_target
+ parameters:
+ - SYNTHESIS=true
+ - RVFI=true
+ default_tool: verilator
+ tools:
+ verilator:
+ mode: lint-only
+ verilator_options:
+ - "-Wall"
+ # RAM primitives wider than 64bit (required for ECC) fail to build in
+ # Verilator without increasing the unroll count (see Verilator#1266)
+ - "--unroll-count 72"
+ format:
+ filesets:
+ - files_rtl
+ parameters:
+ - SYNTHESIS=true
+ - RVFI=true
+ default_tool: veribleformat
+ toplevel: cheriot_top
+ tools:
+ veribleformat:
+ verible_format_args:
+ - "--inplace"
+ - "--formal_parameters_indentation=indent"
+ - "--named_parameter_indentation=indent"
+ - "--named_port_indentation=indent"
+ - "--port_declarations_indentation=indent"
diff --git a/hw/ip/cheriot-ibex/cheriot_top_tracing.core b/hw/ip/cheriot-ibex/cheriot_top_tracing.core
new file mode 100644
index 0000000..48c6995
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_top_tracing.core
@@ -0,0 +1,161 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_top_tracing:0.1"
+description: "Ibex, a small RV32 CPU core with tracing enabled"
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:ibex:cheriot_top
+ - lowrisc:ibex:cheriot_tracer
+ files:
+ - rtl/cheriot_top_tracing.sv
+ file_type: systemVerilogSource
+
+parameters:
+ # The tracer uses the RISC-V Formal Interface (RVFI) to collect trace signals.
+ RVFI:
+ datatype: bool
+ paramtype: vlogdefine
+ default: true
+
+ SYNTHESIS:
+ datatype: bool
+ paramtype: vlogdefine
+
+ RV32E:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+
+ RV32M:
+ datatype: str
+ default: cheriot_pkg::RV32MFast
+ paramtype: vlogdefine
+ description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values."
+
+ RV32B:
+ datatype: str
+ default: cheriot_pkg::RV32BNone
+ paramtype: vlogdefine
+ description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values."
+
+ RegFile:
+ datatype: str
+ default: cheriot_pkg::RegFileFF
+ paramtype: vlogdefine
+ description: "Register file implementation parameter enum. See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values."
+
+ ICache:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable instruction cache"
+
+ ICacheECC:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable ECC protection in instruction cache"
+
+ BranchTargetALU:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]"
+
+ WritebackStage:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]"
+
+ BranchPredictor:
+ datatype: int
+ paramtype: vlogparam
+ default: 0
+ description: "Enables static branch prediction (EXPERIMENTAL)"
+
+ SecureIbex:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables security hardening features (EXPERIMENTAL) [0/1]"
+
+ ICacheScramble:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enables ICache scrambling feature (EXPERIMENTAL) [0/1]"
+
+ PMPEnable:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Enable PMP"
+
+ PMPGranularity:
+ datatype: int
+ default: 0
+ paramtype: vlogparam
+ description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc"
+
+ PMPNumRegions:
+ datatype: int
+ default: 4
+ paramtype: vlogparam
+ description: "Number of PMP regions"
+
+targets:
+ default: &default_target
+ filesets:
+ - files_rtl
+ parameters:
+ - RVFI=true
+ toplevel: cheriot_top_tracing
+
+ lint:
+ <<: *default_target
+ parameters:
+ - RVFI=true
+ - SYNTHESIS=true
+ - RV32E
+ - RV32M
+ - RV32B
+ - RegFile
+ - ICache
+ - ICacheECC
+ - BranchTargetALU
+ - WritebackStage
+ - BranchPredictor
+ - SecureIbex
+ - ICacheScramble
+ - PMPEnable
+ - PMPGranularity
+ - PMPNumRegions
+ default_tool: verilator
+ tools:
+ verilator:
+ mode: lint-only
+ verilator_options:
+ - "-Wall"
+ # RAM primitives wider than 64bit (required for ECC) fail to build in
+ # Verilator without increasing the unroll count (see Verilator#1266)
+ - "--unroll-count 72"
+ format:
+ filesets:
+ - files_rtl
+ parameters:
+ - SYNTHESIS=true
+ - RVFI=true
+ default_tool: veribleformat
+ toplevel: cheriot_top_tracing
+ tools:
+ veribleformat:
+ verible_format_args:
+ - "--inplace"
+ - "--formal_parameters_indentation=indent"
+ - "--named_parameter_indentation=indent"
+ - "--named_port_indentation=indent"
+ - "--port_declarations_indentation=indent"
diff --git a/hw/ip/cheriot-ibex/cheriot_tracer.core b/hw/ip/cheriot-ibex/cheriot_tracer.core
new file mode 100644
index 0000000..e9bbce5
--- /dev/null
+++ b/hw/ip/cheriot-ibex/cheriot_tracer.core
@@ -0,0 +1,20 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:ibex:cheriot_tracer:0.1"
+description: "Tracer for use with Ibex using the RVFI interface"
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:prim:assert
+ - lowrisc:ibex:cheriot_pkg
+ files:
+ - rtl/cheriot_tracer_pkg.sv
+ - rtl/cheriot_tracer.sv
+ file_type: systemVerilogSource
+
+targets:
+ default:
+ filesets:
+ - files_rtl
diff --git a/hw/ip/cheriot-ibex/lint/verible_waiver.vbw b/hw/ip/cheriot-ibex/lint/verible_waiver.vbw
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/hw/ip/cheriot-ibex/lint/verible_waiver.vbw
diff --git a/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt b/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt
new file mode 100644
index 0000000..b7c952c
--- /dev/null
+++ b/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt
@@ -0,0 +1,72 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Lint waivers for Verilator
+// See https://www.veripool.org/projects/verilator/wiki/Manual-verilator#CONFIGURATION-FILES
+// for documentation.
+//
+// Important: This file must be included *before* any other Verilog file is read.
+// Otherwise, only global waivers are applied, but not file-specific waivers.
+
+`verilator_config
+lint_off -rule PINCONNECTEMPTY
+
+// We have some boolean top-level parameters in e.g. ibex_core_tracing.sv.
+// When building with fusesoc, these get set with defines like
+// -GRV32M=1 (rather than -GRV32M=1'b1), leading to warnings like:
+//
+// Operator VAR '<varname>' expects 1 bits on the Initial value, but
+// Initial value's CONST '32'h1' generates 32 bits.
+//
+// This signoff rule ignores errors like this. Note that it only
+// matches when you set a 1-bit value to a literal 1, so it won't hide
+// silly mistakes like setting it to 2.
+//
+lint_off -rule WIDTH -file "*/rtl/ibex_top_tracing.sv"
+ -match "*expects 1 bits*Initial value's CONST '32'h1'*"
+
+// Operator expects 1 bit on initial value but initial value's CONST generates
+// 32 bits, need a specific RV32B waiver as it uses enums so the above catch-all
+// waiver doesn't work.
+lint_off -rule WIDTH -file "*/rtl/ibex_top_tracing.sv" -match "*'RV32B'*"
+
+// Bits of signal are not used: be_i[3:1]
+// Bits of signal are not used: addr_i[31:10,1:0]
+// Bits of signal are not used: wdata_i[31:8]
+//
+// simulator_ctrl exposes a 32-bit write-only interface to its control
+// registers, but actually only looks at the bottom byte and rounds addresses
+// down to be 4-byte aligned.
+//
+lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'be_i'[3:1]*"
+lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'addr_i'[31:10,1:0]*"
+lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'wdata_i'[31:8]*"
+
+// Bits of signal are not used: timer_addr_i[31:10]
+//
+// The upper bits of this address are used to select whether the timer is
+// addressed at all (encoded in the timer_req_i input). However, we pass the
+// entire 32-bit address around to make the code a bit cleaner.
+lint_off -rule UNUSED -file "*/rtl/timer.sv" -match "*'timer_addr_i'[31:10]*"
+
+// Signal is not used: clk_i
+// leaving clk and reset connected in-case we want to add assertions
+lint_off -rule UNUSED -file "*/rtl/ibex_pmp.sv" -match "*clk_i*"
+lint_off -rule UNUSED -file "*/rtl/ibex_compressed_decoder.sv" -match "*clk_i*"
+lint_off -rule UNUSED -file "*/rtl/ibex_decoder.sv" -match "*clk_i*"
+lint_off -rule UNUSED -file "*/rtl/ibex_branch_predict.sv" -match "*clk_i*"
+
+// Signal is not used: rst_ni
+// leaving clk and reset connected in-case we want to add assertions
+lint_off -rule UNUSED -file "*/rtl/ibex_pmp.sv" -match "*rst_ni*"
+lint_off -rule UNUSED -file "*/rtl/ibex_compressed_decoder.sv" -match "*rst_ni*"
+lint_off -rule UNUSED -file "*/rtl/ibex_decoder.sv" -match "*rst_ni*"
+lint_off -rule UNUSED -file "*/rtl/ibex_branch_predict.sv" -match "*rst_ni*"
+
+// Temporary waivers until OpenTitan primitives are lint-clean
+// https://github.com/lowRISC/opentitan/issues/2313
+lint_off -file "*/lowrisc_prim_*/rtl/*.sv"
+
+lint_off -rule UNUSED -file "*/rtl/ibex_top_tracing.sv" -match "*RndCnstLfsrSeed*"
+lint_off -rule UNUSED -file "*/rtl/ibex_top_tracing.sv" -match "*RndCnstLfsrPerm*"
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv
new file mode 100644
index 0000000..113c95f
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv
@@ -0,0 +1,130 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Cheri instruction decoder
+// should we merge this with cheri_EX? let's leave it alone for now since we may look into
+// a separate decoder PL stage later
+
+module cheri_decoder import cheri_pkg::*; # (
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0
+) (
+ input logic [31:0] instr_rdata_i,
+ input logic cheri_opcode_en_i, // op = 0x5b
+ input logic cheri_tsafe_en_i,
+ input logic cheri_auipcc_en_i, // op = 0x17 (AUIPC)
+ input logic cheri_auicgp_en_i, // op = 0x7b (AUICGP)
+ input logic cheri_jalr_en_i, // op = 0x67 (JALR)
+ input logic cheri_jal_en_i, // op = 0x6f (JAL)
+ input logic cheri_cload_en_i, // op = 0x3, [14:12] = 0x3 (LD)
+ input logic cheri_cstore_en_i, // op = 0x23, [14:12] = 0x3 (SD)
+ output logic instr_is_cheri_o, // instr in cheri space
+ output logic instr_is_legal_cheri_o, // legal cheri instruction
+ output logic [11:0] cheri_imm12_o,
+ output logic [19:0] cheri_imm20_o,
+ output logic [20:0] cheri_imm21_o,
+ output logic [OPDW-1:0] cheri_operator_o,
+ output logic [4:0] cheri_cs2_dec_o,
+ output logic cheri_rf_ren_a_o,
+ output logic cheri_rf_ren_b_o,
+ output logic cheri_rf_we_dec_o,
+ output logic cheri_multicycle_dec_o
+ );
+
+ logic [6:0] unused_opcode;
+ logic [2:0] func3_op;
+ logic [6:0] func7_op;
+ logic [4:0] imm5_op;
+ logic [4:0] rd_op;
+
+ // note there are 3 encoding formats of CHERI instructions
+ // - fmt1: I-format, func3(14:12) = subFunc.
+ // - fmt2: R-format, func3(14:12) = 0x0, func7(31:25) = subFunc, etc.
+ // - fmt3: I-format, func3(14:12) = 0x0, func7(31:25) = 0x7f, imm5(24:20) = subFunc
+ // - opcode [6:0] == 0x5b for all CHERI instructions
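+ // for example (see the decode table below): CSetBounds is a fmt2 (R-format) encoding with
+ // func3 = 0 and func7 = 0x08, while CGetPerm is a fmt3 encoding with func7 = 0x7f and imm5 = 0x00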
+ assign unused_opcode = instr_rdata_i[6:0];
+ assign func3_op = instr_rdata_i[14:12];
+ assign func7_op = instr_rdata_i[31:25];
+ assign imm5_op = instr_rdata_i[24:20];
+ assign rd_op = instr_rdata_i[11:7];
+
+ always_comb begin
+ cheri_operator_o = OPDW'('h0);
+
+ cheri_operator_o[CCSR_RW] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h01);
+ cheri_operator_o[CSET_BOUNDS] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h08);
+ cheri_operator_o[CSET_BOUNDS_EX] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h09);
+ cheri_operator_o[CSET_BOUNDS_RNDN]= cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0a);
+ cheri_operator_o[CSEAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0b);
+ cheri_operator_o[CUNSEAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0c);
+ cheri_operator_o[CAND_PERM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0d);
+ cheri_operator_o[CSET_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h10);
+ cheri_operator_o[CINC_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h11);
+ cheri_operator_o[CSUB_CAP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h14);
+ cheri_operator_o[CSET_HIGH] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h16);
+ cheri_operator_o[CIS_SUBSET] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h20);
+ cheri_operator_o[CIS_EQUAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h21);
+
+
+ cheri_operator_o[CGET_PERM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h00);
+ cheri_operator_o[CGET_TYPE] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h01);
+ cheri_operator_o[CGET_BASE] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h02);
+ cheri_operator_o[CGET_HIGH] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h17);
+ cheri_operator_o[CGET_TOP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h18);
+ cheri_operator_o[CGET_LEN] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h03);
+ cheri_operator_o[CGET_TAG] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h04);
+ cheri_operator_o[CRRL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h08);
+ cheri_operator_o[CRAM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h09);
+ cheri_operator_o[CGET_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0f);
+ cheri_operator_o[CMOVE_CAP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0a);
+ cheri_operator_o[CCLEAR_TAG] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0b);
+
+ cheri_operator_o[CINC_ADDR_IMM] = cheri_opcode_en_i && (func3_op == 1);
+ cheri_operator_o[CSET_BOUNDS_IMM] = cheri_opcode_en_i && (func3_op == 2);
+
+
+ cheri_operator_o[CAUIPCC] = cheri_auipcc_en_i;
+ cheri_operator_o[CAUICGP] = cheri_auicgp_en_i;
+ cheri_operator_o[CJALR] = cheri_jalr_en_i;
+ cheri_operator_o[CJAL] = cheri_jal_en_i;
+ cheri_operator_o[CLOAD_CAP] = cheri_cload_en_i;
+ // cheri_operator_o[CLBC] = cheri_cload_en_i & ~func3_op[2] & cheri_tsafe_en_i;
+ cheri_operator_o[CSTORE_CAP] = cheri_cstore_en_i;
+ end
+
+ // partially decoded, early signal to control muxing and regfile read
+ assign instr_is_cheri_o = cheri_opcode_en_i | cheri_jalr_en_i | cheri_jal_en_i |
+ cheri_auipcc_en_i | cheri_auicgp_en_i | cheri_cload_en_i | cheri_cstore_en_i;
+
+ assign instr_is_legal_cheri_o = |cheri_operator_o;
+
+ assign cheri_cs2_dec_o = cheri_operator_o[CCSR_RW] ? imm5_op : 0;
+
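+ // immediate selection mirrors the base RV32 encoding: loads/jalr/immediate-form cheri ops use
+ // the I-type immediate {func7, imm5}, while stores use the S-type split immediate {func7, rd}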
+ assign cheri_imm12_o = (cheri_operator_o[CJALR]|cheri_operator_o[CSET_BOUNDS_IMM]|
+ cheri_operator_o[CINC_ADDR_IMM]|cheri_operator_o[CLOAD_CAP]) ?
+ {func7_op, imm5_op}:(cheri_operator_o[CSTORE_CAP]?{func7_op, rd_op}:0);
+
+ assign cheri_imm20_o = (cheri_operator_o[CAUIPCC]|cheri_operator_o[CAUICGP]) ? instr_rdata_i[31:12] : 0;
+
+ assign cheri_imm21_o = cheri_operator_o[CJAL] ? {instr_rdata_i[31], instr_rdata_i[19:12],
+ instr_rdata_i[20], instr_rdata_i[30:21], 1'b0} : 0;
+
+ // register dependency decoding (ren_a, ren_b, we)
+ // only handled opcode=0x5b case here.
+ // Will be qualified and combined with other cases by ibexc_decoder
+ assign cheri_rf_ren_a_o = 1'b1;
+ assign cheri_rf_ren_b_o = (func3_op == 0) && (func7_op != 7'h7f) && (func7_op !=7'h01);
+
+ // cheri_rf_we_dec_o is not used to generate the actual regfile write enables in the case of
+ // cheri instructions (which is in cheri_ex and muxed with rf_we in wb_stage).
+ // However it is merged into the overall rf_we and used to generate stall_cheri_trvk
+ assign cheri_rf_we_dec_o = cheri_opcode_en_i & (|cheri_operator_o);
+
+ assign cheri_multicycle_dec_o = (cheri_operator_o[CLOAD_CAP] & cheri_tsafe_en_i & ~CheriPPLBC) |
+ (CheriSBND2 & (cheri_operator_o[CSET_BOUNDS] |
+ cheri_operator_o[CSET_BOUNDS_IMM] |
+ cheri_operator_o[CSET_BOUNDS_EX] |
+ cheri_operator_o[CRRL] | cheri_operator_o[CRAM]));
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_ex.sv b/hw/ip/cheriot-ibex/rtl/cheri_ex.sv
new file mode 100644
index 0000000..45dd6c2
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_ex.sv
@@ -0,0 +1,1172 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+module cheri_ex import cheri_pkg::*; #(
+ parameter bit WritebackStage = 1'b0,
+ parameter bit MemCapFmt = 1'b0,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2002_f000,
+ parameter int unsigned TSMapSize = 1024,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriStkZ = 1'b1,
+ parameter bit CheriCapIT8 = 1'b0
+)(
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // configuration & control
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ input logic debug_mode_i,
+
+ // data forwarded from WB stage
+ input logic fwd_we_i,
+ input logic [4:0] fwd_waddr_i,
+ input logic [31:0] fwd_wdata_i,
+ input reg_cap_t fwd_wcap_i,
+
+ // regfile interface
+ input logic [4:0] rf_raddr_a_i,
+ input logic [31:0] rf_rdata_a_i,
+ input reg_cap_t rf_rcap_a_i,
+ input logic [4:0] rf_raddr_b_i,
+ input logic [31:0] rf_rdata_b_i,
+ input reg_cap_t rf_rcap_b_i,
+ output logic rf_trsv_en_o,
+ input logic [4:0] rf_waddr_i,
+
+ // pcc interface
+ input pcc_cap_t pcc_cap_i,
+ output pcc_cap_t pcc_cap_o,
+ input logic [31:0] pc_id_i,
+
+ // use branch_req_o also to update pcc cap
+ output logic branch_req_o, // update PCC (goes to cs_registers)
+ output logic branch_req_spec_o, // speculative branch request (go to IF)
+ output logic [31:0] branch_target_o,
+
+ // Interface to ID stage control logic
+ input logic cheri_exec_id_i,
+ input logic instr_first_cycle_i, // 1st exec cycle allowing lsu_req
+
+ // inputs from decoder
+ input logic instr_valid_i,
+ input logic instr_is_cheri_i,
+ input logic instr_is_rv32lsu_i,
+ input logic instr_is_compressed_i,
+ input logic [11:0] cheri_imm12_i,
+ input logic [19:0] cheri_imm20_i,
+ input logic [20:0] cheri_imm21_i,
+ input logic [4:0] cheri_cs2_dec_i, // cs2 used for CSR address
+ input logic [OPDW-1:0] cheri_operator_i,
+
+ // output to wb stage
+ output logic cheri_rf_we_o,
+ output logic [31:0] result_data_o,
+ output reg_cap_t result_cap_o,
+
+ output logic cheri_ex_valid_o,
+ output logic cheri_ex_err_o,
+ output logic [11:0] cheri_ex_err_info_o,
+ output logic cheri_wb_err_o,
+ output logic [15:0] cheri_wb_err_info_o,
+
+ // lsu interface
+ output logic lsu_req_o,
+ output logic lsu_cheri_err_o,
+ output logic lsu_is_cap_o,
+ output logic [3:0] lsu_lc_clrperm_o,
+ output logic lsu_we_o,
+ output logic [31:0] lsu_addr_o,
+ output logic [1:0] lsu_type_o,
+ output logic [32:0] lsu_wdata_o,
+ output reg_cap_t lsu_wcap_o,
+ output logic lsu_sign_ext_o,
+ output logic cpu_stall_by_stkz_o,
+ output logic cpu_grant_to_stkz_o,
+
+ input logic addr_incr_req_i,
+ input logic [31:0] addr_last_i,
+ input logic lsu_req_done_i,
+ input logic [32:0] lsu_rdata_i,
+ input reg_cap_t lsu_rcap_i,
+
+ // LSU interface to the existing core (muxed)
+ input logic rv32_lsu_req_i,
+ input logic rv32_lsu_we_i,
+ input logic [1:0] rv32_lsu_type_i,
+ input logic [31:0] rv32_lsu_wdata_i,
+ input logic rv32_lsu_sign_ext_i,
+ input logic [31:0] rv32_lsu_addr_i,
+ output logic rv32_addr_incr_req_o,
+ output logic [31:0] rv32_addr_last_o,
+
+ // TBRE LSU request (for muxing)
+ input logic lsu_tbre_sel_i,
+ input logic tbre_lsu_req_i,
+ input logic tbre_lsu_is_cap_i,
+ input logic tbre_lsu_we_i,
+ input logic [31:0] tbre_lsu_addr_i,
+ input logic [32:0] tbre_lsu_wdata_i,
+ output logic cpu_lsu_dec_o,
+
+ input logic [31:0] csr_rdata_i,
+ input reg_cap_t csr_rcap_i,
+ input logic csr_mstatus_mie_i,
+ output logic csr_access_o,
+ output logic [4:0] csr_addr_o,
+ output logic [31:0] csr_wdata_o,
+ output reg_cap_t csr_wcap_o,
+ output cheri_csr_op_e csr_op_o,
+ output logic csr_op_en_o,
+ output logic csr_set_mie_o,
+ output logic csr_clr_mie_o,
+
+ // stack highwater mark updates
+ input logic [31:0] csr_mshwm_i,
+ input logic [31:0] csr_mshwmb_i,
+ output logic csr_mshwm_set_o,
+ output logic [31:0] csr_mshwm_new_o,
+
+ // stack fast clearing control signals
+ input logic stkz_active_i,
+ input logic stkz_abort_i,
+ input logic [31:0] stkz_ptr_i,
+ input logic [31:0] stkz_base_i,
+
+ output logic ztop_wr_o,
+ output logic [31:0] ztop_wdata_o,
+ output full_cap_t ztop_wfcap_o,
+ input logic [31:0] ztop_rdata_i,
+ input reg_cap_t ztop_rcap_i,
+
+ // debug feature
+ input logic csr_dbg_tclr_fault_i
+);
+
+ localparam int unsigned TSMapTop = TSMapBase+TSMapSize*4;
+
+ logic cheri_lsu_req;
+ logic cheri_lsu_we;
+ logic [31:0] cheri_lsu_addr;
+ logic [32:0] cheri_lsu_wdata;
+ reg_cap_t cheri_lsu_wcap;
+ logic cheri_lsu_err;
+ logic cheri_lsu_is_cap;
+
+ logic [31:0] rf_rdata_a, rf_rdata_ng_a;
+ logic [31:0] rf_rdata_b, rf_rdata_ng_b;
+
+ reg_cap_t rf_rcap_a, rf_rcap_ng_a;
+ reg_cap_t rf_rcap_b, rf_rcap_ng_b;
+
+ full_cap_t rf_fullcap_a, rf_fullcap_b;
+
+ reg_cap_t csc_wcap;
+
+ logic is_load_cap, is_store_cap, is_cap;
+
+ logic addr_bound_vio;
+ logic perm_vio, perm_vio_slc;
+ logic rv32_lsu_err;
+ logic addr_bound_vio_rv32;
+ logic perm_vio_rv32;
+
+ logic [W_PVIO-1:0] perm_vio_vec, perm_vio_vec_rv32;
+
+ logic [31:0] cs1_addr_plusimm;
+ logic [31:0] cs1_imm;
+ logic [31:0] addr_result;
+
+
+ logic cheri_rf_we_raw, branch_req_raw, branch_req_spec_raw;
+ logic csr_set_mie_raw, csr_clr_mie_raw;
+ logic cheri_ex_valid_raw, cheri_ex_err_raw;
+ logic csr_op_en_raw;
+ logic cheri_wb_err_raw;
+ logic cheri_wb_err_q, cheri_wb_err_d;
+ logic ztop_wr_raw;
+
+ logic [3:0] cheri_lsu_lc_clrperm;
+ logic lc_cglg, lc_csdlm, lc_ctag;
+ logic [31:0] pc_id_nxt;
+
+ full_cap_t setaddr1_outcap, setbounds_outcap, setbounds_rndn_outcap;
+ logic [15:0] cheri_wb_err_info_q, cheri_wb_err_info_d;
+ logic set_bounds_done;
+
+ logic [4:0] cheri_err_cause, rv32_err_cause;
+ logic [31:0] cpu_lsu_addr;
+ logic [32:0] cpu_lsu_wdata;
+ logic cpu_lsu_we;
+ logic cpu_lsu_cheri_err, cpu_lsu_is_cap;
+
+ logic illegal_scr_addr;
+ logic scr_legalization;
+
+ // data forwarding for CHERI instructions
+ // - note address 0 is a read-only location per RISC-V
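+ // e.g. if WB is still writing c5 while ID/EX reads c5, the forwarded write data/cap is used
+ // instead of the (stale) regfile read; reads of register 0 are never forwarded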
+ always_comb begin : fwd_data_merger
+ if ((rf_raddr_a_i == fwd_waddr_i) && fwd_we_i && (|rf_raddr_a_i)) begin
+ rf_rdata_ng_a = fwd_wdata_i;
+ rf_rcap_ng_a = fwd_wcap_i;
+ end else begin
+ rf_rdata_ng_a = rf_rdata_a_i;
+ rf_rcap_ng_a = rf_rcap_a_i;
+ end
+
+ if ((rf_raddr_b_i == fwd_waddr_i) && fwd_we_i && (|rf_raddr_b_i)) begin
+ rf_rdata_ng_b = fwd_wdata_i;
+ rf_rcap_ng_b = fwd_wcap_i;
+ end else begin
+ rf_rdata_ng_b = rf_rdata_b_i;
+ rf_rcap_ng_b = rf_rcap_b_i;
+ end
+ end
+
+ // 1st level of operand gating (power-saving)
+ // - gate off the input to reg2full conversion logic
+ // - note rv32 lsu req only uses cs1
+ // - may need to use dont_touch gates
+ assign rf_rcap_a = (instr_is_cheri_i | instr_is_rv32lsu_i) ? rf_rcap_ng_a : NULL_REG_CAP;
+ assign rf_rdata_a = (instr_is_cheri_i | instr_is_rv32lsu_i) ? rf_rdata_ng_a : 32'h0;
+
+ assign rf_rcap_b = instr_is_cheri_i ? rf_rcap_ng_b : NULL_REG_CAP;
+ assign rf_rdata_b = instr_is_cheri_i ? rf_rdata_ng_b : 32'h0;
+
+ // expand the capabilities
+ assign rf_fullcap_a = reg2fullcap(rf_rcap_a, rf_rdata_a);
+ assign rf_fullcap_b = reg2fullcap(rf_rcap_b, rf_rdata_b);
+
+ // gate these signals with cheri_exec_id to make sure they are only active when needed
+ // (only 1 cycle in all cases other than cheri_rf_we)
+ // -- safest approach and probably the right thing to do in case there is a wb_exception
+ assign cheri_rf_we_o = cheri_rf_we_raw & cheri_exec_id_i;
+ assign branch_req_o = branch_req_raw & cheri_exec_id_i;
+ assign branch_req_spec_o = branch_req_spec_raw & cheri_exec_id_i;
+ assign csr_set_mie_o = csr_set_mie_raw & cheri_exec_id_i;
+ assign csr_clr_mie_o = csr_clr_mie_raw & cheri_exec_id_i;
+ assign csr_op_en_o = csr_op_en_raw & cheri_exec_id_i;
+ assign ztop_wr_o = ztop_wr_raw & cheri_exec_id_i;
+
+ // ex_valid only used in multicycle case
+ // ex_err is used for id exceptions
+ assign cheri_ex_valid_o = cheri_ex_valid_raw & cheri_exec_id_i;
+ assign cheri_ex_err_o = cheri_ex_err_raw & cheri_exec_id_i & ~debug_mode_i;
+
+ if (WritebackStage) begin
+ assign cheri_wb_err_o = cheri_wb_err_q;
+ end else begin
+ assign cheri_wb_err_o = cheri_wb_err_d;
+ end
+
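+ // load-clear-perm controls for capability loads: bit 3 clears the loaded tag when cs1 lacks MC,
+ // and the lower bits request permission clearing on the loaded cap when cs1 lacks LM/LG
+ // (the lc_* terms are driven by the CLOAD_CAP case below); all clearing is bypassed in debug mode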
+ assign cheri_lsu_lc_clrperm = debug_mode_i ? 4'h0 : {lc_ctag, 1'b0, lc_csdlm, lc_cglg};
+
+ always_comb begin : main_ex
+ logic [PERMS_W-1:0] perms_temp;
+ full_cap_t tfcap;
+
+ //default
+ cheri_rf_we_raw = 1'b0;
+ result_data_o = 32'h0;
+ result_cap_o = NULL_REG_CAP;
+ csc_wcap = NULL_REG_CAP;
+ cheri_ex_valid_raw = 1'b0;
+ cheri_ex_err_raw = 1'b0;
+ cheri_wb_err_raw = 1'b0;
+ perms_temp = 0;
+
+ csr_access_o = 1'b0;
+ csr_addr_o = 5'h0;
+ csr_wdata_o = 32'h0;
+ csr_wcap_o = NULL_REG_CAP;
+ csr_op_o = CHERI_CSR_NULL;
+ csr_op_en_raw = 1'b0;
+ scr_legalization = 1'b0;
+
+ branch_req_raw = 1'b0;
+ branch_req_spec_raw = 1'b0;
+ csr_set_mie_raw = 1'b0;
+ csr_clr_mie_raw = 1'b0;
+ branch_target_o = 32'h0;
+ pcc_cap_o = NULL_PCC_CAP;
+ tfcap = NULL_FULL_CAP;
+ lc_cglg = 1'b0;
+ lc_csdlm = 1'b0;
+ lc_ctag = 1'b0;
+ rf_trsv_en_o = 1'b0;
+ ztop_wr_raw = 1'b0;
+ ztop_wdata_o = 32'h0;
+ ztop_wfcap_o = NULL_FULL_CAP;
+
+ unique case (1'b1)
+ cheri_operator_i[CGET_PERM]:
+ begin
+ result_data_o = {19'h0, rf_fullcap_a.perms};
+ result_cap_o = NULL_REG_CAP; // zero out the cap msw
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_TYPE]:
+ begin
+ result_data_o = {28'h0, decode_otype(rf_fullcap_a.otype, rf_fullcap_a.perms[PERM_EX])};
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_BASE]:
+ begin
+ result_data_o = rf_fullcap_a.base32;
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_TOP]:
+ begin
+ result_data_o = rf_fullcap_a.top33[32] ? 32'hffff_ffff : rf_fullcap_a.top33[31:0];
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_LEN]:
+ begin
+ result_data_o = get_cap_len(rf_fullcap_a);
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_TAG]:
+ begin
+ result_data_o = {31'h0, rf_fullcap_a.valid};
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_ADDR]:
+ begin
+ result_data_o = rf_rdata_a;
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CGET_HIGH]:
+ begin
+ logic [65:0] tmp66;
+ tmp66 = MemCapFmt ? (CheriCapIT8 ? reg2mem_it8_fmt1(rf_rcap_a, rf_rdata_a) :
+ reg2mem_fmt1(rf_rcap_a, rf_rdata_a)) :
+ (CheriCapIT8 ? {reg2memcap_it8_fmt0(rf_rcap_a), 1'b0, rf_rdata_a[31:0]} :
+ {reg2memcap_fmt0(rf_rcap_a), 1'b0, rf_rdata_a[31:0]});
+ result_data_o = tmp66[64:33];
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL]):
+ begin // cd <-- cs1; cd.otype <-- cs2.otype; cd.sealed <-- val
+ result_data_o = rf_rdata_a;
+
+ if (cheri_operator_i[CSEAL])
+ result_cap_o = full2regcap(seal_cap(rf_fullcap_a, rf_rdata_b[OTYPE_W-1:0]));
+ else begin
+ tfcap = unseal_cap(rf_fullcap_a);
+ tfcap.perms[PERM_GL] = rf_fullcap_a.perms[PERM_GL] & rf_fullcap_b.perms[PERM_GL];
+ tfcap.cperms = compress_perms(tfcap.perms, tfcap.cperms[5:4]);
+ result_cap_o = full2regcap(tfcap);
+ end
+
+ result_cap_o.valid = result_cap_o.valid & (~addr_bound_vio) & (~perm_vio);
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CAND_PERM]: // cd <-- cs1; cd.perm <-- cd.perm & rs2
+ begin
+ logic [PERMS_W-1:0] pmask;
+ result_data_o = rf_rdata_a;
+ tfcap = rf_fullcap_a;
+ tfcap.perms = tfcap.perms & rf_rdata_b[PERMS_W-1:0];
+ tfcap.cperms = compress_perms(tfcap.perms, tfcap.cperms[5:4]);
+ // for sealed caps, clear tag unless perm mask (excluding GL) == all '1'
+ pmask = rf_rdata_b[PERMS_W-1:0];
+ pmask[PERM_GL] = 1'b1;
+ tfcap.valid = tfcap.valid & (~is_cap_sealed(rf_fullcap_a) | (&pmask));
+ result_cap_o = full2regcap(tfcap);
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CSET_HIGH]: // cd <-- cs1; cd.high <-- convert(rs2)
+ begin
+ // this only works for memcap_fmt0 for now QQQ
+ result_data_o = rf_rdata_a;
+ result_cap_o = CheriCapIT8 ? mem2regcap_it8_fmt0({1'b0, rf_rdata_b}, {1'b0, rf_rdata_a}, 4'h0) :
+ mem2regcap_fmt0({1'b0, rf_rdata_b}, {1'b0, rf_rdata_a}, 4'h0);
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+
+ // setaddr/incoffset: cd <-- cs1; cd.offset <-- rs2, or cs1.addr + rs2, or cs1.addr + imm12
+ // auipcc: cd <-- pcc, cd.address <-- pcc.address + (imm20 << 12)
+ (cheri_operator_i[CSET_ADDR] | cheri_operator_i[CINC_ADDR] |
+ cheri_operator_i[CINC_ADDR_IMM] | cheri_operator_i[CAUIPCC] | cheri_operator_i[CAUICGP]):
+ begin
+ logic clr_sealed;
+ logic instr_fault;
+
+ result_data_o = addr_result;
+
+ // for pointer operations, follow C convention and allow newptr == top
+ clr_sealed = cheri_operator_i[CAUIPCC] ? 1'b0 : is_cap_sealed(rf_fullcap_a);
+ tfcap = setaddr1_outcap;
+ tfcap.valid = tfcap.valid & ~clr_sealed;
+ result_cap_o = full2regcap(tfcap);
+ instr_fault = csr_dbg_tclr_fault_i & (rf_fullcap_a.valid | cheri_operator_i[CAUIPCC]) &
+ ~result_cap_o.valid;
+ cheri_wb_err_raw = instr_fault;
+ cheri_rf_we_raw = ~instr_fault;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] | cheri_operator_i[CSET_BOUNDS_EX] |
+ cheri_operator_i[CRRL] | cheri_operator_i[CRAM] | cheri_operator_i[CSET_BOUNDS_RNDN]):
+ begin // cd <-- cs1; cd.base <-- cs1.address, cd.len <-- rs2 or imm12
+ logic instr_fault;
+
+ tfcap = cheri_operator_i[CSET_BOUNDS_RNDN] ? setbounds_rndn_outcap : setbounds_outcap;
+ tfcap.valid = tfcap.valid & ~is_cap_sealed(rf_fullcap_a);
+
+ if (cheri_operator_i[CRRL]) begin
+ result_data_o = tfcap.rlen;
+ result_cap_o = NULL_REG_CAP;
+ end else if (cheri_operator_i[CRAM]) begin
+ result_data_o = tfcap.maska;
+ result_cap_o = NULL_REG_CAP;
+ end else begin
+ result_data_o = rf_rdata_a;
+ result_cap_o = full2regcap(tfcap);
+ end
+
+ cheri_ex_valid_raw = set_bounds_done;
+ instr_fault = csr_dbg_tclr_fault_i & rf_fullcap_a.valid & ~result_cap_o.valid &
+ (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] |
+ cheri_operator_i[CSET_BOUNDS_EX] | cheri_operator_i[CSET_BOUNDS_RNDN]);
+ cheri_rf_we_raw = ~instr_fault;
+ cheri_wb_err_raw = instr_fault;
+ end
+ cheri_operator_i[CCLEAR_TAG]: // cd <-- cs1; cd.tag <-- '0'
+ begin
+ result_data_o = rf_rdata_a;
+ result_cap_o = rf_rcap_a;
+ result_cap_o.valid = 1'b0;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CIS_SUBSET]: // rd <-- (cs1.tag == cs2.tag) && (cs2 is_subset_of cs1)
+ begin
+ result_data_o = 32'((rf_fullcap_a.valid == rf_fullcap_b.valid) &&
+ ~addr_bound_vio && (&(rf_fullcap_a.perms | ~rf_fullcap_b.perms)));
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CIS_EQUAL]: // rd <-- (cs1 == cs2)
+ begin
+ result_data_o = 32'(is_equal(rf_fullcap_a, rf_fullcap_b, rf_rdata_a, rf_rdata_b));
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CSUB_CAP]: // rd <-- cs1.addr - cs2.addr
+ begin
+ result_data_o = rf_rdata_a - rf_rdata_b;
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CMOVE_CAP]: // cd <-- cs1
+ begin
+ result_data_o = rf_rdata_a;
+ result_cap_o = rf_rcap_a;
+ cheri_rf_we_raw = 1'b1;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ cheri_operator_i[CLOAD_CAP]:
+ begin
+ lc_cglg = ~rf_fullcap_a.perms[PERM_LG];
+ lc_csdlm = ~rf_fullcap_a.perms[PERM_LM];
+ lc_ctag = ~rf_fullcap_a.perms[PERM_MC];
+
+ result_data_o = 32'h0;
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b0;
+ cheri_ex_valid_raw = 1'b1; // lsu_req_done is factored in by id_stage
+ cheri_ex_err_raw = 1'b0; // acc err passed to LSU and processed later in WB
+ rf_trsv_en_o = CheriPPLBC & cheri_tsafe_en_i & lsu_req_done_i;
+ end
+ cheri_operator_i[CSTORE_CAP]:
+ begin
+ result_data_o = 32'h0;
+ result_cap_o = NULL_REG_CAP;
+ cheri_rf_we_raw = 1'b0;
+ cheri_ex_valid_raw = 1'b1;
+ cheri_ex_err_raw = 1'b0; // acc err passed to LSU and processed later in WB
+ csc_wcap = rf_rcap_b;
+ csc_wcap.valid = rf_rcap_b.valid & ~perm_vio_slc;
+ end
+ cheri_operator_i[CCSR_RW]: // cd <-- scr; scr <-- cs1 if cs1 != C0
+ begin
+ logic [31:0] tmp32;
+ logic is_ztop, is_write;
+ reg_cap_t trcap;
+ logic instr_fault;
+
+ is_ztop = (cheri_cs2_dec_i==CHERI_SCR_ZTOPC);
+ is_write = (rf_raddr_a_i != 0);
+ instr_fault = perm_vio | illegal_scr_addr;
+
+ csr_access_o = ~instr_fault;
+ csr_op_o = CHERI_CSR_RW;
+ csr_op_en_raw = ~instr_fault && is_write && ~is_ztop;
+ ztop_wr_raw = ~instr_fault && is_write && is_ztop;
+ csr_addr_o = cheri_cs2_dec_i;
+
+ if (cheri_cs2_dec_i == CHERI_SCR_MTCC) begin
+ // MTVEC/MTCC legalization (clear tag if checking fails)
+ // note we don't really need set_address checks here - it's only used to update temp fields
+ // so that RTL behavior would match sail
+ scr_legalization = 1'b1;
+ csr_wdata_o = {rf_rdata_a[31:2], 2'b00};
+ trcap = full2regcap(setaddr1_outcap);
+ if ((rf_rdata_a[1:0] != 2'b00) || ~rf_fullcap_a.perms[PERM_EX] || (rf_fullcap_a.otype != 0))
+ trcap.valid = 1'b0;
+ else
+ trcap.valid = rf_fullcap_a.valid;
+ csr_wcap_o = trcap;
+ end else if (cheri_cs2_dec_i == CHERI_SCR_MEPCC) begin
+ // MEPCC legalization (clear tag if checking fails)
+ scr_legalization = 1'b1;
+ csr_wdata_o = {rf_rdata_a[31:1], 1'b0};
+ trcap = full2regcap(setaddr1_outcap);
+ if ((rf_rdata_a[0] != 1'b0) || ~rf_fullcap_a.perms[PERM_EX] || (rf_fullcap_a.otype != 0))
+ trcap.valid = 1'b0;
+ else
+ trcap.valid = rf_fullcap_a.valid;
+ csr_wcap_o = trcap;
+ end else begin
+ scr_legalization = 1'b0;
+ csr_wdata_o = rf_rdata_a;
+ csr_wcap_o = rf_rcap_a;
+ end
+
+ if (is_ztop) begin
+ result_data_o = ztop_rdata_i;
+ result_cap_o = ztop_rcap_i;
+ ztop_wfcap_o = rf_fullcap_a;
+ ztop_wdata_o = rf_rdata_a;
+ end else begin
+ result_data_o = csr_rdata_i;
+ result_cap_o = csr_rcap_i;
+ ztop_wfcap_o = NULL_FULL_CAP;
+ ztop_wdata_o = 32'h0;
+ end
+ cheri_rf_we_raw = ~instr_fault;
+ cheri_ex_valid_raw = 1'b1;
+ cheri_wb_err_raw = instr_fault;
+ end
+ (cheri_operator_i[CJALR] | cheri_operator_i[CJAL]):
+ begin // cd <-- pcc; pcc <-- cs1/pc+offset; pcc.address[0] <--'0'; pcc.sealed <--'0'
+ logic [2:0] seal_type;
+ logic instr_fault;
+
+ // note this is the RV32 definition of JALR arithmetic (add first, then mask off the LSB)
+ branch_target_o = {addr_result[31:1], 1'b0};
+ pcc_cap_o = full2pcap(unseal_cap(rf_fullcap_a));
+ // Note we can't directly use pc_if here
+ // (link address == pc_id + delta, but pc_if should be the next executed PC (the jump target)
+ // if branch prediction works)
+ result_data_o = pc_id_nxt;
+ seal_type = csr_mstatus_mie_i ? OTYPE_SENTRY_IE_BKWD : OTYPE_SENTRY_ID_BKWD;
+ //tfcap = seal_cap(setaddr1_outcap, seal_type);
+ tfcap = (rf_waddr_i == 5'h1) ? seal_cap(setaddr1_outcap, seal_type) :
+ setaddr1_outcap;
+ result_cap_o = full2regcap(tfcap);
+
+ // problem with instr_fault: the pcc_cap.valid check causes a timing issue on instr_addr_o
+ // -- use the speculative version for instruction fetch
+ // -- the ID exception (cheri_ex_err) flushes the pipeline and re-sets the PC so
+ // the speculatively fetched instruction will be flushed
+ // -- this is now mitigated since we no longer do address bound checking here
+ // but let's keep the solution for now
+
+ instr_fault = perm_vio;
+
+ cheri_rf_we_raw = ~instr_fault; // err -> wb exception
+ branch_req_raw = ~instr_fault & cheri_operator_i[CJALR]; // update PCC in CSR
+ // branch_req_spec_raw = 1'b1;
+ branch_req_spec_raw = ~instr_fault; // set fetch PC
+
+ cheri_wb_err_raw = instr_fault;
+ cheri_ex_err_raw = 1'b0;
+ csr_set_mie_raw = ~instr_fault && cheri_operator_i[CJALR] &&
+ ((rf_fullcap_a.otype == OTYPE_SENTRY_IE_FWD) ||
+ (rf_fullcap_a.otype == OTYPE_SENTRY_IE_BKWD)) ;
+ csr_clr_mie_raw = ~instr_fault && cheri_operator_i[CJALR] &&
+ ((rf_fullcap_a.otype == OTYPE_SENTRY_ID_FWD) ||
+ (rf_fullcap_a.otype == OTYPE_SENTRY_ID_BKWD)) ;
+ cheri_ex_valid_raw = 1'b1;
+ end
+ default:;
+ endcase
+ end // always_comb
+
+ assign is_load_cap = cheri_operator_i[CLOAD_CAP];
+ assign is_store_cap = cheri_operator_i[CSTORE_CAP];
+
+ assign is_cap = cheri_operator_i[CLOAD_CAP] | cheri_operator_i[CSTORE_CAP];
+
+ // muxing between "normal" cheri LSU requests (clc/csc) and CLBC
+
+ if (WritebackStage) begin
+ // assert LSU req until instruction is retired (req_done from LSU)
+ // note if the previous instr is also a load/store, cheri_exec_id won't be asserted
+ // till WB is ready (lsu_resp for the previous instr)
+ assign cheri_lsu_req = is_cap & cheri_exec_id_i;
+ end else begin
+ // no WB stage, only assert req in the first_cycle phase of the instruction
+ // (consistent with the RV32 load/store instructions)
+ // Here the instruction won't complete till lsu_resp_valid; in this case,
+ // keeping lsu_req asserted would cause problems as the LSU sees it as a new request
+ assign cheri_lsu_req = is_cap & cheri_exec_id_i & instr_first_cycle_i;
+ end
+
+ assign cheri_lsu_we = is_store_cap;
+ assign cheri_lsu_addr = cs1_addr_plusimm + {29'h0, addr_incr_req_i, 2'b00};
+ assign cheri_lsu_is_cap = is_cap;
+
+ assign cheri_lsu_wdata = is_store_cap ? {csc_wcap.valid, rf_rdata_b} : 33'h0;
+ assign cheri_lsu_wcap = is_store_cap ? csc_wcap : NULL_REG_CAP;
+
+ // RS1/CS1+offset is the starting address for cheri L*/S*.CAP instructions
+ // - keep this separate to help timing on the memory interface
+ assign cs1_imm = (is_cap|cheri_operator_i[CJALR]) ? {{20{cheri_imm12_i[11]}}, cheri_imm12_i} : 0;
+
+ assign cs1_addr_plusimm = rf_rdata_a + cs1_imm;
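+ // (e.g. for a capability load the checked address is cs1.address + sext(imm12); cheri_lsu_addr
+ // then adds 4 for the second beat of the two-beat cap access when addr_incr_req_i is asserted)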
+
+ assign pc_id_nxt = pc_id_i + (instr_is_compressed_i ? 2 : 4);
+
+ //
+ // shared adder for address calculation
+ //
+ always_comb begin : shared_adder
+ logic [31:0] tmp32a, tmp32b;
+
+ if (cheri_operator_i[CJALR]) tmp32a = {{20{cheri_imm12_i[11]}}, cheri_imm12_i};
+ else if (cheri_operator_i[CJAL]) tmp32a = {{11{cheri_imm21_i[20]}}, cheri_imm21_i};
+ else if (cheri_operator_i[CAUIPCC]) tmp32a = {cheri_imm20_i[19], cheri_imm20_i, 11'h0};
+ else if (cheri_operator_i[CAUICGP]) tmp32a = {cheri_imm20_i[19], cheri_imm20_i, 11'h0};
+ else if (cheri_operator_i[CSET_ADDR]) tmp32a = rf_rdata_b;
+ else if (cheri_operator_i[CINC_ADDR]) tmp32a = rf_rdata_b;
+ else if (cheri_operator_i[CINC_ADDR_IMM]) tmp32a = {{20{cheri_imm12_i[11]}}, cheri_imm12_i};
+ else tmp32a = 0;
+
+ if (cheri_operator_i[CJALR]) tmp32b = rf_rdata_a;
+ else if (cheri_operator_i[CJAL]) tmp32b = pc_id_i;
+ else if (cheri_operator_i[CAUIPCC]) tmp32b = pc_id_i;
+ else if (cheri_operator_i[CAUICGP]) tmp32b = rf_rdata_a;
+ else if (cheri_operator_i[CSET_ADDR]) tmp32b = 32'h0;
+ else if (cheri_operator_i[CINC_ADDR]) tmp32b = rf_rdata_a;
+ else if (cheri_operator_i[CINC_ADDR_IMM]) tmp32b = rf_rdata_a;
+ else tmp32b = 0;
+
+ addr_result = tmp32a + tmp32b;
+ end
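+ // a single shared 32-bit adder thus serves jump-target generation (CJAL/CJALR),
+ // AUIPCC/AUICGP and the CSetAddr/CIncAddr family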
+
+ //
+ // Big combinational functions
+ // - break out to make sure we can properly gate off operands to save power
+ //
+ always_comb begin: set_address_comb
+ full_cap_t tfcap1;
+ logic [31:0] taddr1;
+
+ // set_addr operation 1
+ if (cheri_operator_i[CJAL] | cheri_operator_i[CJALR]) begin
+ // we don't really need the representability check here, but update_temp_fields is necessary
+ tfcap1 = pcc2fullcap(pcc_cap_i); // pcc to link register
+ taddr1 = pc_id_nxt;
+ end else if (cheri_operator_i[CAUIPCC]) begin
+ tfcap1 = pcc2fullcap(pcc_cap_i);
+ taddr1 = addr_result;
+ end else if (cheri_operator_i[CSET_ADDR] | cheri_operator_i[CINC_ADDR] |
+ cheri_operator_i[CINC_ADDR_IMM] | cheri_operator_i[CAUICGP]) begin
+ tfcap1 = rf_fullcap_a;
+ taddr1 = addr_result;
+ end else if (scr_legalization) begin
+ tfcap1 = rf_fullcap_a;
+ taddr1 = csr_wdata_o;
+ end else begin
+ tfcap1 = NULL_FULL_CAP;
+ taddr1 = 32'h0;
+ end
+
+ // representability check only
+ setaddr1_outcap = set_address(tfcap1, taddr1, 0, 0);
+ end
+
+ bound_req_t bound_req1, bound_req2;
+
+ always_comb begin: set_bounds_comb
+ logic [31:0] newlen;
+ logic req_exact;
+ logic [31:0] tmp_addr;
+ full_cap_t tfcap3;
+
+ // set_bounds
+ if (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_RNDN]) begin
+ newlen = rf_rdata_b;
+ req_exact = 1'b0;
+ tfcap3 = rf_fullcap_a;
+ tmp_addr = rf_rdata_a;
+ end else if (cheri_operator_i[CSET_BOUNDS_EX]) begin
+ newlen = rf_rdata_b;
+ req_exact = 1'b1;
+ tfcap3 = rf_fullcap_a;
+ tmp_addr = rf_rdata_a;
+ end else if (cheri_operator_i[CSET_BOUNDS_IMM]) begin
+ newlen = 32'(cheri_imm12_i); // unsigned imm
+ req_exact = 1'b0;
+ tfcap3 = rf_fullcap_a;
+ tmp_addr = rf_rdata_a;
+ end else if (cheri_operator_i[CRRL] | cheri_operator_i[CRAM]) begin
+ newlen = rf_rdata_a;
+ req_exact = 1'b0;
+ tfcap3 = NULL_FULL_CAP;
+ tmp_addr = 0;
+ end else begin
+ newlen = 32'h0;
+ req_exact = 1'b0;
+ tfcap3 = NULL_FULL_CAP;
+ tmp_addr = 0;
+ end
+
+ bound_req1 = CheriCapIT8 ? prep_bound_req_it8 (tfcap3, tmp_addr, newlen) :
+ prep_bound_req (tfcap3, tmp_addr, newlen);
+
+ setbounds_outcap = set_bounds(tfcap3, tmp_addr, bound_req2, req_exact);
+
+ setbounds_rndn_outcap = CheriCapIT8 ? set_bounds_rndn_it8(tfcap3, tmp_addr, bound_req2) :
+ set_bounds_rndn(tfcap3, tmp_addr, bound_req2);
+ end
+
+ if (CheriSBND2) begin
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ bound_req2 <= '{0, 0, 0, 0, 0, 0};
+ set_bounds_done <= 1'b0;
+ end else begin
+ bound_req2 <= bound_req1;
+ // set_bounds_done is asserted in the 2nd cycle of execution when CheriSBND2 == 1
+ // note in ibex it actually is ok to hold set_bounds_done high for both cycles
+ // since the multicycle control logic won't look at ex_valid till the 2nd cycle
+ // however this is the cleaner solution.
+ set_bounds_done <= (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] |
+ cheri_operator_i[CSET_BOUNDS_EX] | cheri_operator_i[CRRL] |
+ cheri_operator_i[CRAM]) & cheri_exec_id_i & ~set_bounds_done ;
+ end
+ end
+ end else begin
+ assign bound_req2 = bound_req1;
+ assign set_bounds_done = 1'b1;
+ end
+
+
+
+ // address bound and permission checks for
+ // - cheri no-LSU instructions
+ // - cheri LSU (cap) instructions (including internal instr like LBC)
+ // - RV32 LSU (data) instructions
+ // this is an architectural access check (applies to the whole duration of an instruction)
+ // - based on architectural capability registers and addresses
+
+ // - originally we combined checking for CHERI and RV32 but it caused a combinational loop
+ // that goes from instr_executing -> rv32_lsu_req -> lsu_error -> cheri_ex_err -> instr_executing
+ // it's not a real runtime issue but it confuses timing tools so let's split them for now.
+ // Besides, note that checking/lsu_cheri_err_o is a timing-critical path
+ logic [31:0] rv32_ls_chkaddr;
+ assign rv32_ls_chkaddr = rv32_lsu_addr_i;
+
+ always_comb begin : check_rv32
+ logic [31:0] top_offset;
+ logic [32:0] top_bound;
+ logic [31:0] base_bound, base_chkaddr;
+ logic top_vio, base_vio;
+ logic [32:0] top_chkaddr;
+ logic top_size_ok;
+
+ // generate the address used to check top bound violation
+ base_chkaddr = rv32_ls_chkaddr;
+
+ if (rv32_lsu_type_i == 2'b00) begin
+ top_offset = 32'h4;
+ top_size_ok = |rf_fullcap_a.top33[32:2]; // at least 4 bytes
+ end else if (rv32_lsu_type_i == 2'b01) begin
+ top_offset = 32'h2;
+ top_size_ok = |rf_fullcap_a.top33[32:1];
+ end else begin
+ top_offset = 32'h1;
+ top_size_ok = |rf_fullcap_a.top33[32:0];
+ end
+
+ //top_chkaddr = base_chkaddr + top_offset;
+ top_chkaddr = {1'b0, base_chkaddr};
+
+ // top_bound = rf_fullcap_a.top33;
+ top_bound = rf_fullcap_a.top33 - top_offset;
+ base_bound = rf_fullcap_a.base32;
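+ // note the top check is rearranged as (addr > top33 - size) instead of (addr + size > top33),
+ // keeping the offset adder off the address side of the compare; top_size_ok covers the case
+ // where top33 is smaller than the access size and the subtraction would wrap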
+
+ top_vio = (top_chkaddr > top_bound) || ~top_size_ok;
+ base_vio = (base_chkaddr < base_bound);
+
+ // timing critical (data_req_o) path - don't add any unnecessary terms.
+ // we will choose based on is_cheri on the LSU interface later.
+ // for unaligned access, only check the starting (1st) address
+ // (if there is an error, addr_incr_req won't be there anyway)
+ addr_bound_vio_rv32 = (top_vio | base_vio) & ~addr_incr_req_i ;
+
+ // main permission logic
+ perm_vio_vec_rv32 = 0;
+
+ perm_vio_vec_rv32[PVIO_TAG] = ~rf_fullcap_a.valid;
+ perm_vio_vec_rv32[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a);
+ perm_vio_vec_rv32[PVIO_LD] = ((~rv32_lsu_we_i) && (~rf_fullcap_a.perms[PERM_LD]));
+ perm_vio_vec_rv32[PVIO_SD] = (rv32_lsu_we_i && (~rf_fullcap_a.perms[PERM_SD]));
+
+ perm_vio_rv32 = |perm_vio_vec_rv32;
+ end
+
+ assign rv32_lsu_err = cheri_pmode_i & ~debug_mode_i & (addr_bound_vio_rv32 | perm_vio_rv32);
+
+ // Cheri instr address bound checking
+ // -- we choose to centralize the address bound checking here
+ // so that we can mux the inputs and save some area
+
+
+ logic [31:0] cheri_ls_chkaddr;
+ assign cheri_ls_chkaddr = cs1_addr_plusimm;
+
+ always_comb begin : check_cheri
+ logic [31:0] top_offset;
+ logic [32:0] top_bound;
+ logic [31:0] base_bound, base_chkaddr;
+ logic [32:0] top_chkaddr;
+ logic top_vio, base_vio, top_equal;
+ logic cs2_bad_type;
+ logic cs1_otype_0, cs1_otype_1, cs1_otype_45, cs1_otype_23;
+ logic cs2_otype_45;
+
+ // generate the address used to check top bound violation
+ if (cheri_operator_i[CSEAL])
+ base_chkaddr = rf_rdata_b; // cs2.address
+ else if (cheri_operator_i[CUNSEAL])
+ // inCapBounds(cs2_val, zero_extend(cs1_val.otype), 1)
+ base_chkaddr = {28'h0, decode_otype(rf_fullcap_a.otype, rf_fullcap_a.perms[PERM_EX])};
+ else if (cheri_operator_i[CIS_SUBSET])
+ base_chkaddr = rf_fullcap_b.base32; // cs2.base32
+ else // CLC/CSC
+ base_chkaddr = cheri_ls_chkaddr; // cs1.address + offset
+
+ if (cheri_operator_i[CIS_SUBSET])
+ top_chkaddr = rf_fullcap_b.top33;
+ else if (is_cap) // CLC/CSC
+ top_chkaddr = {1'b0, base_chkaddr[31:3], 3'b000};
+ else
+ top_chkaddr = {1'b0, base_chkaddr};
+
+ if (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL]) begin
+ top_bound = rf_fullcap_b.top33;
+ base_bound = rf_fullcap_b.base32;
+ end else if (is_cap) begin // CLC/CSC
+ top_bound = {rf_fullcap_a.top33[32:3], 3'b000}; // 8-byte aligned access only
+ base_bound = rf_fullcap_a.base32;
+ end else begin
+ top_bound = rf_fullcap_a.top33;
+ base_bound = rf_fullcap_a.base32;
+ end
+
+ top_vio = (top_chkaddr > top_bound);
+ base_vio = (base_chkaddr < base_bound);
+ top_equal = (top_chkaddr == top_bound);
+
+ if (debug_mode_i)
+ addr_bound_vio = 1'b0;
+ else if (is_cap)
+ addr_bound_vio = top_vio | base_vio | top_equal;
+ else if (cheri_operator_i[CIS_SUBSET])
+ addr_bound_vio = top_vio | base_vio;
+ else if (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL])
+ addr_bound_vio = top_vio | base_vio | top_equal;
+ else
+ addr_bound_vio = 1'b0;
+
+ // main permission logic
+ perm_vio_vec = 0;
+ perm_vio = 0;
+ perm_vio_slc = 0;
+ cs2_bad_type = 1'b0;
+ illegal_scr_addr = 1'b0;
+
+    // otype 1: plain forward sentry (OTYPE_SENTRY);
+    // otype 2/3: forward interrupt-status sentries (OTYPE_SENTRY_ID_FWD / OTYPE_SENTRY_IE_FWD);
+    // otype 4/5: backward sentries (OTYPE_SENTRY_ID_BKWD / OTYPE_SENTRY_IE_BKWD); see cheri_pkg
+ cs1_otype_0 = (rf_fullcap_a.otype == 3'h0);
+ cs1_otype_1 = rf_fullcap_a.perms[PERM_EX] & (rf_fullcap_a.otype == 3'h1); // fwd sentry
+ cs1_otype_45 = rf_fullcap_a.perms[PERM_EX] & ((rf_fullcap_a.otype == 3'h4) || (rf_fullcap_a.otype == 3'h5));
+ cs1_otype_23 = rf_fullcap_a.perms[PERM_EX] & ((rf_fullcap_a.otype == 3'h2) || (rf_fullcap_a.otype == 3'h3));
+
+ cs2_otype_45 = rf_fullcap_b.perms[PERM_EX] & ((rf_fullcap_b.otype == 3'h4) || (rf_fullcap_b.otype == 3'h5));
+
+    // note cseal/unseal/cis_subset don't generate exceptions,
+ // so for all exceptions, violations can always be attributed to cs1, thus no need to further split
+ // exceptions based on source operands.
+ if (is_load_cap) begin
+ perm_vio_vec[PVIO_TAG] = ~rf_fullcap_a.valid;
+ perm_vio_vec[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a);
+ perm_vio_vec[PVIO_LD] = ~(rf_fullcap_a.perms[PERM_LD]);
+ perm_vio_vec[PVIO_ALIGN] = (cheri_ls_chkaddr[2:0] != 0);
+ end else if (is_store_cap) begin
+ perm_vio_vec[PVIO_TAG] = (~rf_fullcap_a.valid);
+ perm_vio_vec[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a);
+ perm_vio_vec[PVIO_SD] = ~rf_fullcap_a.perms[PERM_SD];
+ perm_vio_vec[PVIO_SC] = (~rf_fullcap_a.perms[PERM_MC] && rf_fullcap_b.valid);
+ perm_vio_vec[PVIO_ALIGN] = (cheri_ls_chkaddr[2:0] != 0);
+ perm_vio_slc = ~rf_fullcap_a.perms[PERM_SL] && rf_fullcap_b.valid &&
+ (~rf_fullcap_b.perms[PERM_GL]) ;
+ end else if (cheri_operator_i[CSEAL]) begin
+ cs2_bad_type = rf_fullcap_a.perms[PERM_EX] ?
+ ((rf_rdata_b[31:3]!=0)||(rf_rdata_b[2:0]==0)) :
+ ((|rf_rdata_b[31:4]) || (rf_rdata_b[3:0] <= 8));
+      // cs2.addr check : ex: 1-7, non-ex: 9-15
+ perm_vio_vec[PVIO_TAG] = ~rf_fullcap_b.valid;
+ perm_vio_vec[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a) || is_cap_sealed(rf_fullcap_b) ||
+ (~rf_fullcap_b.perms[PERM_SE]) || cs2_bad_type;
+ end else if (cheri_operator_i[CUNSEAL]) begin
+ perm_vio_vec[PVIO_TAG] = ~rf_fullcap_b.valid;
+ perm_vio_vec[PVIO_SEAL] = (~is_cap_sealed(rf_fullcap_a)) || is_cap_sealed(rf_fullcap_b) ||
+ (~rf_fullcap_b.perms[PERM_US]);
+ end else if (cheri_operator_i[CJALR]) begin
+ perm_vio_vec[PVIO_TAG] = ~rf_fullcap_a.valid;
+ perm_vio_vec[PVIO_SEAL] = (is_cap_sealed(rf_fullcap_a) && (cheri_imm12_i != 0)) ||
+ ~(((rf_waddr_i == 0) && (rf_raddr_a_i == 5'h1) && cs1_otype_45) ||
+ ((rf_waddr_i == 0) && (rf_raddr_a_i != 5'h1) && (cs1_otype_0 || cs1_otype_1)) ||
+ ((rf_waddr_i == 5'h1) && (cs1_otype_0 | cs1_otype_23)) ||
+ ((rf_waddr_i != 0) && (cs1_otype_0 | cs1_otype_1)));
+
+ perm_vio_vec[PVIO_EX] = ~rf_fullcap_a.perms[PERM_EX];
+ end else if (cheri_operator_i[CCSR_RW]) begin
+ perm_vio_vec[PVIO_ASR] = ~pcc_cap_i.perms[PERM_SR];
+ illegal_scr_addr = ~debug_mode_i & (csr_addr_o < 27);
+ end else begin
+ perm_vio_vec = 0;
+ end
+
+ perm_vio = | perm_vio_vec;
+
+ end
+
+ // qualified by lsu_req later
+ // store_local error only causes tag clearing unless escalated to fault for debugging
+ assign cheri_lsu_err = cheri_pmode_i & ~debug_mode_i &
+ (addr_bound_vio | perm_vio | (csr_dbg_tclr_fault_i & perm_vio_slc));
+
+ //
+ // fault case mtval generation
+ // report to csr as mtval
+ logic ls_addr_misaligned_only;
+
+ assign cheri_ex_err_info_o = 12'h0; // no ex stage cheri error currently
+ assign cheri_wb_err_info_o = cheri_wb_err_info_q;
+
+ assign cheri_wb_err_d = cheri_wb_err_raw & cheri_exec_id_i & cheri_ex_valid_raw & ~debug_mode_i;
+
+  // addr_bound_vio is the timing-optimized version (gating data_req).
+  // However we need to generate the full version of addr_bound_vio to match the sail exception
+  // priority definition (bound_vio has higher priority than alignment_error).
+  // This has less timing impact since it goes to a flop stage
+ logic addr_bound_vio_ext;
+ logic [32:0] cheri_top_chkaddr_ext;
+
+ assign cheri_top_chkaddr_ext = cheri_ls_chkaddr + 8; // extend to 33 bit for compare
+ assign addr_bound_vio_ext = is_cap ? addr_bound_vio | (cheri_top_chkaddr_ext > rf_fullcap_a.top33) :
+ addr_bound_vio;
+
+ always_comb begin : err_cause_comb
+ cheri_err_cause = vio_cause_enc(addr_bound_vio_ext, perm_vio_vec);
+ rv32_err_cause = vio_cause_enc(addr_bound_vio_rv32, perm_vio_vec_rv32);
+
+
+ ls_addr_misaligned_only = perm_vio_vec[PVIO_ALIGN] && (perm_vio_vec[PVIO_ALIGN-1:0] == 0) && ~addr_bound_vio_ext;
+
+ // cheri_wb_err_raw is already qualified by instr
+ // bit 15:13: reserved
+ // bit 12: illegal_scr_addr
+ // bit 11: alignment error (load/store)
+ // bit 10:0 mtval as defined by CHERIoT arch spec
+ if (cheri_operator_i[CCSR_RW] & cheri_wb_err_raw & illegal_scr_addr & cheri_exec_id_i)
+ // cspecialrw trap, illegal addr, treated as illegal_insn
+ cheri_wb_err_info_d = {3'h0, 1'b1, 12'h0};
+ else if (cheri_operator_i[CCSR_RW] & cheri_wb_err_raw & cheri_exec_id_i)
+ // cspecialrw traps, PERM_SR
+ cheri_wb_err_info_d = {5'h0, 1'b1, cheri_cs2_dec_i, cheri_err_cause};
+ else if (cheri_wb_err_raw & cheri_exec_id_i)
+ cheri_wb_err_info_d = {5'h0, 1'b0, rf_raddr_a_i, cheri_err_cause};
+ else if ((is_load_cap | is_store_cap) & cheri_lsu_err & cheri_exec_id_i)
+ cheri_wb_err_info_d = {4'h0, ls_addr_misaligned_only, 1'b0, rf_raddr_a_i, cheri_err_cause};
+ else if (rv32_lsu_req_i & rv32_lsu_err)
+ cheri_wb_err_info_d = {5'h0, 1'b0, rf_raddr_a_i, rv32_err_cause};
+ else
+ cheri_wb_err_info_d = cheri_wb_err_info_q;
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cheri_wb_err_q <= 1'b0;
+ cheri_wb_err_info_q <= 'h0;
+ end else begin
+ // Simple flop here works since
+ // -- cheri_wb_err is gated by cheri_exec_id/ex_valid
+ // -- all non-load/store cheriot instructions that can generate exceptions
+      //     only take 1 cycle in the ID/EX stage
+      //  -- a faulted non-load/store instruction can only stay 1 cycle in the wb_stage
+ cheri_wb_err_q <= cheri_wb_err_d;
+ cheri_wb_err_info_q <= cheri_wb_err_info_d;
+ end
+ end
+
+ //
+ // muxing in cheri LSU signals with the rv32 signals
+ //
+ assign lsu_req_o = (instr_is_cheri_i ? cheri_lsu_req : rv32_lsu_req_i);
+ assign cpu_lsu_dec_o = ((instr_is_cheri_i && is_cap) | instr_is_rv32lsu_i);
+
+
+ assign cpu_lsu_cheri_err = instr_is_cheri_i ? cheri_lsu_err : rv32_lsu_err;
+ assign cpu_lsu_addr = instr_is_cheri_i ? cheri_lsu_addr : rv32_lsu_addr_i;
+ assign cpu_lsu_we = instr_is_cheri_i ? cheri_lsu_we : rv32_lsu_we_i;
+ assign cpu_lsu_wdata = instr_is_cheri_i ? cheri_lsu_wdata : {1'b0, rv32_lsu_wdata_i};
+ assign cpu_lsu_is_cap = instr_is_cheri_i & cheri_lsu_is_cap;
+
+ // muxing tbre ctrl inputs and CPU ctrl inputs
+
+ assign lsu_cheri_err_o = ~lsu_tbre_sel_i ? cpu_lsu_cheri_err : 1'b0;
+ assign lsu_we_o = ~lsu_tbre_sel_i ? cpu_lsu_we : tbre_lsu_we_i;
+ assign lsu_addr_o = ~lsu_tbre_sel_i ? cpu_lsu_addr : tbre_lsu_addr_i;
+ assign lsu_wdata_o = ~lsu_tbre_sel_i ? cpu_lsu_wdata : tbre_lsu_wdata_i;
+ assign lsu_is_cap_o = ~lsu_tbre_sel_i ? cpu_lsu_is_cap : tbre_lsu_is_cap_i;
+
+ assign lsu_lc_clrperm_o = (~lsu_tbre_sel_i & instr_is_cheri_i) ? cheri_lsu_lc_clrperm : 0;
+ assign lsu_type_o = (~lsu_tbre_sel_i & ~instr_is_cheri_i) ? rv32_lsu_type_i : 2'b00;
+ assign lsu_wcap_o = (~lsu_tbre_sel_i & instr_is_cheri_i) ? cheri_lsu_wcap : NULL_REG_CAP;
+ assign lsu_sign_ext_o = (~lsu_tbre_sel_i & ~instr_is_cheri_i) ? rv32_lsu_sign_ext_i : 1'b0;
+
+
+ // rv32 core side signals
+ // request phase: be nice and mux using the current EX instruction to select
+
+ // addr_incr:
+  //  -- must qualify addr_incr, otherwise it goes to the ALU and messes up non-LSU instructions
+  //  -- however, for LEC we gate this with cheri_pmode; otherwise illegal_insn would feed into the addr logic
+  //     since illegal_insn goes into instr_is_rv32lsu
+ // assign rv32_addr_incr_req_o = instr_is_rv32lsu_i ? addr_incr_req_i : 1'b0; // original
+ assign rv32_addr_incr_req_o = (~cheri_pmode_i | instr_is_rv32lsu_i) ? addr_incr_req_i : 1'b0;
+
+ assign rv32_addr_last_o = addr_last_i;
+
+ // req_done, resp_valid, load/store_err will be directly from LSU
+
+ //
+ // Stack high watermark CSR update
+ //
+
+ // Notes,
+ // - this should also take care of unaligned access (which increases addr only)
+ // (although stack access should not have any)
+ // - it's also ok if the prev instr gets faulted in WB, since stall_mem/data_req_allowed logic ensures
+ // that lsu_req won't be issued till memory response/error comes back
+  //  - what if the instruction gets faulted later in the WB stage? Also fine, since in the worst case, even if
+  //    the high watermark is too aggressive, we will just have to spend more time zeroing out more stack area.
+
+ assign csr_mshwm_set_o = lsu_req_o & ~lsu_cheri_err_o & lsu_we_o &
+ (lsu_addr_o[31:4] >= csr_mshwmb_i[31:4]) & (lsu_addr_o[31:4] < csr_mshwm_i[31:4]);
+ assign csr_mshwm_new_o = {lsu_addr_o[31:4], 4'h0};
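+  // Illustrative example (hypothetical values): with csr_mshwmb_i = 32'h2000_0000 and
+  // csr_mshwm_i = 32'h2000_8000, a store to 32'h2000_1238 lies inside the tracked window, so
+  // csr_mshwm_set_o asserts and csr_mshwm_new_o = 32'h2000_1230 (both the compare and the
+  // update work on bits [31:4], i.e. at 16-byte granularity).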
+
+
+ //
+ // Stack fast clearing support
+ //
+
+ if (CheriStkZ) begin
+ logic lsu_addr_in_stkz_range, stkz_stall_q;
+
+ assign lsu_addr_in_stkz_range = cpu_lsu_dec_o && (cpu_lsu_addr[31:4] >= stkz_base_i[31:4]) &&
+ (cpu_lsu_addr[31:2] < stkz_ptr_i[31:2]);
+
+    // cpu_lsu_dec_o is meant to be an early hint to help the LSU generate mux selects for
+    //  address/ctrl/wdata (eventually to help timing on those output ports)
+    //  - we always suppress lsu_req if stkz clearing is active and the address is in range (to be cleared)
+    //  - however in the first cycle we speculatively still assert cpu_lsu_dec_o to let the LSU choose
+    //    the address from the cpu core (and hold back stkz/tbre_req). In the next cycle we can deassert
+    //    cpu_lsu_dec_o to let stkz/tbre_req go through
+    //  - we also require that lsu_req (after being gated by cpu_stkz_stall0) can only go from 0 to 1
+    //    once in an instruction cycle. This is satisfied because,
+    //    -- Note stkz_active_i is asserted synchronously by writing to the new stkz_ptr CSR.
+    //       As such it is not possible for active to go from '0' to '1' in the middle of a
+    //       load/store instruction when we want to keep lsu_req high while waiting for lsu_req_done
+    //    -- Also, since the cpu_lsu_addr only increments (clc/csc/unaligned) and the stkz address
+    //       only decrements, if lsu_addr_in_range = 0 for the 1st word, it will stay 0 for the 2nd
+    //    -- Need to ensure the stkz design meets those requirements
+ assign cpu_stall_by_stkz_o = stkz_active_i & lsu_addr_in_stkz_range;
+ assign cpu_grant_to_stkz_o = ~instr_first_cycle_i & stkz_stall_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ stkz_stall_q <= 1'b0;
+ end else begin
+ stkz_stall_q <= stkz_active_i & lsu_addr_in_stkz_range;
+ end
+ end
+
+ end else begin
+ assign cpu_stall_by_stkz_o = 1'b0;
+ assign cpu_grant_to_stkz_o = 1'b0;
+ end
+
+ //
+ // debug signal for FPGA only
+ //
+ logic [15:0] dbg_status;
+ logic [66:0] dbg_cs1_vec, dbg_cs2_vec, dbg_cd_vec;
+
+ assign dbg_status = {4'h0,
+ instr_is_rv32lsu_i, rv32_lsu_req_i, rv32_lsu_we_i, rv32_lsu_err,
+ cheri_exec_id_i, cheri_lsu_err, rf_fullcap_a.valid, result_cap_o.valid,
+ addr_bound_vio, perm_vio, addr_bound_vio_rv32, perm_vio_rv32};
+
+ assign dbg_cs1_vec = {rf_fullcap_a.top_cor, rf_fullcap_a.base_cor, // 66:64
+ rf_fullcap_a.exp, // 63:59
+ rf_fullcap_a.top, rf_fullcap_a.base, // 58:41
+ rf_fullcap_a.otype, rf_fullcap_a.cperms, // 40:32
+ rf_rdata_a}; // 31:0
+
+ assign dbg_cs2_vec = {rf_fullcap_b.top_cor, rf_fullcap_b.base_cor, // 66:64
+ rf_fullcap_b.exp, // 63:59
+ rf_fullcap_b.top, rf_fullcap_b.base, // 58:41
+ rf_fullcap_b.otype, rf_fullcap_b.cperms, // 40:32
+ rf_rdata_b}; // 31:0
+
+ assign dbg_cd_vec = {result_cap_o.top_cor, result_cap_o.base_cor, // 66:64
+ result_cap_o.exp, // 63:59
+ result_cap_o.top, result_cap_o.base, // 58:41
+ result_cap_o.otype, result_cap_o.cperms, // 40:32
+ result_data_o}; // 31:0
+
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv
new file mode 100644
index 0000000..186ce55
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv
@@ -0,0 +1,1247 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+package cheri_pkg;
+
+ // bit field widths
+ parameter int unsigned ADDR_W = 32;
+ parameter int unsigned TOP_W = 9;
+ parameter int unsigned TOP8_W = 8; // IT8 encoding only
+ parameter int unsigned BOT_W = 9;
+ parameter int unsigned CEXP_W = 4;
+ parameter int unsigned CEXP5_W = 5; // IT8 encoding only
+ parameter int unsigned EXP_W = 5;
+ parameter int unsigned OTYPE_W = 3;
+ parameter int unsigned CPERMS_W = 6;
+ parameter int unsigned PERMS_W = 12;
+
+ parameter int unsigned REGCAP_W = 37;
+
+ parameter bit [4:0] RESETEXP = 24;
+ parameter int unsigned UPPER_W = 24;
+ parameter bit [4:0] RESETCEXP = 15; // only used in non-IT8 encoding
+
+ // bit index of PERMS field
+ // U0 SE US EX SR MC LD SL LM SD LG GL
+ parameter int unsigned PERM_GL = 0; // global flag
+ parameter int unsigned PERM_LG = 1; // load global
+ parameter int unsigned PERM_SD = 2; // store
+ parameter int unsigned PERM_LM = 3; // load mutable
+ parameter int unsigned PERM_SL = 4; // store local
+ parameter int unsigned PERM_LD = 5; // load
+ parameter int unsigned PERM_MC = 6; // capability load/store
+  parameter int unsigned PERM_SR = 7; // access system registers
+ parameter int unsigned PERM_EX = 8; // execution
+ parameter int unsigned PERM_US = 9; // unseal
+ parameter int unsigned PERM_SE = 10; // seal
+ parameter int unsigned PERM_U0 = 11; //
+
+ parameter logic [2:0] OTYPE_SENTRY_IE_BKWD = 3'd5;
+ parameter logic [2:0] OTYPE_SENTRY_ID_BKWD = 3'd4;
+ parameter logic [2:0] OTYPE_SENTRY_IE_FWD = 3'd3;
+ parameter logic [2:0] OTYPE_SENTRY_ID_FWD = 3'd2;
+ parameter logic [2:0] OTYPE_SENTRY = 3'd1;
+ parameter logic [2:0] OTYPE_UNSEALED = 3'd0;
+
+ // Compressed (regFile) capability type
+ typedef struct packed {
+ logic valid;
+ logic [1:0] top_cor;
+ logic base_cor;
+ logic [EXP_W-1 :0] exp; // expanded
+ logic [TOP_W-1 :0] top;
+ logic [BOT_W-1 :0] base;
+ logic [OTYPE_W-1 :0] otype;
+ logic [CPERMS_W-1:0] cperms;
+ logic rsvd;
+ } reg_cap_t;
+
+ typedef struct packed {
+ logic valid;
+ logic [EXP_W-1 :0] exp; // expanded
+ logic [ADDR_W :0] top33;
+ logic [ADDR_W-1 :0] base32;
+ logic [OTYPE_W-1 :0] otype;
+ logic [PERMS_W-1: 0] perms;
+ logic [1:0] top_cor;
+ logic base_cor;
+ logic [TOP_W-1 :0] top;
+ logic [BOT_W-1 :0] base;
+ logic [CPERMS_W-1:0] cperms;
+ logic [31:0] maska;
+ logic rsvd;
+ logic [31:0] rlen;
+ } full_cap_t;
+
+ typedef struct packed {
+ logic valid;
+ logic [EXP_W-1 :0] exp; // expanded
+ logic [ADDR_W :0] top33;
+ logic [ADDR_W-1 :0] base32;
+ logic [OTYPE_W-1 :0] otype;
+ logic [PERMS_W-1: 0] perms;
+ logic [CPERMS_W-1:0] cperms;
+ logic rsvd;
+ } pcc_cap_t;
+
+ typedef struct packed {
+ logic [32:0] top33req;
+ logic [EXP_W-1:0] exp1;
+ logic [EXP_W-1:0] exp2;
+ logic [EXP_W:0] explen;
+ logic [EXP_W:0] expb; // this can be 32 so must be 6-bit
+ logic in_bound;
+ } bound_req_t;
+
+ parameter reg_cap_t NULL_REG_CAP = '{0, 0, 0, 0, 0, 0, 0, 0, 0};
+ parameter full_cap_t NULL_FULL_CAP = '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ parameter pcc_cap_t NULL_PCC_CAP = '{0, 0, 0, 0, 0, 0, 0, 0};
+
+ parameter logic [5:0] CPERMS_TX = 6'b101111; // Tx (execution root)
+ parameter logic [5:0] CPERMS_TM = 6'b111111; // Tm (memory data root)
+  parameter logic [5:0] CPERMS_TS = 6'b100111; // Ts (seal root)
+
+ parameter pcc_cap_t PCC_RESET_CAP = '{1'b1, RESETEXP, 33'h10000_0000, 0, OTYPE_UNSEALED, 13'h1eb, CPERMS_TX, 1'b0}; // Tx (execution root)
+
+ parameter reg_cap_t MTVEC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TX, 1'b0}; // Tx (execution root)
+ parameter reg_cap_t MTDC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TM, 1'b0}; // Tm
+ parameter reg_cap_t MEPC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TX, 1'b0}; // Tx
+ parameter reg_cap_t MSCRATCHC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TS, 1'b0}; // Ts
+
+
+ parameter logic [PERMS_W-1: 0] PERM_MC_IMSK = (1<<PERM_LD) | (1<<PERM_MC) | (1<<PERM_SD);
+ parameter logic [PERMS_W-1: 0] PERM_LC_IMSK = (1<<PERM_LD) | (1<<PERM_MC);
+ parameter logic [PERMS_W-1: 0] PERM_SC_IMSK = (1<<PERM_SD) | (1<<PERM_MC);
+ parameter logic [PERMS_W-1: 0] PERM_DD_IMSK = 0;
+ parameter logic [PERMS_W-1: 0] PERM_EX_IMSK = (1<<PERM_EX) | (1<<PERM_MC) | (1<<PERM_LD);
+ parameter logic [PERMS_W-1: 0] PERM_SE_IMSK = 0;
+
+ // expand the perms from memory representation
+ function automatic logic [PERMS_W-1:0] expand_perms(logic [CPERMS_W-1:0] cperms);
+ logic [PERMS_W-1:0] perms;
+
+ perms = 0;
+ perms[PERM_GL] = cperms[5];
+
+ if (cperms[4:3] == 2'b11) begin
+ perms[PERM_LG] = cperms[0];
+ perms[PERM_LM] = cperms[1];
+ perms[PERM_SL] = cperms[2];
+ perms = perms | PERM_MC_IMSK;
+ end else if (cperms[4:2] == 3'b101) begin
+ perms[PERM_LG] = cperms[0];
+ perms[PERM_LM] = cperms[1];
+ perms = perms | PERM_LC_IMSK;
+ end else if (cperms[4:0] == 5'b10000) begin
+ perms = perms | PERM_SC_IMSK;
+ end else if (cperms[4:2] == 3'b100) begin
+ perms[PERM_SD] = cperms[0];
+ perms[PERM_LD] = cperms[1];
+ perms = perms | PERM_DD_IMSK;
+ end else if (cperms[4:3] == 2'b01) begin
+ perms[PERM_LG] = cperms[0];
+ perms[PERM_LM] = cperms[1];
+ perms[PERM_SR] = cperms[2];
+ perms = perms | PERM_EX_IMSK;
+ end else if (cperms[4:3] == 2'b00) begin
+ perms[PERM_US] = cperms[0];
+ perms[PERM_SE] = cperms[1];
+ perms[PERM_U0] = cperms[2];
+ perms = perms | PERM_SE_IMSK;
+ end
+
+ return perms;
+ endfunction
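+  // Worked example (derived from the encodings above): the execution root CPERMS_TX = 6'b101111
+  // falls into the cperms[4:3] == 2'b01 branch, so LG/LM/SR come from cperms[2:0] and
+  // PERM_EX_IMSK adds EX/MC/LD; together with GL this expands to perms = 12'h1eb, matching the
+  // perms value used in PCC_RESET_CAP above.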
+
+  // test the implicit permission mask (any bits not 1?)
+ `define TEST_IMSK(P, M) (&((P) | ~(M)))
+
+ // compress perms field to memory representation
+  function automatic logic [CPERMS_W-1:0] compress_perms (logic [PERMS_W-1:0] perms, logic [1:0] unused_qqq); // unused_qqq is a placeholder, just to be compatible with the old encoding for now.
+ logic [CPERMS_W-1:0] cperms;
+
+    // test all type encodings and determine the encoding (Robert's priority order)
+ // Encoding explicit bits based on type
+ cperms = 0;
+ cperms[5] = perms[PERM_GL];
+
+ if (`TEST_IMSK(perms, PERM_EX_IMSK)) begin
+ cperms[0] = perms[PERM_LG];
+ cperms[1] = perms[PERM_LM];
+ cperms[2] = perms[PERM_SR];
+ cperms[4:3] = 2'b01;
+ end else if (`TEST_IMSK(perms, PERM_MC_IMSK)) begin
+ cperms[0] = perms[PERM_LG];
+ cperms[1] = perms[PERM_LM];
+ cperms[2] = perms[PERM_SL];
+ cperms[4:3] = 2'b11;
+ end else if (`TEST_IMSK(perms, PERM_LC_IMSK)) begin
+ cperms[0] = perms[PERM_LG];
+ cperms[1] = perms[PERM_LM];
+ cperms[4:2] = 3'b101;
+ end else if (`TEST_IMSK(perms, PERM_SC_IMSK)) begin
+ cperms[4:0] = 5'b10000;
+ end else if (perms[PERM_SD] | perms[PERM_LD]) begin
+ cperms[0] = perms[PERM_SD];
+ cperms[1] = perms[PERM_LD];
+ cperms[4:2] = 3'b100;
+ end else begin
+ cperms[0] = perms[PERM_US];
+ cperms[1] = perms[PERM_SE];
+ cperms[2] = perms[PERM_U0];
+ cperms[4:3] = 2'b00;
+ end
+
+ //$display("-------compress_perms:%t: %x - %x", $time, perms, cperms);
+ return cperms;
+ endfunction
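+  // Note (illustrative): for legally-encoded permission sets compress_perms is expected to invert
+  // expand_perms; e.g. compressing the 12'h1eb execution-root perms from the example above
+  // reproduces CPERMS_TX = 6'b101111 via the PERM_EX_IMSK branch.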
+
+  // handle cperms in the loaded cap based on the loading cap's requirement
+ function automatic logic [CPERMS_W-1:0] mask_clcperms (logic [CPERMS_W-1:0] cperms_in, logic [3:0] clrperm,
+ logic valid_in, logic sealed);
+ logic [CPERMS_W-1:0] cperms_out;
+ logic clr_gl, clr_lg, clr_sdlm;
+
+ clr_gl = clrperm[0] & valid_in;
+ clr_lg = clrperm[0] & valid_in & ~sealed;
+ clr_sdlm = clrperm[1] & valid_in & ~sealed; // only clear SD/LM if not sealed
+
+ cperms_out = cperms_in;
+ cperms_out[5] = cperms_in[5] & ~clr_gl; // GL
+
+ if (cperms_in[4:3] == 2'b11) begin
+ cperms_out[0] = cperms_in[0] & ~clr_lg; // LG
+ cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM
+ cperms_out[4:2] = clr_sdlm ? 3'b101 : cperms_in[4:2];
+ end else if (cperms_in[4:2] == 3'b101) begin
+ cperms_out[0] = cperms_in[0] & ~clr_lg; // LG
+ cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM
+ end else if (cperms_in[4:0] == 5'b10000) begin
+      cperms_out[4:0] = clr_sdlm? 5'h0 : cperms_in[4:0]; // clearing SD results in NULL permission
+ end else if (cperms_in[4:2] == 3'b100) begin
+ cperms_out[4] = ~(clr_sdlm & ~cperms_in[1]); // must decode to 5'h0 if both ld/sd are 0.
+ cperms_out[0] = cperms_in[0] & ~clr_sdlm;
+ end else if (cperms_in[4:3] == 2'b01) begin
+ cperms_out[0] = cperms_in[0] & ~clr_lg; // LG
+ cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM
+ end
+
+ return cperms_out;
+ endfunction
+
+  // calculate length (mem size) in bytes of a capability
+ function automatic logic[31:0] get_cap_len (full_cap_t full_cap);
+ logic [32:0] tmp33;
+ logic [31:0] result;
+
+ tmp33 = full_cap.top33 - full_cap.base32;
+ result = tmp33[32]? 32'hffff_ffff: tmp33[31:0];
+
+ return result;
+ endfunction
+
+  // obtain the 33-bit representation of top
+ function automatic logic[32:0] get_bound33(logic [TOP_W-1:0] top, logic [1:0] cor,
+ logic [EXP_W-1:0] exp, logic [31:0] addr);
+ logic [32:0] t1, t2, mask, cor_val;
+
+ if (cor[1])
+ cor_val = {33{cor[1]}}; // negative sign extension
+ else
+ cor_val = {32'h0, (~cor[1]) & cor[0]};
+
+ cor_val = (cor_val << exp) << TOP_W;
+ mask = (33'h1_ffff_ffff << exp) << TOP_W;
+
+ t1 = ({1'b0, addr} & mask) + cor_val; // apply correction and truncate
+//$display("gb33: corval=%09x, mask=%09x, t1=%09x", cor_val, mask, t1);
+ t2 = {24'h0, top}; // extend to 32 bit
+ t1 = t1 | (t2 << exp);
+
+ return t1;
+
+ endfunction
+
+  // this implementation gives slightly better timing/area results
+ function automatic logic[32:0] get_bound33_trial(logic [TOP_W-1:0] top, logic [1:0] cor,
+ logic [EXP_W-1:0] exp, logic [31:0] addr);
+ logic [32:0] t33a, t33b, result;
+ logic [23:0] t24a, t24b, mask24, cor24;
+
+ if (cor[1])
+ cor24 = {24{cor[1]}}; // negative sign extension
+ else
+ cor24 = {23'h0, (~cor[1]) & cor[0]};
+
+ cor24 = (cor24 << exp);
+ mask24 = {24{1'b1}} << exp;
+
+ t24a = ({1'b0, addr[31:9]} & mask24) + cor24; // apply correction and truncate
+//$display("gb33: corval=%09x, mask=%09x, t1=%09x", cor_val, mask, t1);
+ t33a = {24'h0, top};
+ result = {t24a, 9'h0} | (t33a << exp);
+
+ return result;
+
+ endfunction
+
+ // update the top/base correction for a cap
+ function automatic logic [2:0] update_temp_fields(logic [TOP_W-1:0] top, logic [BOT_W-1:0] base,
+ logic [BOT_W-1:0] addrmi);
+ logic top_hi, addr_hi;
+ logic [2:0] res3;
+
+ top_hi = (top < base);
+ addr_hi = (addrmi < base);
+
+ // top_cor
+ res3[2:1] = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11);
+
+ // base_cor
+ res3[0] = (addr_hi) ? 1 : 0;
+
+ return res3;
+ endfunction
+
+ // set address of a capability
+ // by default we check for representability only.
+ // use checktop/checkbase to check explicitly against top33/base32 bounds (pcc updates)
+ // * note, representability check in most cases (other than exp=24) covers the base32 check
+
+ function automatic full_cap_t set_address (full_cap_t in_cap, logic [31:0] newptr, logic chktop, logic chkbase);
+ full_cap_t out_cap;
+ logic [32:0] tmp33;
+ logic [32-TOP_W:0] tmp24, mask24;
+ logic [2:0] tmp3;
+ logic [BOT_W-1:0] ptrmi9;
+ logic top_lt;
+
+ out_cap = in_cap;
+ mask24 = {(33-TOP_W){1'b1}} << in_cap.exp; // mask24 = 0 if exp == 24
+
+ tmp33 = {1'b0, newptr} - {1'b0, in_cap.base32}; // extend to make sure we can see carry from MSB
+ tmp24 = tmp33[32:TOP_W] & mask24;
+ top_lt = ({1'b0, newptr} < {in_cap.top33[32:1], 1'b0});
+
+ if ((tmp24 != 0) || (chktop & ~top_lt) || (chkbase & tmp33[32]))
+ out_cap.valid = 1'b0;
+
+ ptrmi9 = BOT_W'(newptr >> in_cap.exp);
+ tmp3 = update_temp_fields(out_cap.top, out_cap.base, ptrmi9);
+ out_cap.top_cor = tmp3[2:1];
+ out_cap.base_cor = tmp3[0];
+
+ return out_cap;
+ endfunction
+
+ //
+ // utility functions
+ //
+
+  // return the size (bit length) of the input number, excluding leading zeros
+ function automatic logic [5:0] get_size(logic [31:0] din);
+ logic [5:0] count;
+ logic [31:0] a32;
+ int i;
+
+ a32 = {din[31], 31'h0};
+ for (i = 30; i >= 0; i--) a32[i] = a32[i+1] | din[i];
+ count = thermo_dec32(a32);
+
+ return count;
+ endfunction
+
+  // return the exp of a 32-bit input (by counting trailing zeros)
+ function automatic logic [5:0] count_tz (logic [31:0] din);
+ logic [5:0] count;
+ logic [31:0] a32, b32;
+ int i;
+
+ a32 = {31'h0, din[0]};
+ for (i = 1; i < 32; i++) a32[i] = a32[i-1] | din[i];
+ // count = a32[31] ? thermo_dec32(~a32) : 0; // if input all zero, return 0
+ count = thermo_dec32(~a32); // if input all zero, return 32
+
+ return count;
+ endfunction
+
+  // this simply counts the number of 1's in a thermometer-encoded input vector
+ // (32-N zeros followed by N ones)
+ //
+ function automatic logic [5:0] thermo_dec32(logic [31:0] a32);
+ logic [5:0] count;
+ logic [31:0] b32;
+
+ if (a32[31]) count = 32;
+ else begin
+ count[5] = 1'b0;
+ count[4] = a32[15];
+ b32[15:0] = count[4] ? a32[31:16] : a32[15:0];
+ count[3] = b32[7];
+ b32[ 7:0] = count[3] ? b32[15:8] : b32[7:0];
+ count[2] = b32[3];
+ b32[ 3:0] = count[2] ? b32[7:4] : b32[3:0];
+ count[1] = b32[1];
+ b32[ 1:0] = count[1] ? b32[3:2] : b32[1:0];
+ count[0] = b32[0];
+ end
+
+ return count;
+ endfunction
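+  // Worked example (illustrative): for din = 32'h0000_0100, get_size() builds a thermometer
+  // vector with bits [8:0] set and returns 9 (the bit length), while count_tz() returns 8
+  // (the number of trailing zeros). An all-zero input returns 0 from get_size() and 32 from
+  // count_tz(), as noted in the comments above.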
+
+ // set bounds (top/base/exp/addr) of a capability
+
+ // break up into 2 parts to enable 2-cycle option
+ function automatic bound_req_t prep_bound_req (full_cap_t in_cap, logic [31:0] addr, logic [31:0] length);
+ bound_req_t result;
+ logic [5:0] size_result;
+
+ result.top33req = {1'b0, addr} + {1'b0, length}; // "requested" 33-bit top
+ result.expb = count_tz(addr);
+ result.explen = get_size({9'h0, length[31:9]}); // length exp without saturation
+
+ size_result = result.explen;
+ result.exp1 = (size_result >= 6'(RESETCEXP)) ? EXP_W'(RESETEXP) : EXP_W'(size_result);
+
+ size_result += 1;
+ result.exp2 = (size_result >= 6'(RESETCEXP)) ? EXP_W'(RESETEXP) : EXP_W'(size_result);
+
+    // moved this to prep_bound_req to share with set_bounds_rndn
+ // should be ok to fit this in cycle 1 since it is a straight compare
+ result.in_bound = ~((result.top33req > in_cap.top33) || (addr < in_cap.base32));
+
+ return result;
+ endfunction
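+  // Illustrative example (hypothetical values): for length = 32'h1200, length[31:9] = 23'h9, so
+  // explen = 4 and the two candidate exponents are exp1 = 4 and exp2 = 5; set_bounds() below
+  // then falls back to exp2 only if the 9-bit mantissa overflows at exp1.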
+
+ function automatic bound_req_t prep_bound_req_it8 (full_cap_t in_cap, logic [31:0] addr, logic [31:0] length); // IT8 encoding
+ bound_req_t result;
+ logic [4:0] size_result;
+ logic gt24;
+ logic [4:0] limit24_mask;
+
+ result.top33req = {1'b0, addr} + {1'b0, length}; // "requested" 33-bit top
+ result.expb = count_tz(addr);
+ result.explen = get_size({9'h0, length[31:9]}); // length exp without saturation, max 23
+
+    // since explen <= 23, exp1 and exp2 must be <= 24. No need for saturation logic
+ result.exp1 = result.explen;
+ result.exp2 = result.explen + 1;
+
+    // moved this to prep_bound_req to share with set_bounds_rndn
+ // should be ok to fit this in cycle 1 since it is a straight compare
+ result.in_bound = ~((result.top33req > in_cap.top33) || (addr < in_cap.base32));
+
+ return result;
+ endfunction
+
+ function automatic full_cap_t set_bounds (full_cap_t in_cap, logic[31:0] addr,
+ bound_req_t bound_req, logic req_exact);
+ full_cap_t out_cap;
+
+ logic [EXP_W-1:0] exp1, exp2;
+ logic [32:0] top33req;
+ logic [BOT_W:0] base1, base2, top1, top2, len1, len2;
+ logic [32:0] mask1, mask2;
+ logic ovrflw, topoff1, topoff2, topoff;
+ logic baseoff1, baseoff2, baseoff;
+ logic tophi1, tophi2, tophi;
+ logic in_bound;
+
+ out_cap = in_cap;
+
+ top33req = bound_req.top33req;
+ exp1 = bound_req.exp1;
+ exp2 = bound_req.exp2;
+ in_bound = bound_req.in_bound;
+
+ // 1st path
+ mask1 = {33{1'b1}} << exp1;
+ base1 = (BOT_W+1)'(addr >> exp1);
+ topoff1 = |(top33req & ~mask1);
+ baseoff1 = |({1'b0, addr} & ~mask1);
+ top1 = (BOT_W+1)'(top33req >> exp1) + (BOT_W+1)'(topoff1);
+ len1 = top1 - base1;
+ tophi1 = (top1[8:0] >= base1[8:0]);
+
+ // overflow detection based on 1st path
+ ovrflw = len1[9];
+
+ // 2nd path in parallel
+ mask2 = {33{1'b1}} << exp2;
+ base2 = (BOT_W+1)'(addr >> exp2);
+ topoff2 = |(top33req & ~mask2);
+ baseoff2 = |({1'b0, addr} & ~mask2);
+ top2 = (BOT_W+1)'(top33req >> exp2) + (BOT_W+1)'(topoff2);
+ len2 = top2 - base2;
+ tophi2 = (top2[8:0] >= base2[8:0]);
+
+ // select results
+ if (~ovrflw) begin
+ out_cap.exp = exp1;
+ out_cap.top = top1[TOP_W-1:0];
+ out_cap.base = base1[BOT_W-1:0];
+ out_cap.maska = mask1[31:0];
+ out_cap.rlen = {22'h0, len1} << exp1;
+ topoff = topoff1;
+ baseoff = baseoff1;
+ tophi = tophi1;
+ end else begin
+ out_cap.exp = exp2;
+ out_cap.top = top2[TOP_W-1:0];
+ out_cap.base = base2[BOT_W-1:0];
+ out_cap.maska = mask2[31:0];
+ out_cap.rlen = {22'h0, len2} << exp2;
+ topoff = topoff2;
+ baseoff = baseoff2;
+ tophi = tophi2;
+ end
+
+`ifdef CHERI_PKG_DEBUG
+
+$display("--- set_bounds: exact = %x, ovrflw = %x, exp1 = %x, exp2 = %x, exp = %x, len = %x", ~(topoff|baseoff), ovrflw, exp1, exp2, out_cap.exp, out_cap.rlen);
+$display("--- set_bounds: b1 = %x, t1 = %x, b2 = %x, t2 = %x", base1, top1, base2, top2);
+`endif
+
+ // top/base correction values
+ // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0
+ // as such, top_cor can only be either either 0 or +1;
+ out_cap.top_cor = tophi ? 2'b00 : 2'b01;
+ out_cap.base_cor = 1'b0;
+
+ if (req_exact & (topoff | baseoff)) out_cap.valid = 1'b0;
+
+    // we use the "requested top" to verify the results against the original bounds
+    // also compare address >= old base32 to handle the exp=24 case
+    // exp = 24 case: can have addr < base (not covered by representability checking);
+ // other exp cases: always addr >= base when out_cap.tag == 1
+ if (~in_bound)
+ out_cap.valid = 1'b0;
+
+ return out_cap;
+ endfunction
+
+ function automatic full_cap_t set_bounds_rndn (full_cap_t in_cap, logic[31:0] addr,
+ bound_req_t bound_req);
+ full_cap_t out_cap;
+
+ logic [EXP_W:0] explen, expb, exp_final;
+ logic [32:0] top33req;
+ logic in_bound;
+ logic el_gt_eb, el_gt_14, eb_gt_14;
+ logic tophi;
+
+ out_cap = in_cap;
+
+ top33req = bound_req.top33req;
+ explen = bound_req.explen;
+ expb = bound_req.expb;
+ in_bound = bound_req.in_bound;
+
+ el_gt_eb = (explen > expb);
+ el_gt_14 = (explen > 14);
+ eb_gt_14 = (expb > 14);
+
+ // final exp = min(14, e_l, e_b)
+ exp_final = (el_gt_eb & !eb_gt_14) ? expb : (el_gt_14 ? 14 : explen);
+
+ // if (el_gt_eb & eb_gt_14) exp_final = 14; // min(14, min(e_l, e_b)), el > eb, eb > 14
+ // else if (el_gt_eb) exp_final = expb; // min(14, min(e_l, e_b)), el > eb, eb <= 14
+ // else if (el_gt_14) exp_final = 14; // min(14, min(e_l, e_b)), el <= eb, el > 14
+ // else exp_final = explen; // e_l, el <= eb, el <= 14
+
+ out_cap.exp = exp_final;
+ out_cap.base = (BOT_W)'(addr >> exp_final);
+
+ out_cap.top = (el_gt_eb | el_gt_14) ? ((BOT_W)'(out_cap.base-1)) :
+ ((BOT_W)'(top33req >> exp_final));
+
+ if (~in_bound) out_cap.valid = 1'b0;
+
+ // top/base correction values
+ // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0
+    // as such, top_cor can only be either 0 or +1;
+ tophi = (out_cap.top >= out_cap.base);
+ out_cap.top_cor = tophi ? 2'b00 : 2'b01;
+ out_cap.base_cor = 2'b00;
+
+ return out_cap;
+ endfunction
+
+
+ function automatic full_cap_t set_bounds_rndn_it8 (full_cap_t in_cap, logic[31:0] addr, // IT8 encoding
+ bound_req_t bound_req);
+ full_cap_t out_cap;
+
+ logic [EXP_W:0] explen, expb, exp_final;
+ logic [32:0] top33req;
+ logic in_bound;
+ logic el_gt_eb;
+ logic tophi;
+
+ out_cap = in_cap;
+
+ top33req = bound_req.top33req;
+ explen = bound_req.explen;
+ expb = bound_req.expb;
+ in_bound = bound_req.in_bound;
+
+ el_gt_eb = (explen > expb);
+
+ exp_final = (el_gt_eb) ? expb : explen;
+
+ out_cap.exp = exp_final;
+ out_cap.base = (BOT_W)'(addr >> exp_final);
+
+ out_cap.top = (el_gt_eb) ? ((BOT_W)'(out_cap.base-1)) : ((BOT_W)'(top33req >> exp_final));
+
+ if (~in_bound) out_cap.valid = 1'b0;
+
+ // top/base correction values
+ // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0
+    // as such, top_cor can only be either 0 or +1;
+ tophi = (out_cap.top >= out_cap.base);
+ out_cap.top_cor = tophi ? 2'b00 : 2'b01;
+ out_cap.base_cor = 2'b00;
+
+ return out_cap;
+ endfunction
+
+
+
+ // seal/unseal related functions
+ function automatic full_cap_t seal_cap (full_cap_t in_cap, logic [OTYPE_W-1:0] new_otype);
+ full_cap_t out_cap;
+
+ out_cap = in_cap;
+ out_cap.otype = new_otype;
+ return out_cap;
+ endfunction
+
+ function automatic full_cap_t unseal_cap (full_cap_t in_cap);
+ full_cap_t out_cap;
+ out_cap = in_cap;
+ out_cap.otype = OTYPE_UNSEALED;
+ return out_cap;
+ endfunction
+
+ function automatic logic is_cap_sealed (full_cap_t in_cap);
+ logic result;
+
+ result = (in_cap.otype != OTYPE_UNSEALED);
+ return result;
+ endfunction
+
+ //function automatic logic is_cap_sentry (full_cap_t in_cap);
+ // logic result;
+
+ // result = (in_cap.perms[PERM_EX]) && ((in_cap.otype == OTYPE_SENTRY) || (in_cap.otype == OTYPE_SENTRY_ID) ||
+ // (in_cap.otype == OTYPE_SENTRY_IE));
+ // return result;
+ //endfunction
+
+
+ function automatic logic [3:0] decode_otype (logic [2:0] otype3, logic perm_ex);
+ logic [3:0] otype4;
+
+ otype4 = {~perm_ex & (otype3 != 0), otype3};
+ return otype4;
+ endfunction
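+  // Illustrative example: decode_otype() maps the 3-bit stored otype into the architectural 4-bit
+  // otype space, e.g. otype3 = 3'h1 decodes to 4'h1 for an executable cap but to 4'h9 for a
+  // non-executable cap (non-executable sealed types occupy 9..15), which is what the CSEAL
+  // cs2.addr range check in the EX-stage logic relies on.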
+
+ // reg_cap decompression (to full_cap)
+ function automatic full_cap_t reg2fullcap (reg_cap_t reg_cap, logic [31:0] addr);
+ full_cap_t full_cap;
+
+ full_cap.perms = expand_perms(reg_cap.cperms);
+ full_cap.valid = reg_cap.valid;
+ full_cap.exp = reg_cap.exp;
+ full_cap.otype = reg_cap.otype;
+ full_cap.top_cor = reg_cap.top_cor;
+ full_cap.base_cor = reg_cap.base_cor;
+ full_cap.top = reg_cap.top;
+ full_cap.base = reg_cap.base;
+ full_cap.cperms = reg_cap.cperms;
+ full_cap.rsvd = reg_cap.rsvd;
+
+ full_cap.top33 = get_bound33(reg_cap.top, reg_cap.top_cor, reg_cap.exp, addr);
+ full_cap.base32 = get_bound33(reg_cap.base, {2{reg_cap.base_cor}}, reg_cap.exp, addr);
+ // full_cap = update_bounds(full_cap, addr); // for some reason this increases area
+
+ full_cap.maska = 0;
+ full_cap.rlen = 0;
+
+ return full_cap;
+ endfunction
+
+ // full_cap compression (to reg_cap).
+ // note we don't recalculate top/base_cor here since the address/bounds of a capability
+ // won't change without an explicit instruction (only exception is PCC)
+ function automatic reg_cap_t full2regcap (full_cap_t full_cap);
+ reg_cap_t reg_cap;
+
+ reg_cap = NULL_REG_CAP;
+ reg_cap.valid = full_cap.valid;
+ reg_cap.top_cor = full_cap.top_cor;
+ reg_cap.base_cor = full_cap.base_cor;
+ reg_cap.exp = full_cap.exp;
+ reg_cap.top = full_cap.top;
+ reg_cap.base = full_cap.base;
+ reg_cap.cperms = full_cap.cperms;
+ reg_cap.rsvd = full_cap.rsvd;
+ reg_cap.otype = full_cap.otype;
+
+ return reg_cap;
+ endfunction
+
+ // pcc_cap expansion (to full_cap).
+ // -- pcc is a special case since the address (PC) moves around..
+ // so have to adjust correction factors and validate bounds here
+ // function automatic full_cap_t pcc2fullcap (pcc_cap_t pcc_cap, logic [31:0] pc_addr);
+ function automatic full_cap_t pcc2fullcap (pcc_cap_t in_pcap);
+ full_cap_t pcc_fullcap;
+
+ pcc_fullcap.valid = in_pcap.valid;
+ pcc_fullcap.exp = in_pcap.exp;
+ pcc_fullcap.top33 = in_pcap.top33;
+ pcc_fullcap.base32 = in_pcap.base32;
+ pcc_fullcap.otype = in_pcap.otype;
+ pcc_fullcap.perms = in_pcap.perms;
+ pcc_fullcap.top_cor = 2'b0; // will be updated by set_address()
+ pcc_fullcap.base_cor = 1'b0;
+ pcc_fullcap.top = TOP_W'(in_pcap.top33 >> (in_pcap.exp));
+ pcc_fullcap.base = BOT_W'(in_pcap.base32 >> (in_pcap.exp));
+ pcc_fullcap.cperms = in_pcap.cperms;
+ pcc_fullcap.maska = 0; // not used in pcc_cap
+ pcc_fullcap.rsvd = in_pcap.rsvd;
+ pcc_fullcap.rlen = 0; // not used in pcc_cap
+
+ return pcc_fullcap;
+ endfunction
+
+ // compress full_cap to pcc_cap
+ function automatic pcc_cap_t full2pcap (full_cap_t full_cap);
+ pcc_cap_t pcc_cap;
+
+ pcc_cap.valid = full_cap.valid;
+ pcc_cap.exp = full_cap.exp;
+ pcc_cap.top33 = full_cap.top33;
+ pcc_cap.base32 = full_cap.base32;
+ pcc_cap.otype = full_cap.otype;
+ pcc_cap.perms = full_cap.perms;
+ pcc_cap.cperms = full_cap.cperms;
+ pcc_cap.rsvd = full_cap.rsvd;
+
+ return pcc_cap;
+ endfunction
+
+ function automatic reg_cap_t pcc2mepcc (pcc_cap_t pcc_cap, logic [31:0] address, logic clrtag);
+ reg_cap_t reg_cap;
+ full_cap_t tfcap0, tfcap1;
+
+ tfcap0 = pcc2fullcap(pcc_cap);
+ // Still need representability check to cover save_pc_if and save_pc_wb cases
+ tfcap1 = set_address(tfcap0, address, 0, 0);
+
+ reg_cap = full2regcap(tfcap1);
+ if (clrtag) reg_cap.valid = 1'b0;
+
+ return reg_cap;
+ endfunction
+
+ //
+ // pack/unpack the cap+addr between reg and memory
+ // format 0: lsw32 = addr, msw33 = cap fields
+ //
+  //   p'7 otype'3 E'4 B'9 T'9
+ localparam integer RSVD_LO = 31;
+ localparam integer CPERMS_LO = 25;
+ localparam integer OTYPE_LO = 22;
+ localparam integer CEXP_LO = 18;
+ localparam integer CEXP5_LO = 17; // IT8 encoding only
+ localparam integer TOP_LO = 9;
+ localparam integer BASE_LO = 0;
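+  // Resulting msw layout for format 0 (derived from the LO offsets above):
+  //   msw[32]    = tag
+  //   msw[31]    = RSVD
+  //   msw[30:25] = CPERMS
+  //   msw[24:22] = OTYPE
+  //   msw[21:18] = CEXP  (msw[21:17] = CEXP5 for the IT8 encoding)
+  //   msw[17:9]  = TOP   (msw[16:9]  = TOP8  for the IT8 encoding)
+  //   msw[8:0]   = BASE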
+
+ // mem2reg, cap meta data, original cap bound encoding, memfmt0
+ function automatic reg_cap_t mem2regcap_fmt0 (logic [32:0] msw, logic [32:0] addr33, logic [3:0] clrperm);
+ reg_cap_t regcap;
+ logic [EXP_W-1:0] tmp5;
+ logic [2:0] tmp3;
+ logic [CPERMS_W-1:0] cperms_mem;
+ logic [BOT_W-1:0] addrmi9;
+ logic sealed;
+ logic valid_in;
+
+ valid_in = msw[32] & addr33[32];
+ regcap.valid = valid_in & ~clrperm[3];
+
+ tmp5 = {1'b0, msw[CEXP_LO+:CEXP_W]};
+ if (tmp5 == EXP_W'(RESETCEXP)) tmp5 = RESETEXP;
+ regcap.exp = tmp5;
+
+ regcap.top = msw[TOP_LO+:TOP_W];
+ regcap.base = msw[BASE_LO+:BOT_W];
+ regcap.otype = msw[OTYPE_LO+:OTYPE_W];
+
+ sealed = (regcap.otype != OTYPE_UNSEALED);
+ cperms_mem = msw[CPERMS_LO+:CPERMS_W];
+ regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
+ addrmi9 = BOT_W'({1'b0, addr33[31:0]} >> regcap.exp); // ignore the tag valid bit
+ tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
+ regcap.top_cor = tmp3[2:1];
+ regcap.base_cor = tmp3[0];
+
+ regcap.rsvd = msw[RSVD_LO];
+
+ return regcap;
+
+ endfunction
+
+ // mem2reg, cap meta data, IT8 encoding, memfmt0
+ function automatic reg_cap_t mem2regcap_it8_fmt0 (logic [32:0] msw, logic [32:0] addr33, logic [3:0] clrperm); // IT8
+ reg_cap_t regcap;
+ logic [EXP_W-1:0] cexp;
+ logic [TOP_W-2:0] top8, base8;
+ logic [TOP_W-1:0] top9, base9;
+ logic denorm, ltop, btop, ttop, tcin;
+ logic top_hi, addr_hi;
+ logic [2:0] res3;
+
+ logic [CPERMS_W-1:0] cperms_mem;
+ logic [BOT_W-1:0] addrmi9;
+ logic sealed;
+ logic valid_in;
+
+ valid_in = msw[32] & addr33[32];
+ regcap.valid = valid_in & ~clrperm[3];
+
+ cexp = msw[CEXP5_LO+:CEXP5_W];
+ denorm = (cexp == 0);
+
+ btop = msw[BASE_LO+BOT_W-1];
+ base8 = msw[BASE_LO+:(BOT_W-1)];
+ top8 = msw[TOP_LO+:(TOP_W-1)];
+
+ tcin = (top8 < base8); // can actually merge it with t_hi in update_temp_fields QQQ
+ ltop = ~denorm;
+ ttop = ltop ^ tcin ^ btop;
+
+ regcap.exp = cexp ^ {5{~denorm}}; // this is the ^0b11111 part
+ top9 = {ttop, top8};
+ base9 = {btop, base8};
+ regcap.top = top9;
+ regcap.base = base9;
+
+ regcap.otype = msw[OTYPE_LO+:OTYPE_W];
+
+ sealed = (regcap.otype != OTYPE_UNSEALED);
+ cperms_mem = msw[CPERMS_LO+:CPERMS_W];
+ regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
+ addrmi9 = BOT_W'({1'b0, addr33[31:0]} >> regcap.exp); // ignore the tag valid bit
+
+ // update temp fields
+ // tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
+ // top_hi = (top < base);
+ top_hi = (btop ^ ttop) ? ~ttop : tcin;
+ addr_hi = (addrmi9 < base9);
+
+ regcap.top_cor = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11);
+ regcap.base_cor = (addr_hi) ? 1'b1 : 1'b0;
+
+ regcap.rsvd = msw[RSVD_LO];
+
+ return regcap;
+
+ endfunction
+
+ // reg to mem, meta data, original cap bound encoding, memfmt0
+ function automatic logic[32:0] reg2memcap_fmt0 (reg_cap_t regcap);
+
+ logic [32:0] msw;
+
+ msw[32] = regcap.valid ;
+
+ msw[CEXP_LO+:CEXP_W] = (regcap.exp == RESETEXP) ? RESETCEXP : regcap.exp[CEXP_W-1:0];
+ msw[TOP_LO+:TOP_W] = regcap.top ;
+ msw[BASE_LO+:BOT_W] = regcap.base ;
+ msw[OTYPE_LO+:OTYPE_W] = regcap.otype ;
+ msw[CPERMS_LO+:CPERMS_W] = regcap.cperms;
+ msw[RSVD_LO] = regcap.rsvd;
+
+ return msw;
+
+ endfunction
+
+ // reg to mem, meta data, IT8 encoding, memfmt0
+ function automatic logic[32:0] reg2memcap_it8_fmt0 (reg_cap_t regcap); // IT8
+
+ logic [32:0] msw;
+ logic denorm, ltop, cor;
+ logic [9:0] top10, base10, len10;
+
+ cor = (regcap.top_cor == {2{regcap.base_cor}});
+ top10 = {~cor, regcap.top};
+ base10 = {1'b0, regcap.base};
+ len10 = top10 - base10;
+ ltop = len10[9] | len10[8];
+
+ denorm = (regcap.exp == 0) && ~ltop;
+
+ msw[32] = regcap.valid;
+
+ msw[CEXP5_LO+:CEXP5_W] = regcap.exp ^ {5{~denorm}};
+ msw[TOP_LO+:(TOP_W-1)] = regcap.top[7:0];
+ msw[BASE_LO+:BOT_W] = regcap.base ;
+ msw[OTYPE_LO+:OTYPE_W] = regcap.otype ;
+ msw[CPERMS_LO+:CPERMS_W] = regcap.cperms;
+ msw[RSVD_LO] = regcap.rsvd;
+
+ return msw;
+
+ endfunction
+
+ //
+ // pack/unpack the cap+addr between reg and memory
+ // format 1: lsw32 = RSVD+EXP+T+B+A9, msw32 = CPERMS+OTYPE+A23
+ //
+
+ // mem to reg, meta data, original cap bound encoding, memfmt1
+ function automatic reg_cap_t mem2regcap_fmt1 (logic [32:0] msw, logic [32:0] lsw, logic [3:0] clrperm);
+ reg_cap_t regcap;
+ logic [2:0] tmp3;
+ logic sealed;
+ logic [8:0] addrmi9;
+ logic [CPERMS_W-1:0] cperms_mem;
+ logic valid_in;
+
+ // lsw is now EXP+B+T+A
+ valid_in = msw[32] & lsw[32];
+ regcap.valid = valid_in & ~clrperm[3];
+ regcap.exp = (lsw[30:27] == RESETCEXP) ? RESETEXP : {1'b0, lsw[30:27]};
+ regcap.base = lsw[26:18];
+ regcap.top = lsw[17:9];
+ addrmi9 = (lsw[30:27] == RESETCEXP) ? {1'b0, lsw[8:1]} : lsw[8:0];
+
+ regcap.otype = msw[25:23];
+ sealed = (regcap.otype != OTYPE_UNSEALED);
+
+ // cperms_mem = {lsw[31], msw[31:26]};
+ cperms_mem = msw[31:26];
+ regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
+ regcap.rsvd = lsw[31];
+
+ tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
+ regcap.top_cor = tmp3[2:1];
+ regcap.base_cor = tmp3[0];
+
+ return regcap;
+
+ endfunction
+
+
+ // mem to reg, meta data, IT8 encoding, memfmt1
+ function automatic reg_cap_t mem2regcap_it8_fmt1 (logic [32:0] msw, logic [32:0] lsw, logic [3:0] clrperm); // xyz
+ reg_cap_t regcap;
+ logic [EXP_W-1:0] cexp;
+ logic [TOP_W-2:0] top8, base8;
+ logic [TOP_W-1:0] top9, base9;
+ logic denorm, ltop, btop, ttop, tcin;
+ logic top_hi, addr_hi;
+ logic [2:0] res3;
+
+ logic sealed;
+ logic [8:0] addrmi9;
+ logic [CPERMS_W-1:0] cperms_mem;
+ logic valid_in;
+
+
+ // lsw is now EXP+T+B+A
+ valid_in = msw[32] & lsw[32];
+ regcap.valid = valid_in & ~clrperm[3];
+
+ cexp = lsw[30:26];
+ denorm = (cexp == 0);
+
+ btop = lsw[17];
+ base8 = lsw[16:9];
+ top8 = lsw[25:18];
+
+ tcin = (top8 < base8); // can actually merge it with t_hi in update_temp_fields QQQ
+ ltop = ~denorm;
+ ttop = ltop ^ tcin ^ btop;
+
+ regcap.exp = cexp ^ {5{~denorm}}; // this is the ^0b11111 part
+ top9 = {ttop, top8};
+ base9 = {btop, base8};
+ regcap.top = top9;
+ regcap.base = base9;
+
+ // (regcap.exp >= RESETEXP);
+ addrmi9 = (regcap.exp[4] & regcap.exp[3]) ? {1'b0, lsw[8:1]} : lsw[8:0];
+
+ regcap.otype = msw[25:23];
+ sealed = (regcap.otype != OTYPE_UNSEALED);
+
+ // cperms_mem = {lsw[31], msw[31:26]};
+ cperms_mem = msw[31:26];
+ regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
+ regcap.rsvd = lsw[31];
+
+ // tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
+ top_hi = (btop ^ ttop) ? ~ttop : tcin;
+ addr_hi = (addrmi9 < base9);
+
+ regcap.top_cor = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11);
+ regcap.base_cor = (addr_hi) ? 1'b1 : 1'b0;
+
+ return regcap;
+
+ endfunction
+
+ // mem to reg, addr32, both original and IT8 encoding, memfmt1
+ function automatic logic[32:0] mem2regaddr_fmt1 (logic [32:0] msw, logic [32:0] lsw, reg_cap_t regcap); // xyz
+ logic [32:0] addr33;
+ logic [31:0] addrmi, addrhi, addrlo;
+ logic [31:0] mask1, mask2;
+
+ // (regcap.exp >= RESETEXP)
+ if (regcap.exp[4] & regcap.exp[3]) begin
+ addrhi = 32'h0;
+ addrmi = {lsw[8:0], 23'h0};
+ addrlo = {9'h0, msw[22:0]};
+ end else begin
+ addrmi = {23'h0, lsw[8:0]} << regcap.exp;
+ mask1 = {32{1'b1}} << regcap.exp;
+ mask2 = mask1 << 9;
+ addrhi = ({9'h0, msw[22:0]} << 9) & mask2;
+ addrlo = {9'h0, msw[22:0]} & (~mask1);
+ end
+
+ addr33 = {lsw[32], addrhi | addrmi | addrlo};
+
+ return addr33;
+ endfunction
+
+ // reg to mem, original cap bound encoding, memfmt1
+ function automatic logic[65:0] reg2mem_fmt1 (reg_cap_t reg_cap, logic[31:0] addr);
+
+ logic [32:0] msw, lsw;
+ logic [31:0] mask1, mask2;
+
+ msw[32] = reg_cap.valid;
+ msw[31:26] = reg_cap.cperms[5:0];
+ msw[25:23] = reg_cap.otype;
+ lsw[32] = reg_cap.valid ;
+ lsw[31] = reg_cap.rsvd;
+ lsw[26:18] = reg_cap.base;
+ lsw[17:9] = reg_cap.top;
+
+ if (reg_cap.exp == RESETEXP) begin
+ msw[22:0] = addr[22:0];
+ lsw[30:27] = RESETCEXP;
+ lsw[8:0] = addr[31:23];
+ end else begin
+ mask1 = {32{1'b1}} << reg_cap.exp;
+ mask2 = mask1 << 9;
+
+ msw[22:0] = 23'((addr & ~mask1) | ((addr & mask2) >> 9));
+ lsw[30:27] = reg_cap.exp[CEXP_W-1:0];
+ lsw[8:0] = 9'(addr >> reg_cap.exp);
+ end
+
+ return {msw, lsw};
+
+ endfunction
+
+ // reg to mem, IT8 encoding, memfmt1
+ function automatic logic[65:0] reg2mem_it8_fmt1 (reg_cap_t regcap, logic[31:0] addr); // xyz
+
+ logic [32:0] msw, lsw;
+ logic [31:0] mask1, mask2;
+ logic denorm, ltop, cor;
+ logic [9:0] top10, base10, len10;
+
+ cor = (regcap.top_cor == {2{regcap.base_cor}});
+ top10 = {~cor, regcap.top};
+ base10 = {1'b0, regcap.base};
+ len10 = top10-base10;
+ ltop = len10[9] | len10[8];
+
+ denorm = (regcap.exp == 0) && ~ltop;
+
+ msw[32] = regcap.valid;
+ msw[31:26] = regcap.cperms[5:0];
+ msw[25:23] = regcap.otype;
+ lsw[32] = regcap.valid ;
+ lsw[31] = regcap.rsvd;
+ lsw[30:26] = regcap.exp ^ {5{~denorm}} ;
+ lsw[25:18] = regcap.top[7:0];
+ lsw[17:9] = regcap.base;
+
+ // (regcap.exp >= RESETEXP)
+ if (regcap.exp[4] & regcap.exp[3]) begin
+ msw[22:0] = addr[22:0];
+ lsw[8:0] = addr[31:23];
+ end else begin
+ mask1 = {32{1'b1}} << regcap.exp;
+ mask2 = mask1 << 9;
+ msw[22:0] = 23'((addr & ~mask1) | ((addr & mask2) >> 9));
+ lsw[8:0] = 9'(addr >> regcap.exp);
+ end
+
+ return {msw, lsw};
+
+ endfunction
+
+  // simply cast regcap to a REGCAP_W-bit (37-bit) vector.
+  // we can do this with SystemVerilog casting but let's be explicit here
+ function automatic logic [REGCAP_W-1:0] reg2vec (reg_cap_t regcap);
+
+ logic [REGCAP_W-1:0] vec_out;
+
+ vec_out[REGCAP_W-1] = regcap.valid ;
+ vec_out[34+:2] = regcap.top_cor;
+ vec_out[33+:1] = regcap.base_cor;
+ vec_out[28+:EXP_W] = regcap.exp;
+ vec_out[19+:TOP_W] = regcap.top ;
+ vec_out[10+:BOT_W] = regcap.base ;
+ vec_out[7+:OTYPE_W] = regcap.otype ;
+ vec_out[6+:1] = regcap.rsvd;
+ vec_out[0+:CPERMS_W] = regcap.cperms;
+
+ return vec_out;
+ endfunction
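+  // Bit layout of the REGCAP_W (37-bit) vector, as packed above:
+  //   [36] valid, [35:34] top_cor, [33] base_cor, [32:28] exp,
+  //   [27:19] top, [18:10] base, [9:7] otype, [6] rsvd, [5:0] cperms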
+
+ function automatic reg_cap_t vec2reg (logic [REGCAP_W-1:0] vec_in);
+
+ reg_cap_t regcap;
+
+ regcap.valid = vec_in[REGCAP_W-1];
+ regcap.top_cor = vec_in[34+:2];
+ regcap.base_cor = vec_in[33+:1];
+ regcap.exp = vec_in[28+:EXP_W];
+ regcap.top = vec_in[19+:TOP_W];
+ regcap.base = vec_in[10+:BOT_W];
+ regcap.otype = vec_in[7+:OTYPE_W];
+ regcap.rsvd = vec_in[6+:1];
+ regcap.cperms = vec_in[0+:CPERMS_W];
+
+ return regcap;
+ endfunction
+
+ // test whether 2 caps are equal
+ function automatic logic is_equal (full_cap_t cap_a, full_cap_t cap_b,
+ logic [31:0] addra, logic[31:0] addrb);
+
+ is_equal = (cap_a.valid == cap_b.valid) &&
+ (cap_a.top == cap_b.top) && (cap_a.base == cap_b.base) &&
+ (cap_a.cperms == cap_b.cperms) && (cap_a.rsvd == cap_b.rsvd) &&
+ (cap_a.exp == cap_b.exp) && (cap_a.otype == cap_b.otype) &&
+ (addra == addrb);
+ return is_equal;
+
+ endfunction
+
+ // clear tag of a regcap if needed, otherwise simply pass through
+ function automatic reg_cap_t and_regcap_tag (reg_cap_t in_cap, logic tag_mask);
+ reg_cap_t out_cap;
+
+ out_cap = in_cap;
+ out_cap.valid = in_cap.valid & tag_mask;
+ return out_cap;
+
+ endfunction
+
+ // parameters and constants
+
+ parameter logic[6:0] CHERI_INSTR_OPCODE = 7'h5b;
+  parameter int OPDW = 36; // Must be >= the number of cheri operators/instructions we support
+
+ typedef enum logic [5:0] {
+ CGET_PERM = 6'h00,
+ CGET_TYPE = 6'h01,
+ CGET_BASE = 6'h02,
+ CGET_LEN = 6'h03,
+ CGET_TAG = 6'h04,
+ CGET_TOP = 6'h05,
+ CGET_HIGH = 6'h06,
+ CGET_ADDR = 6'h07,
+ CSEAL = 6'h08,
+ CUNSEAL = 6'h09,
+ CAND_PERM = 6'h0a,
+ CSET_ADDR = 6'h0b,
+ CINC_ADDR = 6'h0c,
+ CINC_ADDR_IMM = 6'h0d,
+ CSET_BOUNDS = 6'h0e,
+ CSET_BOUNDS_EX = 6'h0f,
+ CSET_BOUNDS_IMM = 6'h10,
+ CIS_SUBSET = 6'h11,
+ CIS_EQUAL = 6'h12,
+ CMOVE_CAP = 6'h13,
+ CSUB_CAP = 6'h14,
+ CCLEAR_TAG = 6'h15,
+ CLOAD_CAP = 6'h16,
+ CSET_HIGH = 6'h17,
+ CSTORE_CAP = 6'h18,
+ CCSR_RW = 6'h19,
+ CJALR = 6'h1a,
+ CJAL = 6'h1b,
+ CAUIPCC = 6'h1c,
+ CAUICGP = 6'h1d,
+ CRRL = 6'h1e,
+ CRAM = 6'h1f,
+ CSET_BOUNDS_RNDN = 6'h20
+ } cheri_op_e;
+
+ typedef enum logic [4:0] {
+ CHERI_CSR_NULL,
+ CHERI_CSR_RW
+ } cheri_csr_op_e;
+
+ parameter logic [4:0] CHERI_SCR_MEPCC = 5'd31;
+ parameter logic [4:0] CHERI_SCR_MSCRATCHC = 5'd30;
+ parameter logic [4:0] CHERI_SCR_MTDC = 5'd29;
+ parameter logic [4:0] CHERI_SCR_MTCC = 5'd28;
+ parameter logic [4:0] CHERI_SCR_ZTOPC = 5'd27;
+ parameter logic [4:0] CHERI_SCR_DSCRATCHC1 = 5'd26;
+ parameter logic [4:0] CHERI_SCR_DSCRATCHC0 = 5'd25;
+ parameter logic [4:0] CHERI_SCR_DEPCC = 5'd24;
+
+ // permission violations
+ parameter int unsigned W_PVIO = 8;
+
+ parameter logic [2:0] PVIO_TAG = 3'h0;
+ parameter logic [2:0] PVIO_SEAL = 3'h1;
+ parameter logic [2:0] PVIO_EX = 3'h2;
+ parameter logic [2:0] PVIO_LD = 3'h3;
+ parameter logic [2:0] PVIO_SD = 3'h4;
+ parameter logic [2:0] PVIO_SC = 3'h5;
+ parameter logic [2:0] PVIO_ASR = 3'h6;
+ parameter logic [2:0] PVIO_ALIGN = 3'h7;
+
+
+ function automatic logic [4:0] vio_cause_enc (logic bound_vio, logic[W_PVIO-1:0] perm_vio_vec);
+ logic [4:0] vio_cause;
+
+ if (perm_vio_vec[PVIO_TAG])
+ vio_cause = 5'h2;
+ else if (perm_vio_vec[PVIO_SEAL])
+ vio_cause = 5'h3;
+ else if (perm_vio_vec[PVIO_EX])
+ vio_cause = 5'h11;
+ else if (perm_vio_vec[PVIO_LD])
+ vio_cause = 5'h12;
+ else if (perm_vio_vec[PVIO_SD])
+ vio_cause = 5'h13;
+ else if (perm_vio_vec[PVIO_SC])
+ vio_cause = 5'h15;
+ else if (perm_vio_vec[PVIO_ASR])
+ vio_cause = 5'h18;
+ else if (bound_vio)
+ vio_cause = 5'h1;
+ else
+ vio_cause = 5'h0;
+
+ return vio_cause;
+ endfunction
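+  // Illustrative example: a load through an untagged capability sets perm_vio_vec[PVIO_TAG] in
+  // the EX-stage checks, so vio_cause_enc() reports cause 5'h2; a pure bound violation (with no
+  // permission bits set) reports 5'h1.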
+
+endpackage
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv b/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv
new file mode 100644
index 0000000..27c636a
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv
@@ -0,0 +1,384 @@
+
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+module cheri_regfile import cheri_pkg::*; #(
+ parameter int unsigned NREGS = 32,
+ parameter int unsigned NCAPS = 32,
+ parameter bit RegFileECC = 1'b0,
+ parameter int unsigned DataWidth = 32,
+ parameter bit CheriPPLBC = 1'b0,
+ parameter bit TRVKBypass = 1'b1
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+ input logic par_rst_ni,
+
+ //Read port R1
+ input logic [4:0] raddr_a_i,
+ output logic [DataWidth-1:0] rdata_a_o,
+ output reg_cap_t rcap_a_o,
+
+ //Read port R2
+ input logic [4:0] raddr_b_i,
+ output logic [DataWidth-1:0] rdata_b_o,
+ output reg_cap_t rcap_b_o,
+
+ // Write port W1
+ input logic [4:0] waddr_a_i,
+ input logic [DataWidth-1:0] wdata_a_i,
+ input reg_cap_t wcap_a_i,
+ input logic we_a_i, // we always write both cap & data in parallel
+
+ // Tag reservation and revocation port
+ output logic [31:0] reg_rdy_o,
+ input logic [4:0] trvk_addr_i,
+ input logic trvk_en_i,
+ input logic trvk_clrtag_i,
+ input logic [6:0] trvk_par_i, // make sure this is included in lockstep compare
+ input logic [4:0] trsv_addr_i,
+ input logic trsv_en_i,
+ input logic [6:0] trsv_par_i,
+
+ output logic alert_o
+);
+
+ localparam logic [6:0] DefParBits[0:31] = '{7'h27,7'h0d,7'h6b,7'h41,7'h62,7'h48,7'h2e,7'h04,
+ 7'h1f,7'h35,7'h53,7'h79,7'h5a,7'h70,7'h16,7'h3c,
+ 7'h6e,7'h44,7'h22,7'h08,7'h2b,7'h01,7'h67,7'h4d,
+ 7'h56,7'h7c,7'h1a,7'h30,7'h13,7'h39,7'h5f,7'h75};
+
+ localparam logic [6:0] TrvkParIncr = 7'h15;
+ localparam logic [6:0] NullParBits = 7'h2a; // 7-bit parity for 32'h0
+
+ logic [31:0] rf_reg [31:0];
+ logic [31:0] rf_reg_q [NREGS-1:1];
+
+ logic [6:0] rf_reg_par [31:0];
+ logic [6:0] rf_reg_par_q [NREGS-1:0];
+
+ reg_cap_t rf_cap [31:0];
+ reg_cap_t rf_cap_q [NCAPS-1:1];
+
+ reg_cap_t rcap_a, rcap_b;
+
+ logic [NREGS-1:1] we_a_dec;
+ logic [NREGS-1:1] trvk_dec, trsv_dec;
+ logic [31:0] reg_rdy_vec;
+
+ logic pplbc_alert;
+
+ always_comb begin : we_a_decoder
+ for (int unsigned i = 1; i < NREGS; i++) begin
+ we_a_dec[i] = (waddr_a_i == 5'(i)) ? we_a_i : 1'b0;
+ trvk_dec[i] = CheriPPLBC ? (trvk_addr_i == 5'(i)) : 1'b0;
+ trsv_dec[i] = CheriPPLBC ? (trsv_addr_i == 5'(i)) : 1'b0;
+ end
+ end
+
+ // No flops for R0 as it's hard-wired to 0
+ for (genvar i = 1; i < NREGS; i++) begin : g_rf_flops
+
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rf_reg_q[i] <= 32'h0;
+ end else if (we_a_dec[i]) begin
+ rf_reg_q[i] <= wdata_a_i[31:0];
+ end
+ end
+
+ if (RegFileECC) begin : g_reg_par
+ logic [6:0] wdata_par;
+ logic trvk_clr_we;
+
+ assign trvk_clr_we = CheriPPLBC & trvk_dec[i] & trvk_en_i & trvk_clrtag_i;
+ assign wdata_par = wdata_a_i[DataWidth-1:DataWidth-7];
+
+ // split reset of data and parity to detect spurious reset (fault protection)
+ always_ff @(posedge clk_i or negedge par_rst_ni) begin
+ if (!par_rst_ni) begin
+ rf_reg_par_q[i] <= DefParBits[i];
+ end else if (trvk_clr_we && we_a_dec[i]) begin
+ rf_reg_par_q[i] <= wdata_par ^ TrvkParIncr;
+ end else if (trvk_clr_we) begin
+ // update parity bits
+ rf_reg_par_q[i] <= rf_reg_par_q[i] ^ TrvkParIncr;
+ end else if (we_a_dec[i]) begin
+ rf_reg_par_q[i] <= wdata_par;
+ end
+ end
+ end else begin : g_no_reg_par
+ assign rf_reg_par_q[i] = 7'h0;
+ end // gen reg_par
+
+ end // g_rf_flops
+
+
+ assign rf_reg[0] = 32'h0;
+ assign rf_reg_par[0] = DefParBits[0];
+ for (genvar i=1; i<32 ; i++) begin
+ if (i < NREGS) begin
+ assign rf_reg[i] = rf_reg_q[i];
+ assign rf_reg_par[i] = rf_reg_par_q[i];
+ end else begin
+ assign rf_reg[i] = 0;
+ assign rf_reg_par[i] = DefParBits[i];
+ end
+ end
+
+ assign rdata_a_o = DataWidth'({rf_reg_par[raddr_a_i], rf_reg[raddr_a_i]});
+ assign rdata_b_o = DataWidth'({rf_reg_par[raddr_b_i], rf_reg[raddr_b_i]});
+
+ // capability meta data (MSW)
+ for (genvar i = 1; i < NCAPS; i++) begin : g_cap_flops
+ logic trvk_clr_we;
+
+ assign trvk_clr_we = CheriPPLBC & trvk_dec[i] & trvk_en_i & trvk_clrtag_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rf_cap_q[i] <= NULL_REG_CAP;
+ end else if (trvk_clr_we && we_a_dec[i]) begin
+ rf_cap_q[i] <= and_regcap_tag(wcap_a_i, 1'b0);
+ end else if (trvk_clr_we) begin
+ // prioritize revocation (later in pipeline)
+ rf_cap_q[i] <= and_regcap_tag(rf_cap_q[i], 1'b0);
+ end else if (we_a_dec[i]) begin
+ rf_cap_q[i] <= wcap_a_i;
+ end
+ end
+ end
+
+ assign rf_cap[0] = NULL_REG_CAP;
+ for (genvar i=1; i<32 ; i++) begin
+ if (i < NCAPS) begin
+ assign rf_cap[i] = rf_cap_q[i];
+ end else begin
+ assign rf_cap[i] = NULL_REG_CAP;
+ end
+ end
+
+ assign rcap_a = rf_cap[raddr_a_i];
+ assign rcap_b = rf_cap[raddr_b_i];
+
+ if (CheriPPLBC) begin : g_regrdy
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ reg_rdy_vec[0] <= 1'b1;
+ end
+
+ for (genvar i=1; i<NCAPS; i++) begin
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ reg_rdy_vec[i] <= 1'b1;
+ else if (trsv_dec[i] & trsv_en_i) // prioritize trsv t
+ reg_rdy_vec[i] <= 1'b0;
+ else if (trvk_dec[i] & trvk_en_i)
+ reg_rdy_vec[i] <= 1'b1;
+ end // always_ff
+ end
+
+ // unused bits
+ for (genvar i=NCAPS; i<32; i++) begin
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ reg_rdy_vec[i] <= 1'b1;
+ end
+ end
+
+ // build the shadow copy of reg_rdy_vec for fault protection
+ if (RegFileECC) begin : gen_shdw
+ logic [4:0] trvk_addr_q;
+ logic trvk_en_q;
+ logic trvk_clrtag_q;
+ logic [6:0] trvk_par_q;
+ logic [4:0] trsv_addr_q;
+ logic trsv_en_q;
+ logic [6:0] trsv_par_q;
+
+ logic [31:0] reg_rdy_vec_shdw, reg_rdy_vec_q;
+ logic [NREGS-1:1] trvk_dec_shdw, trsv_dec_shdw;
+ logic shdw_mismatch_err, cap_rvk_err;
+
+
+ always_comb begin
+ for (int unsigned i = 1; i < NREGS; i++) begin
+ trvk_dec_shdw[i] = (trvk_addr_q == 5'(i));
+ trsv_dec_shdw[i] = (trsv_addr_q == 5'(i));
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge par_rst_ni) begin
+ if (!par_rst_ni) begin
+ trvk_addr_q <= 5'h0;
+ trvk_en_q <= 1'b0;
+ trvk_clrtag_q <= 1'b0;
+ trvk_par_q <= NullParBits;
+ trsv_addr_q <= 5'h0;
+ trsv_en_q <= 1'b0;
+ trsv_par_q <= NullParBits;
+ reg_rdy_vec_q <= {32{1'b1}};
+ end else begin
+ trvk_addr_q <= trvk_addr_i;
+ trvk_en_q <= trvk_en_i;
+ trvk_clrtag_q <= trvk_clrtag_i;
+ trvk_par_q <= trvk_par_i;
+ trsv_addr_q <= trsv_addr_i;
+ trsv_en_q <= trsv_en_i;
+ trsv_par_q <= trsv_par_i;
+ reg_rdy_vec_q <= reg_rdy_vec;
+ end
+ end
+
+ for (genvar i = 0; i < 32; i++) begin
+ if ((i == 0) || (i >= NCAPS)) begin
+ assign reg_rdy_vec_shdw[i] = 1'b1;
+ end else begin
+ always_ff @(posedge clk_i or negedge par_rst_ni) begin
+ if (!par_rst_ni)
+ reg_rdy_vec_shdw[i] <= 1'b1;
+ else if (trsv_dec_shdw[i] & trsv_en_q)
+ reg_rdy_vec_shdw[i] <= 1'b0;
+ else if (trvk_dec_shdw[i] & trvk_en_q)
+ reg_rdy_vec_shdw[i] <= 1'b1;
+ end // always_ff
+ end
+ end
+
+ // generate alert
+ assign shdw_mismatch_err = (reg_rdy_vec_shdw != reg_rdy_vec_q);
+
+ // readback revoked cap to make sure the valid bit is actually cleared
+ always_comb begin
+ cap_rvk_err = 0;
+ for (int unsigned i = 1; i < NCAPS; i++) begin
+ cap_rvk_err = cap_rvk_err | (trvk_en_q & trvk_clrtag_q & trvk_dec_shdw[i] & rf_cap_q[i].valid);
+ end
+ end
+
+
+ // check parity of trsv and trvk requests
+ logic [1:0] trsv_ecc_err, trvk_ecc_err;
+
+ prim_secded_inv_39_32_dec trsv_ecc_i (
+ .data_i ({trsv_par_q, 26'h0, trsv_en_q, trsv_addr_q}),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (trsv_ecc_err)
+ );
+
+ prim_secded_inv_39_32_dec trsk_ecc_i (
+ .data_i ({trvk_par_q, 25'h0, trvk_en_q, trvk_clrtag_q, trvk_addr_q}),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (trvk_ecc_err)
+ );
+
+ assign pplbc_alert = shdw_mismatch_err | cap_rvk_err | (|trsv_ecc_err) | (|trvk_ecc_err);
+
+ end else begin : gen_no_shdw // no ECC or shdw checking
+ assign pplbc_alert = 1'b0;
+ end
+
+ end else begin : g_no_regrdy
+ assign reg_rdy_vec = {32{1'b1}};
+ assign pplbc_alert = 1'b0;
+ end // not pplbc
+
+ //
+  // read back the last-written register for fault protection
+ //
+ logic reg_rdbk_err;
+
+ if (RegFileECC) begin : gen_fault_rdbk
+ logic [NREGS-1:1] we_a_dec_shdw;
+ logic [4:0] waddr_a_q;
+ logic [31:0] wdata_a_q;
+ logic [6:0] wpar_a_q;
+ logic [37:0] wcap_vec_q;
+ logic we_a_q;
+ logic [31:0] wdata_tmp;
+ logic [6:0] rpar_tmp;
+ logic [1:0] wreq_ecc_err;
+ logic rdbk_cmp_err;
+
+ // flop the write request and check parity
+ // need all fields to compute parity bits
+ always_ff @(posedge clk_i or negedge par_rst_ni) begin
+ if (!par_rst_ni) begin
+ waddr_a_q <= 5'h0;
+ wdata_a_q <= 32'h0;
+ wpar_a_q <= NullParBits;
+ wcap_vec_q <= 38'h0;
+ we_a_q <= 1'b0;
+ end else begin
+ waddr_a_q <= waddr_a_i;
+ wdata_a_q <= wdata_a_i[31:0];
+ wpar_a_q <= wdata_a_i[DataWidth-1:DataWidth-7];
+ wcap_vec_q <= reg2vec(wcap_a_i);
+ we_a_q <= we_a_i;
+ end
+ end
+
+ assign wdata_tmp = wdata_a_q ^ wcap_vec_q[31:0] ^ {20'h0, we_a_q, waddr_a_q, wcap_vec_q[37:32]};
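+    // The write parity bits are checked against the XOR-fold of the whole write request
+    // ({data, cap vector, waddr/we}) compressed into one 32-bit word, so a single SECDED
+    // decoder instance covers the complete request.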
+
+ prim_secded_inv_39_32_dec wdata_ecc_i (
+ .data_i ({wpar_a_q, wdata_tmp}),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (wreq_ecc_err)
+ );
+
+ // decode and read back to verify (only parity bits)
+ always_comb begin
+ for (int unsigned i = 1; i < NREGS; i++) begin
+ we_a_dec_shdw[i] = (waddr_a_q == 5'(i)) ? we_a_q : 1'b0;
+ end
+ end
+
+ assign rpar_tmp = rf_reg_par[waddr_a_q];
+
+ assign rdbk_cmp_err = (rpar_tmp != wpar_a_q) && (waddr_a_q != 0) && we_a_q;
+
+ assign reg_rdbk_err = (|wreq_ecc_err) | rdbk_cmp_err;
+
+ end else begin : gen_no_fault_rdbk
+ assign reg_rdbk_err = 1'b0;
+ end
+
+ assign alert_o = pplbc_alert | reg_rdbk_err;
+
+ reg_cap_t rcap_a_rvkd, rcap_b_rvkd;
+
+ if (TRVKBypass) begin
+    // Bypass the register update cycle and directly update the read ports
+ always_comb begin
+ reg_rdy_o = reg_rdy_vec | ({NREGS{trvk_en_i}} & {trvk_dec, 1'b0});
+
+ rcap_a_rvkd = rcap_a;
+ if (trvk_en_i && trvk_clrtag_i && (trvk_addr_i == raddr_a_i))
+ rcap_a_rvkd.valid = 1'b0;
+ rcap_a_o = rcap_a_rvkd;
+
+ rcap_b_rvkd = rcap_b;
+ if (trvk_en_i && trvk_clrtag_i && (trvk_addr_i == raddr_b_i))
+ rcap_b_rvkd.valid = 1'b0;
+ rcap_b_o = rcap_b_rvkd;
+
+ end
+ end else begin
+ assign reg_rdy_o = reg_rdy_vec;
+
+ assign rcap_a_rvkd = rcap_a;
+ assign rcap_a_o = rcap_a_rvkd;
+ assign rcap_b_rvkd = rcap_b;
+ assign rcap_b_o = rcap_b_rvkd;
+ end
+
+
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv b/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv
new file mode 100644
index 0000000..ba6ce15
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv
@@ -0,0 +1,161 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+module cheri_stkz import cheri_pkg::*; (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // CSR register interface
+ input logic ztop_wr_i,
+ input logic [31:0] ztop_wdata_i,
+ input full_cap_t ztop_wfcap_i,
+ output logic [31:0] ztop_rdata_o,
+ output reg_cap_t ztop_rcap_o,
+
+ input logic unmasked_intr_i,
+
+ output logic stkz_active_o,
+ output logic stkz_abort_o,
+ output logic [31:0] stkz_ptr_o,
+ output logic [31:0] stkz_base_o,
+ output logic stkz_err_o,
+
+  // LSU req/resp interface (to be multiplexed/qualified)
+ input logic lsu_stkz_resp_valid_i,
+ input logic lsu_stkz_resp_err_i,
+ input logic lsu_stkz_req_done_i,
+ output logic stkz_lsu_req_o,
+ output logic stkz_lsu_we_o,
+ output logic stkz_lsu_is_cap_o,
+ output logic [31:0] stkz_lsu_addr_o,
+ output logic [32:0] stkz_lsu_wdata_o
+);
+
+ typedef enum logic [1:0] {STKZ_IDLE, STKZ_ACTIVE, STKZ_ABORT} stkz_fsm_t;
+
+ stkz_fsm_t stkz_fsm_d, stkz_fsm_q;
+
+ logic [29:0] stkz_ptrw, stkz_ptrw_nxt;
+ logic [29:0] stkz_basew;
+ logic stkz_start, stkz_done, stkz_stop, stkz_active;
+ reg_cap_t ztop_rcap, ztop_rcap_nxt;
+ logic [32:0] ztop_wtop33;
+ logic [31:0] ztop_wbase32;
+ logic waddr_eq_base;
+ logic cmd_cap_good;
+ reg_cap_t cmd_wcap;
+ logic cmd_new_cap, cmd_new_null;
+ logic cmd_is_n2z;
+
+ assign stkz_lsu_wdata_o = 33'h0;
+ assign stkz_lsu_is_cap_o = 1'b0; // this means we are really writing 33'h0 to memory
+ assign stkz_lsu_we_o = 1'b1;
+ assign stkz_lsu_req_o = stkz_active;
+ assign stkz_lsu_addr_o = {stkz_ptrw_nxt, 2'h0};
+
+ assign stkz_active_o = stkz_active;
+ assign stkz_active = (stkz_fsm_q != STKZ_IDLE);
+ assign stkz_abort_o = (stkz_fsm_q == STKZ_ABORT);
+
+ assign stkz_ptr_o = {stkz_ptrw, 2'h0};
+ assign stkz_base_o = {stkz_basew, 2'h0};
+
+ assign ztop_rdata_o = {stkz_ptrw, 2'h0};
+ assign ztop_rcap_o = ztop_rcap;
+
+ assign ztop_wbase32 = ztop_wfcap_i.base32;
+ assign ztop_wtop33 = ztop_wfcap_i.top33;
+
+ assign cmd_cap_good = ztop_wfcap_i.valid && (ztop_wtop33[32:2] >= ztop_wdata_i[31:2]) &&
+ ztop_wfcap_i.perms[PERM_SD];
+ assign cmd_is_n2z = cmd_cap_good && (ztop_wdata_i[31:2] == ztop_wbase32[31:2]);
+
+ assign cmd_new_null = ztop_wr_i && (ztop_wfcap_i == NULL_FULL_CAP) && (ztop_wdata_i == 32'h0);
+ assign cmd_new_cap = ztop_wr_i && ~cmd_new_null;
+
+ assign stkz_start = cmd_new_cap && cmd_cap_good && (ztop_wdata_i[31:2] > ztop_wbase32[31:2]);
+ assign stkz_done = (stkz_ptrw_nxt <= stkz_basew);
+ assign stkz_stop = unmasked_intr_i | cmd_new_null;
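+  // Illustration: writing ztop with a valid cap whose address is 0x108 and base is 0x100
+  // loads stkz_ptrw_nxt with 30'h41 (byte address 0x104); the engine then zeroes the words
+  // at 0x104 and 0x100 and returns to idle, i.e. it clears [base, ztop address) downwards.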
+
+
+ always_comb begin
+ logic [2:0] tmp3;
+ logic [8:0] addrmi9;
+
+ if ((stkz_fsm_q == STKZ_IDLE) && stkz_start)
+ stkz_fsm_d = STKZ_ACTIVE;
+ else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_done & lsu_stkz_req_done_i) // "normal" completion
+ stkz_fsm_d = STKZ_IDLE;
+ else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_stop & lsu_stkz_req_done_i) // abort
+ stkz_fsm_d = STKZ_IDLE;
+ else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_stop) // pending abort, wait till lsu_req_done
+ stkz_fsm_d = STKZ_ABORT;
+ else if ((stkz_fsm_q == STKZ_ABORT) & lsu_stkz_req_done_i)
+      stkz_fsm_d = STKZ_IDLE;      // self-clears on any further load/store activity
+ else
+ stkz_fsm_d = stkz_fsm_q;
+
+    // clear tag if writing a ztop value with address == base
+ cmd_wcap = full2regcap(ztop_wfcap_i);
+ if (cmd_is_n2z) cmd_wcap.valid = 1'b0;
+
+ // we are doing this in lieu of a full set_address.
+    // note we only start a zeroization if addr > base32, so no representability check is needed
+ ztop_rcap_nxt = ztop_rcap;
+ addrmi9 = {stkz_ptrw_nxt, 2'b00} >> ztop_rcap.exp;
+ tmp3 = update_temp_fields(ztop_rcap.top, ztop_rcap.base, addrmi9);
+ ztop_rcap_nxt.top_cor = tmp3[2:1];
+ ztop_rcap_nxt.base_cor = tmp3[0];
+ ztop_rcap_nxt.valid = ztop_rcap.valid & ~stkz_done;
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ stkz_fsm_q <= STKZ_IDLE;
+ stkz_ptrw <= 30'h0;
+ stkz_ptrw_nxt <= 30'h0;
+ stkz_basew <= 30'h0;
+ stkz_err_o <= 1'b0;
+ ztop_rcap <= NULL_REG_CAP;
+ end else begin
+
+ stkz_fsm_q <= stkz_fsm_d;
+
+      // zcap is a WARL SCR
+      //  - if active:
+      //    - readback returns the current progress
+      //    - allow writing NULL to stop (readback returns NULL in this case)
+      //
+      //  - if not active:
+      //    - only allow writing a tagged cap (legalized), which starts zeroization, however
+      //    - special case: writing a tagged cap with addr == base will NOT start zeroization but
+      //      will clear the tag on read
+ //
+ if (ztop_wr_i) begin
+ stkz_ptrw <= ztop_wdata_i[31:2];
+ ztop_rcap <= cmd_wcap;
+ end else if (stkz_active && lsu_stkz_req_done_i) begin
+ stkz_ptrw <= stkz_ptrw_nxt;
+ ztop_rcap <= ztop_rcap_nxt;
+ end
+
+      // this is the captured hardware zeroization context, only updated for valid zeroization runs
+ if (stkz_start) begin
+ stkz_ptrw_nxt <= ztop_wdata_i[31:2] - 1;
+ stkz_basew <= ztop_wbase32[31:2];
+ end else if (stkz_active && lsu_stkz_req_done_i && ~(stkz_done | stkz_stop)) begin
+ stkz_ptrw_nxt <= stkz_ptrw_nxt - 1;
+ end
+
+ if (~stkz_active && stkz_start)
+ stkz_err_o <= 1'b0;
+ else if (lsu_stkz_resp_valid_i && lsu_stkz_resp_err_i)
+ stkz_err_o <= 1'b1;
+
+ end
+ end
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv b/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv
new file mode 100644
index 0000000..eb5df17
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv
@@ -0,0 +1,269 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+module cheri_tbre #(
+ parameter int unsigned FifoSize = 4, // must be power-of-2
+ parameter int unsigned AddrHi = 31
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // MMIO register interface
+ input logic [65:0] tbre_ctrl_vec_i,
+ output logic tbre_stat_o,
+ output logic tbre_err_o,
+
+  // LSU req/resp interface (to be multiplexed/qualified)
+ input logic lsu_tbre_resp_valid_i,
+ input logic lsu_tbre_resp_err_i,
+ input logic lsu_tbre_resp_is_wr_i,
+ input logic [32:0] lsu_tbre_raw_lsw_i,
+ input logic lsu_tbre_req_done_i,
+ input logic lsu_tbre_addr_incr_i,
+ output logic tbre_lsu_req_o,
+ output logic tbre_lsu_is_cap_o,
+ output logic tbre_lsu_we_o,
+ output logic [31:0] tbre_lsu_addr_o,
+ output logic [32:0] tbre_lsu_wdata_o,
+
+ // LSU snoop interface
+ input logic snoop_lsu_req_done_i,
+ input logic snoop_lsu_req_i,
+ input logic snoop_lsu_is_cap_i,
+ input logic snoop_lsu_we_i,
+ input logic snoop_lsu_cheri_err_i,
+ input logic [31:0] snoop_lsu_addr_i,
+
+ // trvk interface
+ input logic trvk_en_i,
+ input logic trvk_clrtag_i
+);
+
+ localparam FifoPtrW = $clog2(FifoSize);
+ localparam CapFifoDW = 33+1;
+ localparam ReqFifoDW = AddrHi-1;
+
+
+ logic tbre_go;
+ logic tbre_add1wait;
+ logic load_stop_cond, load_gnt;
+ logic store_gnt;
+ logic store_req_valid;
+ logic [31:0] load_addr, store_addr;
+ logic wait_resp_q;
+
+ logic req_fifo_wr_en, cap_fifo_wr_en, shdw_fifo_wr_en, fifo_rd_en;
+
+ logic [AddrHi-3:0] cur_load_addr8, load_addr8_p1;
+ logic [FifoPtrW:0] req_fifo_ext_wr_ptr, cap_fifo_ext_wr_ptr, shdw_fifo_ext_wr_ptr;
+ logic [FifoPtrW:0] os_req_cnt;
+ logic [FifoPtrW:0] fifo_ext_rd_ptr;
+ logic [FifoPtrW-1:0] req_fifo_wr_ptr, cap_fifo_wr_ptr, shdw_fifo_wr_ptr;
+ logic [FifoPtrW-1:0] fifo_rd_ptr;
+ logic shdw_fifo_wr_data;
+ logic [CapFifoDW-1:0] cap_fifo_wr_data;
+ logic [ReqFifoDW-1:0] req_fifo_wr_data;
+ logic fifo_rd_shdw, fifo_rd_tag, fifo_rd_valid, fifo_rd_err;
+ logic [31:0] fifo_rd_data;
+ logic [AddrHi-3:0] fifo_rd_addr8;
+ logic fifo_not_empty;
+
+
+ typedef enum logic [1:0] {TBRE_IDLE, TBRE_LOAD, TBRE_WAIT} tbre_fsm_t;
+ tbre_fsm_t tbre_fsm_q, tbre_fsm_d;
+
+ typedef enum logic [1:0] {SCH_NONE, SCH_LOAD, SCH_STORE} tbre_sch_t;
+ tbre_sch_t tbre_sch_q, tbre_sch_d;
+
+ typedef struct packed {
+ logic go;
+ logic add1wait;
+ logic [31:0] start_addr;
+ logic [31:0] end_addr;
+ } tbre_ctrl_t;
+
+ tbre_ctrl_t tbre_ctrl;
+
+ // register interface
+ assign tbre_ctrl.go = tbre_ctrl_vec_i[64];
+ assign tbre_ctrl.add1wait = tbre_ctrl_vec_i[65];
+ assign tbre_ctrl.start_addr = tbre_ctrl_vec_i[31:0];
+ assign tbre_ctrl.end_addr = tbre_ctrl_vec_i[63:32];
+ assign tbre_stat_o = (tbre_fsm_q != TBRE_IDLE);
+
+  // note: having resp_valid here improves performance but makes timing a bit worse
+  //  (data_rvalid --> tbre_lsu_req --> core/tbre mux select --> data_wdata_o)
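+  // A new request is only presented when no response is outstanding, unless a response is
+  // returning in the same cycle and add1wait is cleared (back-to-back issue).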
+ assign tbre_lsu_req_o = ((tbre_sch_q == SCH_LOAD) | ((tbre_sch_q == SCH_STORE) && store_req_valid)) & (~wait_resp_q | (lsu_tbre_resp_valid_i & ~tbre_ctrl.add1wait));
+ assign tbre_lsu_is_cap_o = (tbre_sch_q == SCH_LOAD);
+ assign tbre_lsu_we_o = (tbre_sch_q == SCH_STORE);
+ assign tbre_lsu_addr_o = (tbre_sch_q == SCH_LOAD) ? load_addr + {lsu_tbre_addr_incr_i, 2'b00} : store_addr;
+ assign tbre_lsu_wdata_o = {1'b0, fifo_rd_data};
+
+ assign load_addr8_p1 = cur_load_addr8 + 1;
+
+ assign load_stop_cond = (load_addr8_p1 > tbre_ctrl.end_addr[AddrHi:3]);
+ assign load_gnt = (tbre_sch_q == SCH_LOAD) & lsu_tbre_req_done_i;
+ assign store_gnt = (tbre_sch_q == SCH_STORE) & lsu_tbre_req_done_i;
+
+  // expand the load/store address by concatenating the MSBs from start_addr (saves some area)
+ assign load_addr = (AddrHi >= 31) ? {cur_load_addr8, 3'b000} :
+ {tbre_ctrl.start_addr[31:AddrHi+1], cur_load_addr8, 3'b000};
+ assign store_addr = (AddrHi >= 31) ? {fifo_rd_addr8, 3'b000} :
+ {tbre_ctrl.start_addr[31:AddrHi+1], fifo_rd_addr8, 3'b000};
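+  // e.g. with the wrapper's AddrHi=23 setting the walk covers a 16MB window; address bits
+  // above AddrHi are reused from start_addr, so start/end addresses are assumed to lie in
+  // the same 2^(AddrHi+1)-byte region.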
+
+ always_comb begin
+ logic load_stall, req_fifo_full;
+
+ // state machine tracking the progress of memory walk
+ if ((tbre_fsm_q == TBRE_IDLE) && tbre_ctrl.go)
+ tbre_fsm_d = TBRE_LOAD;
+ else if ((tbre_fsm_q == TBRE_LOAD) && load_gnt & load_stop_cond)
+ tbre_fsm_d = TBRE_WAIT;
+ else if ((tbre_fsm_q == TBRE_WAIT) && (os_req_cnt == 0))
+ tbre_fsm_d = TBRE_IDLE;
+ else
+ tbre_fsm_d = tbre_fsm_q;
+
+ // arbitration between load/store requests, throttle if too many outstanding load requests
+    // TBRE assumes a non-buffered memory model (a new req won't be gnt'd while the prev response
+    // is still outstanding). If not, we have to change this to throttle on resp as well, since
+ // the load_store_unit can't handle multiple outstanding requests.
+
+ load_stall = (os_req_cnt >= FifoSize-1);
+ req_fifo_full = (os_req_cnt >= FifoSize);
+
+ tbre_sch_d = tbre_sch_q; // default
+ case (tbre_sch_q)
+ SCH_NONE:
+ if ((tbre_fsm_q == TBRE_LOAD) && !req_fifo_full)
+ tbre_sch_d = SCH_LOAD;
+ else if (store_req_valid)
+ tbre_sch_d = SCH_STORE;
+ SCH_LOAD:
+ if (load_gnt & (load_stall || (tbre_fsm_d == TBRE_WAIT)) & store_req_valid)
+ tbre_sch_d = SCH_STORE;
+ else if (load_gnt & (load_stall || (tbre_fsm_d == TBRE_WAIT)))
+ tbre_sch_d = SCH_NONE;
+ SCH_STORE:
+ if ((store_gnt | ~store_req_valid) & (tbre_fsm_q == TBRE_LOAD))
+          tbre_sch_d = SCH_LOAD;  // no need to check req_fifo_full, since we are dequeuing from it
+ else if (store_gnt|~store_req_valid) // go back to NONE to allow reading fifo further
+ tbre_sch_d = SCH_NONE; // no bandwidth loss here since the load req will move ahead anyway
+ default:;
+ endcase
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ tbre_fsm_q <= TBRE_IDLE;
+ tbre_sch_q <= SCH_NONE;
+ cur_load_addr8 <= 'h0;
+ wait_resp_q <= 1'b0;
+ tbre_err_o <= 1'b0;
+ end else begin
+
+ tbre_fsm_q <= tbre_fsm_d;
+ tbre_sch_q <= tbre_sch_d;
+
+ if (tbre_ctrl.go & (tbre_fsm_q == TBRE_IDLE))
+ cur_load_addr8 <= tbre_ctrl.start_addr[AddrHi:3];
+ else if (load_gnt)
+ cur_load_addr8 <= load_addr8_p1;
+
+ if (load_gnt | store_gnt)
+ wait_resp_q <= 1'b1;
+ else if (lsu_tbre_resp_valid_i)
+ wait_resp_q <= 1'b0;
+
+      // for now just capture/latch errors and flag them to firmware
+ if ((tbre_fsm_q == TBRE_IDLE) && tbre_ctrl.go)
+ tbre_err_o <= 1'b0;
+ else if (lsu_tbre_resp_valid_i && lsu_tbre_resp_err_i)
+ tbre_err_o <= 1'b1;
+ end
+ end
+
+ // FIFOs to buffer caps read from the data memory and shadow bits from the shadow map
+
+  // count of outstanding load requests in the pipeline
+ assign os_req_cnt = req_fifo_ext_wr_ptr - fifo_ext_rd_ptr;
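+  // The pointers carry one extra MSB beyond the index width so that a full FIFO
+  // (os_req_cnt == FifoSize) and an empty FIFO (os_req_cnt == 0) can be told apart;
+  // the subtraction wraps correctly because FifoSize is a power of 2.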
+
+ assign req_fifo_wr_ptr = req_fifo_ext_wr_ptr[FifoPtrW-1:0];
+ assign cap_fifo_wr_ptr = cap_fifo_ext_wr_ptr[FifoPtrW-1:0];
+ assign shdw_fifo_wr_ptr = shdw_fifo_ext_wr_ptr[FifoPtrW-1:0];
+ assign fifo_rd_ptr = fifo_ext_rd_ptr[FifoPtrW-1:0];
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fifo_ext_rd_ptr <= 'h0;
+ req_fifo_ext_wr_ptr <= 'h0;
+ cap_fifo_ext_wr_ptr <= 'h0;
+ shdw_fifo_ext_wr_ptr <= 'h0;
+ end else begin
+ // FIFO size is power-of-2
+ if (fifo_rd_en) fifo_ext_rd_ptr <= fifo_ext_rd_ptr + 1;
+
+ if (req_fifo_wr_en) req_fifo_ext_wr_ptr <= req_fifo_ext_wr_ptr + 1;
+ if (cap_fifo_wr_en) cap_fifo_ext_wr_ptr <= cap_fifo_ext_wr_ptr + 1;
+ if (shdw_fifo_wr_en) shdw_fifo_ext_wr_ptr <= shdw_fifo_ext_wr_ptr + 1;
+ end
+ end
+
+  logic [FifoSize-1:0][ReqFifoDW-1:0] req_fifo_mem;   // packed entry: {valid, 8-byte word address}
+  logic [FifoSize-1:0][CapFifoDW-1:0] cap_fifo_mem;   // packed entry: {err, tag, 32-bit data}
+ logic [FifoSize-1:0] shdw_fifo_mem; // single shadow bit per entry
+
+ for (genvar i= 0; i < FifoSize; i++) begin : gen_fifo_mem
+ logic [28:0] req_fifo_item_addr8;
+ assign req_fifo_item_addr8 = (AddrHi >= 31) ? req_fifo_mem[i][AddrHi-3:0] :
+ {tbre_ctrl.start_addr[31:AddrHi+1], req_fifo_mem[i][AddrHi-3:0]};
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ req_fifo_mem[i] <= 0;
+ cap_fifo_mem[i] <= 0;
+ shdw_fifo_mem[i] <= 1'b0;
+ end else begin
+        // monitor the ongoing writes to the LSU to detect collisions
+        // (also, what about a collision between a write request and the head of the FIFO?)
+ if (req_fifo_wr_en && (i == req_fifo_wr_ptr))
+ req_fifo_mem[i] <= req_fifo_wr_data;
+ else if ((req_fifo_item_addr8 == snoop_lsu_addr_i[31:3]) && snoop_lsu_req_done_i && snoop_lsu_we_i)
+ req_fifo_mem[i] <= req_fifo_mem[i] & {1'b0, {(AddrHi-2){1'b1}}};
+
+ if (cap_fifo_wr_en && (i == cap_fifo_wr_ptr)) cap_fifo_mem[i] <= cap_fifo_wr_data;
+ if (shdw_fifo_wr_en && (i == shdw_fifo_wr_ptr)) shdw_fifo_mem[i] <= shdw_fifo_wr_data;
+ end
+ end // always
+ end // generate
+
+ // peek into the current FIFO head
+ assign fifo_rd_addr8 = req_fifo_mem[fifo_rd_ptr][AddrHi-3:0];
+ assign fifo_rd_valid = req_fifo_mem[fifo_rd_ptr][AddrHi-2];
+ assign fifo_rd_data = cap_fifo_mem[fifo_rd_ptr][31:0];
+ assign fifo_rd_tag = cap_fifo_mem[fifo_rd_ptr][32];
+ assign fifo_rd_err = cap_fifo_mem[fifo_rd_ptr][33];
+ assign fifo_rd_shdw = shdw_fifo_mem[fifo_rd_ptr];
+
+ // only issue invalidation store requests if
+ // valid cap returned && no write collision on the address && shadow_bit == 1
+ assign store_req_valid = fifo_not_empty & fifo_rd_tag & fifo_rd_shdw & fifo_rd_valid & ~fifo_rd_err;
+
+ assign fifo_not_empty = (req_fifo_ext_wr_ptr != fifo_ext_rd_ptr) &&
+ (cap_fifo_ext_wr_ptr != fifo_ext_rd_ptr) &&
+ (shdw_fifo_ext_wr_ptr != fifo_ext_rd_ptr);
+
+ assign fifo_rd_en = fifo_not_empty & (((tbre_sch_q == SCH_STORE) & store_gnt) | ~store_req_valid);
+
+ assign req_fifo_wr_en = (tbre_sch_q == SCH_LOAD) & load_gnt;
+ assign req_fifo_wr_data = {1'b1, cur_load_addr8};
+
+ assign cap_fifo_wr_en = lsu_tbre_resp_valid_i & ~lsu_tbre_resp_is_wr_i;
+ assign cap_fifo_wr_data = {lsu_tbre_resp_err_i, lsu_tbre_raw_lsw_i};
+
+ assign shdw_fifo_wr_en = trvk_en_i;
+ assign shdw_fifo_wr_data = trvk_clrtag_i;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv b/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv
new file mode 100644
index 0000000..de1693f
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv
@@ -0,0 +1,248 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+module cheri_tbre_wrapper import cheri_pkg::*; #(
+ parameter bit CHERIoTEn = 1'b1,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter bit StkZIntrOK = 1'b0,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64
+
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // MMIO register interface
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+
+  // LSU req/resp interface (to be multiplexed/qualified)
+ input logic lsu_tbre_resp_valid_i,
+ input logic lsu_tbre_resp_err_i,
+ input logic lsu_tbre_resp_is_wr_i,
+ input logic [32:0] lsu_tbre_raw_lsw_i,
+ input logic lsu_tbre_req_done_i,
+ input logic lsu_tbre_addr_incr_i,
+ input logic lsu_tbre_sel_i,
+ output logic tbre_lsu_req_o,
+ output logic tbre_lsu_is_cap_o,
+ output logic tbre_lsu_we_o,
+ output logic [31:0] tbre_lsu_addr_o,
+ output logic [32:0] tbre_lsu_wdata_o,
+
+ // LSU snoop interface
+ input logic snoop_lsu_req_done_i,
+ input logic snoop_lsu_req_i,
+ input logic snoop_lsu_is_cap_i,
+ input logic snoop_lsu_we_i,
+ input logic snoop_lsu_cheri_err_i,
+ input logic [31:0] snoop_lsu_addr_i,
+
+ // trvk interface
+ input logic trvk_en_i,
+ input logic trvk_clrtag_i,
+
+ // Stack fast-clearing signals
+ input logic ztop_wr_i,
+ input logic [31:0] ztop_wdata_i,
+ input full_cap_t ztop_wfcap_i,
+ output logic [31:0] ztop_rdata_o,
+ output reg_cap_t ztop_rcap_o,
+
+ input logic unmasked_intr_i,
+
+ output logic stkz_active_o,
+ output logic stkz_abort_o,
+ output logic [31:0] stkz_ptr_o,
+ output logic [31:0] stkz_base_o
+);
+
+ localparam nMSTR = 2;
+
+ logic lsu_blk1_resp_valid;
+ logic lsu_blk1_req_done;
+ logic blk1_lsu_req;
+ logic blk1_lsu_is_cap;
+ logic blk1_lsu_we;
+ logic [31:0] blk1_lsu_addr;
+ logic [32:0] blk1_lsu_wdata;
+
+ logic lsu_blk0_resp_valid;
+ logic lsu_blk0_req_done;
+ logic blk0_lsu_req;
+ logic blk0_lsu_is_cap;
+ logic blk0_lsu_we;
+ logic [31:0] blk0_lsu_addr;
+ logic [32:0] blk0_lsu_wdata;
+
+
+ logic tbre_stat, tbre_err, stkz_err;
+
+ assign mmreg_coreout_o = {{(MMRegDoutW-10){1'b0}}, 2'b00, 2'b00, stkz_err, stkz_active_o,
+ 2'b00, tbre_err, tbre_stat};
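+  // bit map: [0] tbre_stat, [1] tbre_err, [3:2] reserved, [4] stkz_active_o,
+  // [5] stkz_err, [9:6] reserved, upper bits tied to 0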
+
+ if (CHERIoTEn & CheriTBRE) begin : g_tbre
+ logic [65:0] tbre_ctrl_vec;
+
+ assign tbre_ctrl_vec = mmreg_corein_i[65:0];
+
+ cheri_tbre #(
+ .FifoSize (4),
+ .AddrHi (23)
+ ) cheri_tbre_i (
+ // Clock and Reset
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .tbre_ctrl_vec_i (tbre_ctrl_vec),
+ .tbre_stat_o (tbre_stat),
+ .tbre_err_o (tbre_err),
+ .lsu_tbre_resp_valid_i (lsu_blk1_resp_valid),
+ .lsu_tbre_resp_err_i (lsu_tbre_resp_err_i),
+ .lsu_tbre_resp_is_wr_i (lsu_tbre_resp_is_wr_i),
+ .lsu_tbre_raw_lsw_i (lsu_tbre_raw_lsw_i),
+ .lsu_tbre_req_done_i (lsu_blk1_req_done),
+ .lsu_tbre_addr_incr_i (lsu_tbre_addr_incr_i),
+ .tbre_lsu_req_o (blk1_lsu_req),
+ .tbre_lsu_is_cap_o (blk1_lsu_is_cap),
+ .tbre_lsu_we_o (blk1_lsu_we),
+ .tbre_lsu_addr_o (blk1_lsu_addr),
+ .tbre_lsu_wdata_o (blk1_lsu_wdata),
+ .snoop_lsu_req_done_i (snoop_lsu_req_done_i),
+ .snoop_lsu_req_i (snoop_lsu_req_i),
+ .snoop_lsu_is_cap_i (snoop_lsu_is_cap_i),
+ .snoop_lsu_we_i (snoop_lsu_we_i),
+ .snoop_lsu_cheri_err_i (snoop_lsu_cheri_err_i),
+ .snoop_lsu_addr_i (snoop_lsu_addr_i),
+ .trvk_en_i (trvk_en_i),
+ .trvk_clrtag_i (trvk_clrtag_i)
+ );
+ end else begin
+ assign tbre_stat = 1'b0;
+ assign tbre_err = 1'b0;
+ assign blk1_lsu_req = 1'b0;
+ assign blk1_lsu_is_cap = 1'b0;
+ assign blk1_lsu_we = 1'b0;
+ assign blk1_lsu_addr = 32'h0;
+ assign blk1_lsu_wdata = 33'h0;
+ end
+
+ if (CHERIoTEn & CheriStkZ) begin : g_stkz
+ logic unmasked_intr;
+ assign unmasked_intr = StkZIntrOK & unmasked_intr_i;
+
+ cheri_stkz cheri_stkz_i (
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ .ztop_wr_i (ztop_wr_i),
+ .ztop_wdata_i (ztop_wdata_i),
+ .ztop_wfcap_i (ztop_wfcap_i),
+ .ztop_rdata_o (ztop_rdata_o),
+ .ztop_rcap_o (ztop_rcap_o),
+ .unmasked_intr_i (unmasked_intr ),
+ .stkz_active_o (stkz_active_o ),
+ .stkz_abort_o (stkz_abort_o ),
+ .stkz_ptr_o (stkz_ptr_o ),
+ .stkz_base_o (stkz_base_o ),
+ .stkz_err_o (stkz_err ),
+ .lsu_stkz_resp_valid_i (lsu_blk0_resp_valid ),
+ .lsu_stkz_resp_err_i (lsu_tbre_resp_err_i ),
+ .lsu_stkz_req_done_i (lsu_blk0_req_done ),
+ .stkz_lsu_req_o (blk0_lsu_req ),
+ .stkz_lsu_we_o (blk0_lsu_we ),
+ .stkz_lsu_is_cap_o (blk0_lsu_is_cap ),
+ .stkz_lsu_addr_o (blk0_lsu_addr ),
+ .stkz_lsu_wdata_o (blk0_lsu_wdata )
+ );
+
+ end else begin
+ assign stkz_active_o = 1'b0;
+ assign stkz_abort_o = 1'b0;
+ assign stkz_ptr_o = 32'h3; // use this to flag stkz feature doesn't exist
+ assign stkz_base_o = 32'h0;
+ assign stkz_err = 1'b0;
+
+ assign ztop_rcap_o = NULL_REG_CAP;
+ assign ztop_rdata_o = 32'h0000_aa55;
+
+ assign blk0_lsu_req = 1'b0;
+ assign blk0_lsu_is_cap = 1'b0;
+ assign blk0_lsu_we = 1'b0;
+ assign blk0_lsu_addr = 32'h0;
+ assign blk0_lsu_wdata = 33'h0;
+ end
+
+ //
+ // Arbitration for LSU interface between tbre and stkz engines
+ // reuse the obimux logic
+ //
+ logic [nMSTR-1:0] mstr_arbit, mstr_arbit_q, mstr_arbit_comb;
+ logic [nMSTR-1:0] mstr_req;
+ logic req_pending, req_pending_q;
+ logic slv_req, slv_gnt;
+
+ assign slv_req = |mstr_req;
+
+  // arbitration by strict priority assignment - mstr_req[0] == highest priority
+ for (genvar i = 0; i < nMSTR; i++) begin
+ logic [7:0] pri_mask;
+ assign pri_mask = 8'hff >> (8-i); // max 8 masters, should be enough
+ assign mstr_arbit[i] = mstr_req[i] & ~(|(mstr_req & pri_mask[nMSTR-1:0]));
+ end
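+  // e.g. for nMSTR=2: pri_mask is 8'h00 for i=0 and 8'h01 for i=1, so mstr_req[0] (stkz)
+  // always wins and mstr_req[1] (tbre) is only granted when the stkz engine is not requesting.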
+
+ // Handling delayed-gnt case.
+  //   Make the next arbitration decision immediately if any mstr_req is active.
+  //   If slv_gnt doesn't happen in the same cycle, register the decision until
+  //   slv_gnt so that the address/wdata/ctrl can be held steady when presenting
+  //   the next request to the slave.
+  //   Corner case:
+  //    -- add the lsu_tbre_sel term to req_pending (allow the arbitration to
+  //       change while the LSU is handling CPU requests).
+  //       This is needed since TBRE could cancel write requests in the case of
+  //       a pipeline hazard (a CPU write to the same location TBRE is working on).
+
+ assign mstr_arbit_comb = req_pending_q ? mstr_arbit_q : mstr_arbit;
+ assign req_pending = |mstr_req & ~slv_gnt & ~req_pending_q & lsu_tbre_sel_i;
+
+ always @(posedge clk_i or negedge rst_ni) begin
+ if(~rst_ni) begin
+ req_pending_q <= 1'b0;
+ mstr_arbit_q <= 0;
+ end else begin
+ if (slv_gnt) req_pending_q <= 1'b0;
+ else if (req_pending) req_pending_q <= 1'b1;
+ if (req_pending) mstr_arbit_q <= mstr_arbit;
+ end
+ end
+
+ // muxing the outgoing control signals
+ assign slv_gnt = lsu_tbre_req_done_i;
+ assign mstr_req = {blk1_lsu_req, blk0_lsu_req};
+
+ assign tbre_lsu_req_o = slv_req;
+ assign tbre_lsu_is_cap_o = mstr_arbit_comb[1] ? blk1_lsu_is_cap : blk0_lsu_is_cap;
+ assign tbre_lsu_we_o = mstr_arbit_comb[1] ? blk1_lsu_we : blk0_lsu_we;
+ assign tbre_lsu_addr_o = mstr_arbit_comb[1] ? blk1_lsu_addr : blk0_lsu_addr;
+ assign tbre_lsu_wdata_o = mstr_arbit_comb[1] ? blk1_lsu_wdata : blk0_lsu_wdata;
+
+ assign lsu_blk1_req_done = mstr_arbit_comb[1] & lsu_tbre_req_done_i;
+ assign lsu_blk0_req_done = mstr_arbit_comb[0] & lsu_tbre_req_done_i;
+
+ //
+ logic resp_sel_q;
+ always @(posedge clk_i or negedge rst_ni) begin
+ if(~rst_ni) begin
+ resp_sel_q <= 1'b0;
+ end else if (lsu_tbre_req_done_i) begin
+ resp_sel_q <= (mstr_arbit_comb[1]);
+ end
+ end
+
+ assign lsu_blk0_resp_valid = ~resp_sel_q & lsu_tbre_resp_valid_i;
+ assign lsu_blk1_resp_valid = resp_sel_q & lsu_tbre_resp_valid_i;
+
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv b/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv
new file mode 100644
index 0000000..b22ce70
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv
@@ -0,0 +1,131 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+module cheri_trvk_stage #(
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapSize = 1024
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic rf_trsv_en_i,
+ input logic [4:0] rf_trsv_addr_i,
+
+ // from LSU
+ input logic lsu_resp_valid_i,
+ input logic lsu_load_err_i,
+ input logic [31:0] rf_wdata_lsu_i,
+ input cheri_pkg::reg_cap_t rf_wcap_lsu_i,
+
+ input logic lsu_resp_is_wr_i,
+
+ input logic lsu_tbre_resp_valid_i,
+ input logic lsu_tbre_resp_err_i,
+
+ output logic [4:0] rf_trvk_addr_o,
+ output logic rf_trvk_en_o,
+ output logic rf_trvk_clrtag_o,
+
+ output logic tbre_trvk_en_o,
+ output logic tbre_trvk_clrtag_o,
+
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i
+);
+
+ import cheri_pkg::*;
+
+ reg_cap_t in_cap_q;
+ logic [31:0] in_data_q;
+
+ logic cpu_op_active;
+ logic [2:0] cpu_op_valid_q, tbre_op_valid_q, cap_good_q;
+ logic cpu_op_valid, tbre_op_valid, cap_good;
+ logic [4:0] trsv_addr;
+ logic [4:0] trsv_addr_q[2:0];
+ logic trvk_status;
+
+ logic [31:0] base32;
+ logic [31:0] tsmap_ptr;
+ logic [4:0] bitpos_q; // bit index in a 32-bit word
+ logic range_ok;
+ logic [2:1] range_ok_q;
+
+
+ assign base32 = get_bound33(in_cap_q.base, {2{in_cap_q.base_cor}}, in_cap_q.exp, in_data_q);
+ assign tsmap_ptr = (base32 - HeapBase) >> 3;
+
+ assign tsmap_addr_o = tsmap_ptr[15:5];
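+  // one revocation bit per 8-byte granule: e.g. with HeapBase = 32'h2001_0000, a cap base of
+  // 32'h2001_0040 maps to granule 8, i.e. tsmap word 0, bit 8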
+
+  // not a sealing cap and pointing to a valid TSMAP range
+ assign range_ok = (tsmap_ptr[31:5] <= TSMapSize) &&
+ ~((in_cap_q.cperms[4:3]==2'b00) && (|in_cap_q.cperms[2:0]));
+ assign tsmap_cs_o = (cpu_op_valid_q[0] | tbre_op_valid_q[0]) & cap_good_q[0];
+
+ assign rf_trvk_en_o = cpu_op_valid_q[2];
+ assign rf_trvk_clrtag_o = trvk_status & cap_good_q[2] & range_ok_q[2];
+ assign rf_trvk_addr_o = trsv_addr_q[2];
+
+ assign tbre_trvk_en_o = tbre_op_valid_q[2];
+ assign tbre_trvk_clrtag_o = trvk_status & cap_good_q[2] & range_ok_q[2];
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cpu_op_active <= 1'b0;
+ trsv_addr <= 5'h0;
+ end else begin
+ if (rf_trsv_en_i) cpu_op_active <= 1'b1;
+ else if (lsu_resp_valid_i) cpu_op_active <= 1'b0;
+
+ if (rf_trsv_en_i) trsv_addr <= rf_trsv_addr_i;
+ end
+ end
+
+
+  assign cpu_op_valid = cpu_op_active & lsu_resp_valid_i;     // CPU op is only active for cap loads
+ assign tbre_op_valid = lsu_tbre_resp_valid_i & ~lsu_resp_is_wr_i; // TBRE Load
+ assign cap_good = (cpu_op_active & lsu_resp_valid_i & ~lsu_load_err_i & rf_wcap_lsu_i.valid) |
+ (lsu_tbre_resp_valid_i & ~lsu_tbre_resp_err_i & rf_wcap_lsu_i.valid);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cpu_op_valid_q <= 0;
+ tbre_op_valid_q <= 0;
+ cap_good_q <= 0;
+ in_cap_q <= NULL_REG_CAP;
+ in_data_q <= 32'h0;
+ bitpos_q <= 0;
+ trvk_status <= 1'b0;
+ range_ok_q <= 0;
+ trsv_addr_q[0] <= 5'b0;
+ trsv_addr_q[1] <= 5'b0;
+ trsv_addr_q[2] <= 5'b0;
+ end else begin
+ // control signal per stage
+ cpu_op_valid_q <= {cpu_op_valid_q[1:0], cpu_op_valid};
+ tbre_op_valid_q <= {tbre_op_valid_q[1:0], tbre_op_valid};
+ cap_good_q <= {cap_good_q[1:0], cap_good};
+ trsv_addr_q[0] <= trsv_addr;
+ trsv_addr_q[1] <= trsv_addr_q[0];
+ trsv_addr_q[2] <= trsv_addr_q[1];
+
+ // stage 0 status: register loaded cap
+ if ((cpu_op_valid & ~lsu_load_err_i) | (tbre_op_valid & ~lsu_tbre_resp_err_i)) begin
+ in_cap_q <= rf_wcap_lsu_i;
+ in_data_q <= rf_wdata_lsu_i;
+ end
+
+ // stage 1 status:
+ bitpos_q <= tsmap_ptr[4:0];
+ range_ok_q[1] <= range_ok;
+
+ // stage 2: index map data
+ range_ok_q[2] <= range_ok_q[1];
+ trvk_status <= tsmap_rdata_i[bitpos_q];
+ end
+ end
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv b/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv
new file mode 100644
index 0000000..32d2fe7
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv
@@ -0,0 +1,1400 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Arithmetic logic unit
+ */
+module cheriot_alu #(
+ parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone
+) (
+ input cheriot_pkg::alu_op_e operator_i,
+ input logic [31:0] operand_a_i,
+ input logic [31:0] operand_b_i,
+
+ input logic instr_first_cycle_i,
+
+ input logic [32:0] multdiv_operand_a_i,
+ input logic [32:0] multdiv_operand_b_i,
+
+ input logic multdiv_sel_i,
+
+ input logic [31:0] imd_val_q_i[2],
+ output logic [31:0] imd_val_d_o[2],
+ output logic [1:0] imd_val_we_o,
+
+ output logic [31:0] adder_result_o,
+ output logic [33:0] adder_result_ext_o,
+
+ output logic [31:0] result_o,
+ output logic comparison_result_o,
+ output logic is_equal_result_o
+);
+ import cheriot_pkg::*;
+
+ logic [31:0] operand_a_rev;
+ logic [32:0] operand_b_neg;
+
+ // bit reverse operand_a for left shifts and bit counting
+ for (genvar k = 0; k < 32; k++) begin : gen_rev_operand_a
+ assign operand_a_rev[k] = operand_a_i[31-k];
+ end
+
+ ///////////
+ // Adder //
+ ///////////
+
+ logic adder_op_a_shift1;
+ logic adder_op_a_shift2;
+ logic adder_op_a_shift3;
+ logic adder_op_b_negate;
+ logic [32:0] adder_in_a, adder_in_b;
+ logic [31:0] adder_result;
+
+ always_comb begin
+ adder_op_a_shift1 = 1'b0;
+ adder_op_a_shift2 = 1'b0;
+ adder_op_a_shift3 = 1'b0;
+ adder_op_b_negate = 1'b0;
+ unique case (operator_i)
+ // Adder OPs
+ ALU_SUB,
+
+ // Comparator OPs
+ ALU_EQ, ALU_NE,
+ ALU_GE, ALU_GEU,
+ ALU_LT, ALU_LTU,
+ ALU_SLT, ALU_SLTU,
+
+ // MinMax OPs (RV32B Ops)
+ ALU_MIN, ALU_MINU,
+ ALU_MAX, ALU_MAXU: adder_op_b_negate = 1'b1;
+
+ // Address Calculation OPs (RV32B Ops)
+ ALU_SH1ADD: if (RV32B != RV32BNone) adder_op_a_shift1 = 1'b1;
+ ALU_SH2ADD: if (RV32B != RV32BNone) adder_op_a_shift2 = 1'b1;
+ ALU_SH3ADD: if (RV32B != RV32BNone) adder_op_a_shift3 = 1'b1;
+
+ default:;
+ endcase
+ end
+
+ // prepare operand a
+ always_comb begin
+ unique case(1'b1)
+ multdiv_sel_i: adder_in_a = multdiv_operand_a_i;
+ adder_op_a_shift1: adder_in_a = {operand_a_i[30:0],2'b01};
+ adder_op_a_shift2: adder_in_a = {operand_a_i[29:0],3'b001};
+ adder_op_a_shift3: adder_in_a = {operand_a_i[28:0],4'b0001};
+ default: adder_in_a = {operand_a_i,1'b1};
+ endcase
+ end
+
+ // prepare operand b
+ assign operand_b_neg = {operand_b_i,1'b0} ^ {33{1'b1}};
+ always_comb begin
+ unique case (1'b1)
+ multdiv_sel_i: adder_in_b = multdiv_operand_b_i;
+ adder_op_b_negate: adder_in_b = operand_b_neg;
+ default: adder_in_b = {operand_b_i, 1'b0};
+ endcase
+ end
+
+ // actual adder
+ assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
+
+ assign adder_result = adder_result_ext_o[32:1];
+
+ assign adder_result_o = adder_result;
+
+ ////////////////
+ // Comparison //
+ ////////////////
+
+ logic is_equal;
+ logic is_greater_equal; // handles both signed and unsigned forms
+ logic cmp_signed;
+
+ always_comb begin
+ unique case (operator_i)
+ ALU_GE,
+ ALU_LT,
+ ALU_SLT,
+ // RV32B only
+ ALU_MIN,
+ ALU_MAX: cmp_signed = 1'b1;
+
+ default: cmp_signed = 1'b0;
+ endcase
+ end
+
+ assign is_equal = (adder_result == 32'b0);
+ assign is_equal_result_o = is_equal;
+
+ // Is greater equal
+ always_comb begin
+ if ((operand_a_i[31] ^ operand_b_i[31]) == 1'b0) begin
+ is_greater_equal = (adder_result[31] == 1'b0);
+ end else begin
+ is_greater_equal = operand_a_i[31] ^ (cmp_signed);
+ end
+ end
+
+ // GTE unsigned:
+ // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
+ // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
+ // (a[31] == 1 && b[31] == 0) => 1
+ // (a[31] == 0 && b[31] == 1) => 0
+
+ // GTE signed:
+ // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
+ // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
+ // (a[31] == 1 && b[31] == 0) => 0
+ // (a[31] == 0 && b[31] == 1) => 1
+
+ // generate comparison result
+ logic cmp_result;
+
+ always_comb begin
+ unique case (operator_i)
+ ALU_EQ: cmp_result = is_equal;
+ ALU_NE: cmp_result = ~is_equal;
+ ALU_GE, ALU_GEU,
+ ALU_MAX, ALU_MAXU: cmp_result = is_greater_equal; // RV32B only
+ ALU_LT, ALU_LTU,
+ ALU_MIN, ALU_MINU, //RV32B only
+ ALU_SLT, ALU_SLTU: cmp_result = ~is_greater_equal;
+
+ default: cmp_result = is_equal;
+ endcase
+ end
+
+ assign comparison_result_o = cmp_result;
+
+ ///////////
+ // Shift //
+ ///////////
+
+ // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
+ // arithmetic shifts and one-shift support.
+ // Rotations and funnel shifts are implemented as multi-cycle instructions.
+ // The shifter is also used for single-bit instructions and bit-field place as detailed below.
+ //
+ // Standard Shifts
+ // ===============
+ // For standard shift instructions, the direction of the shift is to the right by default. For
+  // left shifts, the shift_left signal is set. If so, the operand is initially reversed,
+  // shifted to the right by the specified amount and shifted back again. For arithmetic and
+  // one-shifts, the 33rd bit of the shifter operand is set accordingly.
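+  // For example, a logical left shift of 32'h0000_00ff by 4 becomes: reverse -> 32'hff00_0000,
+  // shift right by 4 -> 32'h0ff0_0000, reverse again -> 32'h0000_0ff0.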
+ //
+ // Multicycle Shifts
+ // =================
+ //
+ // Rotation
+ // --------
+ // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
+ // rs2 respectively.
+ //
+ // Rotation pseudocode:
+ // shift_amt = rs2 & 31;
+ // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
+ // ^-- cycle 0 -----^ ^-- cycle 1 --------------^
+ //
+ // Funnel Shifts
+ // -------------
+  // For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
+ // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
+ // its complement is determined by bit [5] of shift_amt.
+ //
+ // Funnel shift Pseudocode: (fsl)
+ // shift_amt = rs2 & 63;
+ // shift_amt_compl = 32 - shift_amt[4:0]
+ // if (shift_amt >=33):
+ // multicycle_result = (rs1 >> shift_amt_compl[4:0]) | (rs3 << shift_amt[4:0]);
+ // ^-- cycle 0 ----------------^ ^-- cycle 1 ------------^
+ // else if (shift_amt <= 31 && shift_amt > 0):
+ // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
+ // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
+ // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
+ // these cases need to be handled separately outside the shifting structure:
+ // else if (shift_amt == 32):
+ // multicycle_result = rs3
+ // else if (shift_amt == 0):
+ // multicycle_result = rs1.
+ //
+ // Single-Bit Instructions
+ // =======================
+ // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
+
+ // The operations bset, bclr and binv are implemented by generation of a bit-mask using the
+ // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
+ // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
+ // Further processing is taken care of by a separate structure.
+ //
+ // For bext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
+ // shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
+ //
+ // Bit-Field Place
+ // ===============
+ // The shifter structure is shared to compute bfp_mask << bfp_off.
+
+ logic shift_left;
+ logic shift_ones;
+ logic shift_arith;
+ logic shift_funnel;
+ logic shift_sbmode;
+ logic [5:0] shift_amt;
+ logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
+
+ logic [31:0] shift_operand;
+ logic signed [32:0] shift_result_ext_signed;
+ logic [32:0] shift_result_ext;
+ logic unused_shift_result_ext;
+ logic [31:0] shift_result;
+ logic [31:0] shift_result_rev;
+
+ // zbf
+ logic bfp_op;
+ logic [4:0] bfp_len;
+ logic [4:0] bfp_off;
+ logic [31:0] bfp_mask;
+ logic [31:0] bfp_mask_rev;
+ logic [31:0] bfp_result;
+
+ // bfp: shares the shifter structure to compute bfp_mask << bfp_off
+ assign bfp_op = (RV32B != RV32BNone) ? (operator_i == ALU_BFP) : 1'b0;
+ assign bfp_len = {~(|operand_b_i[27:24]), operand_b_i[27:24]}; // len = 0 encodes for len = 16
+ assign bfp_off = operand_b_i[20:16];
+ assign bfp_mask = (RV32B != RV32BNone) ? ~(32'hffff_ffff << bfp_len) : '0;
+ for (genvar i = 0; i < 32; i++) begin : gen_rev_bfp_mask
+ assign bfp_mask_rev[i] = bfp_mask[31-i];
+ end
+
+ assign bfp_result =(RV32B != RV32BNone) ?
+ (~shift_result & operand_a_i) | ((operand_b_i & bfp_mask) << bfp_off) : '0;
+
+ // bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
+ // if set, reverse operations in first and second cycle.
+ assign shift_amt[5] = operand_b_i[5] & shift_funnel;
+ assign shift_amt_compl = 32 - operand_b_i[4:0];
+
+ always_comb begin
+ if (bfp_op) begin
+      shift_amt[4:0] = bfp_off;  // offset field of bfp control word
+ end else begin
+ shift_amt[4:0] = instr_first_cycle_i ?
+ (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) :
+ (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]);
+ end
+ end
+
+ // single-bit mode: shift
+ assign shift_sbmode = (RV32B != RV32BNone) ?
+ (operator_i == ALU_BSET) | (operator_i == ALU_BCLR) | (operator_i == ALU_BINV) : 1'b0;
+
+ // left shift if this is:
+ // * a standard left shift (slo, sll)
+ // * a rol in the first cycle
+ // * a ror in the second cycle
+ // * fsl: without word-swap bit: first cycle, else: second cycle
+ // * fsr: without word-swap bit: second cycle, else: first cycle
+ // * a single-bit instruction: bclr, bset, binv (excluding bext)
+ // * bfp: bfp_mask << bfp_off
+ always_comb begin
+ unique case (operator_i)
+ ALU_SLL: shift_left = 1'b1;
+ ALU_SLO: shift_left = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b1 : 1'b0;
+ ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+ ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 0;
+ ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 0;
+ ALU_FSL: shift_left = (RV32B != RV32BNone) ?
+ (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
+ ALU_FSR: shift_left = (RV32B != RV32BNone) ?
+ (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0;
+ default: shift_left = 1'b0;
+ endcase
+ if (shift_sbmode) begin
+ shift_left = 1'b1;
+ end
+ end
+
+ assign shift_arith = (operator_i == ALU_SRA);
+ assign shift_ones = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ?
+ (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
+ assign shift_funnel = (RV32B != RV32BNone) ?
+ (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0;
+
+ // shifter structure.
+ always_comb begin
+ // select shifter input
+ // for bfp, sbmode and shift_left the corresponding bit-reversed input is chosen.
+ if (RV32B == RV32BNone) begin
+ shift_operand = shift_left ? operand_a_rev : operand_a_i;
+ end else begin
+ unique case (1'b1)
+ bfp_op: shift_operand = bfp_mask_rev;
+ shift_sbmode: shift_operand = 32'h8000_0000;
+ default: shift_operand = shift_left ? operand_a_rev : operand_a_i;
+ endcase
+ end
+
+ shift_result_ext_signed =
+ $signed({shift_ones | (shift_arith & shift_operand[31]), shift_operand}) >>> shift_amt[4:0];
+ shift_result_ext = $unsigned(shift_result_ext_signed);
+
+ shift_result = shift_result_ext[31:0];
+ unused_shift_result_ext = shift_result_ext[32];
+
+ for (int unsigned i = 0; i < 32; i++) begin
+ shift_result_rev[i] = shift_result[31-i];
+ end
+
+ shift_result = shift_left ? shift_result_rev : shift_result;
+
+ end
+
+ ///////////////////
+ // Bitwise Logic //
+ ///////////////////
+
+ logic bwlogic_or;
+ logic bwlogic_and;
+ logic [31:0] bwlogic_operand_b;
+ logic [31:0] bwlogic_or_result;
+ logic [31:0] bwlogic_and_result;
+ logic [31:0] bwlogic_xor_result;
+ logic [31:0] bwlogic_result;
+
+ logic bwlogic_op_b_negate;
+
+ always_comb begin
+ unique case (operator_i)
+ // Logic-with-negate OPs (RV32B Ops)
+ ALU_XNOR,
+ ALU_ORN,
+ ALU_ANDN: bwlogic_op_b_negate = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
+ ALU_CMIX: bwlogic_op_b_negate = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 1'b0;
+ default: bwlogic_op_b_negate = 1'b0;
+ endcase
+ end
+
+ assign bwlogic_operand_b = bwlogic_op_b_negate ? operand_b_neg[32:1] : operand_b_i;
+
+ assign bwlogic_or_result = operand_a_i | bwlogic_operand_b;
+ assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
+ assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
+
+ assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN);
+ assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN);
+
+ always_comb begin
+ unique case (1'b1)
+ bwlogic_or: bwlogic_result = bwlogic_or_result;
+ bwlogic_and: bwlogic_result = bwlogic_and_result;
+ default: bwlogic_result = bwlogic_xor_result;
+ endcase
+ end
+
+ logic [5:0] bitcnt_result;
+ logic [31:0] minmax_result;
+ logic [31:0] pack_result;
+ logic [31:0] sext_result;
+ logic [31:0] singlebit_result;
+ logic [31:0] rev_result;
+ logic [31:0] shuffle_result;
+ logic [31:0] xperm_result;
+ logic [31:0] butterfly_result;
+ logic [31:0] invbutterfly_result;
+ logic [31:0] clmul_result;
+ logic [31:0] multicycle_result;
+
+ if (RV32B != RV32BNone) begin : g_alu_rvb
+
+ /////////////////
+ // Bitcounting //
+ /////////////////
+
+ // The bit-counter structure computes the number of set bits in its operand. Partial results
+ // (from left to right) are needed to compute the control masks for computation of
+ // bcompress/bdecompress by the butterfly network, if implemented.
+ // For cpop, clz and ctz, only the end result is used.
+
+ logic zbe_op;
+ logic bitcnt_ctz;
+ logic bitcnt_clz;
+ logic bitcnt_cz;
+ logic [31:0] bitcnt_bits;
+ logic [31:0] bitcnt_mask_op;
+ logic [31:0] bitcnt_bit_mask;
+ logic [ 5:0] bitcnt_partial [32];
+ logic [31:0] bitcnt_partial_lsb_d;
+ logic [31:0] bitcnt_partial_msb_d;
+
+
+ assign bitcnt_ctz = operator_i == ALU_CTZ;
+ assign bitcnt_clz = operator_i == ALU_CLZ;
+ assign bitcnt_cz = bitcnt_ctz | bitcnt_clz;
+ assign bitcnt_result = bitcnt_partial[31];
+
+ // Bit-mask generation for clz and ctz:
+ // The bit mask is generated by spreading the lowest-order set bit in the operand to all
+ // higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order
+ // to create the bit mask for leading zeros, the input operand needs to be reversed.
+ assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i;
+
+ always_comb begin
+ bitcnt_bit_mask = bitcnt_mask_op;
+ bitcnt_bit_mask |= bitcnt_bit_mask << 1;
+ bitcnt_bit_mask |= bitcnt_bit_mask << 2;
+ bitcnt_bit_mask |= bitcnt_bit_mask << 4;
+ bitcnt_bit_mask |= bitcnt_bit_mask << 8;
+ bitcnt_bit_mask |= bitcnt_bit_mask << 16;
+ bitcnt_bit_mask = ~bitcnt_bit_mask;
+ end
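+    // For example, for ctz with operand_a_i = 32'h0000_0008 the spread mask becomes
+    // 32'hffff_fff8 and its inverse 32'h0000_0007, so bitcnt_bits = 32'h0000_0007 and the
+    // popcount below yields 3, the number of trailing zeros.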
+
+ assign zbe_op = (operator_i == ALU_BCOMPRESS) | (operator_i == ALU_BDECOMPRESS);
+
+ always_comb begin
+ case (1'b1)
+ zbe_op: bitcnt_bits = operand_b_i;
+ bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
+ default: bitcnt_bits = operand_a_i; // cpop
+ endcase
+ end
+
+ // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first
+ // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at
+ // positions 2**n-1 (power-of-two positions) where n denotes the current stage.
+ // In stage n=log2(width), the count for position width-1 (the MSB) is finished.
+ // For the intermediate values, an inverse adder tree then computes the bit counts for the bit
+ // lines at positions
+ // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2].
+ // Thus, at every subsequent stage the result of two previously unconnected sub-trees is
+ // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2]
+ // and moving to iteratively sum up all the sub-trees.
+    // The inverse adder tree thus features log2(width) - 1 stages; the first of these stages is a
+ // single addition at position 3*width/4 - 1. It does not interfere with the last
+ // stage of the primary adder tree. These stages can thus be folded together, resulting in a
+ // total of 2*log2(width)-2 stages.
+ // For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
+ // (1982).
+ // For a bitline at position p, only bits
+ // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the
+ // butterfly network control signals. The adders in the intermediate value adder tree thus need
+ // not be full 5-bit adders. We leave the optimization to the synthesis tools.
+ //
+    // Consider the following 8-bit example for illustration.
+ //
+ // let bitcnt_bits = 8'babcdefgh.
+ //
+ // a b c d e f g h
+ // | /: | /: | /: | /:
+ // |/ : |/ : |/ : |/ :
+ // stage 1: + : + : + : + :
+ // | : /: : | : /: :
+ // |,--+ : : |,--+ : :
+ // stage 2: + : : : + : : :
+ // | : | : /: : : :
+ // |,-----,--+ : : : : ^-primary adder tree
+ // stage 3: + : + : : : : : -------------------------
+ // : | /| /| /| /| /| : ,-intermediate adder tree
+ // : |/ |/ |/ |/ |/ : :
+ // stage 4 : + + + + + : :
+ // : : : : : : : :
+ // bitcnt_partial[i] 7 6 5 4 3 2 1 0
+
+ always_comb begin
+ bitcnt_partial = '{default: '0};
+ // stage 1
+ for (int unsigned i = 1; i < 32; i += 2) begin
+ bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]};
+ end
+ // stage 2
+ for (int unsigned i = 3; i < 32; i += 4) begin
+ bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+ end
+ // stage 3
+ for (int unsigned i = 7; i < 32; i += 8) begin
+ bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+ end
+ // stage 4
+ for (int unsigned i = 15; i < 32; i += 16) begin
+ bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i];
+ end
+ // stage 5
+ bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31];
+ // ^- primary adder tree
+ // -------------------------------
+ // ,-intermediate value adder tree
+ bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23];
+
+ // stage 6
+ for (int unsigned i = 11; i < 32; i += 8) begin
+ bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
+ end
+
+ // stage 7
+ for (int unsigned i = 5; i < 32; i += 4) begin
+ bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
+ end
+ // stage 8
+ bitcnt_partial[0] = {5'h0, bitcnt_bits[0]};
+ for (int unsigned i = 2; i < 32; i += 2) begin
+ bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]};
+ end
+ end
+
+ ///////////////
+ // Min / Max //
+ ///////////////
+
+ assign minmax_result = cmp_result ? operand_a_i : operand_b_i;
+
+ //////////
+ // Pack //
+ //////////
+
+ logic packu;
+ logic packh;
+ assign packu = operator_i == ALU_PACKU;
+ assign packh = operator_i == ALU_PACKH;
+
+ always_comb begin
+ unique case (1'b1)
+ packu: pack_result = {operand_b_i[31:16], operand_a_i[31:16]};
+ packh: pack_result = {16'h0, operand_b_i[7:0], operand_a_i[7:0]};
+ default: pack_result = {operand_b_i[15:0], operand_a_i[15:0]};
+ endcase
+ end
+
+ //////////
+ // Sext //
+ //////////
+
+ assign sext_result = (operator_i == ALU_SEXTB) ?
+ { {24{operand_a_i[7]}}, operand_a_i[7:0]} : { {16{operand_a_i[15]}}, operand_a_i[15:0]};
+
+ /////////////////////////////
+ // Single-bit Instructions //
+ /////////////////////////////
+
+ always_comb begin
+ unique case (operator_i)
+ ALU_BSET: singlebit_result = operand_a_i | shift_result;
+ ALU_BCLR: singlebit_result = operand_a_i & ~shift_result;
+ ALU_BINV: singlebit_result = operand_a_i ^ shift_result;
+ default: singlebit_result = {31'h0, shift_result[0]}; // ALU_BEXT
+ endcase
+ end
+
+ ////////////////////////////////////
+ // General Reverse and Or-combine //
+ ////////////////////////////////////
+
+ // Only a subset of the general reverse and or-combine instructions are implemented in the
+ // balanced version of the B extension. Currently rev8 (shift_amt = 5'b11000) and orc.b
+ // (shift_amt = 5'b00111) are supported in the base extension.
+
+ logic [4:0] zbp_shift_amt;
+ logic gorc_op;
+
+ assign gorc_op = (operator_i == ALU_GORC);
+ assign zbp_shift_amt[2:0] =
+ (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? shift_amt[2:0] : {3{shift_amt[0]}};
+ assign zbp_shift_amt[4:3] =
+ (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? shift_amt[4:3] : {2{shift_amt[3]}};
+
+ always_comb begin
+ rev_result = operand_a_i;
+
+ if (zbp_shift_amt[0]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h5555_5555) << 1) |
+ ((rev_result & 32'haaaa_aaaa) >> 1);
+ end
+
+ if (zbp_shift_amt[1]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h3333_3333) << 2) |
+ ((rev_result & 32'hcccc_cccc) >> 2);
+ end
+
+ if (zbp_shift_amt[2]) begin
+ rev_result = (gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h0f0f_0f0f) << 4) |
+ ((rev_result & 32'hf0f0_f0f0) >> 4);
+ end
+
+ if (zbp_shift_amt[3]) begin
+ rev_result = ((RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) &&
+ gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h00ff_00ff) << 8) |
+ ((rev_result & 32'hff00_ff00) >> 8);
+ end
+
+ if (zbp_shift_amt[4]) begin
+ rev_result = ((RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) &&
+ gorc_op ? rev_result : 32'h0) |
+ ((rev_result & 32'h0000_ffff) << 16) |
+ ((rev_result & 32'hffff_0000) >> 16);
+ end
+ end
+
+ logic crc_hmode;
+ logic crc_bmode;
+ logic [31:0] clmul_result_rev;
+
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin : gen_alu_rvb_otearlgrey_full
+
+ /////////////////////////
+ // Shuffle / Unshuffle //
+ /////////////////////////
+
+ localparam logic [31:0] SHUFFLE_MASK_L [4] =
+ '{32'h00ff_0000, 32'h0f00_0f00, 32'h3030_3030, 32'h4444_4444};
+ localparam logic [31:0] SHUFFLE_MASK_R [4] =
+ '{32'h0000_ff00, 32'h00f0_00f0, 32'h0c0c_0c0c, 32'h2222_2222};
+
+ localparam logic [31:0] FLIP_MASK_L [4] =
+ '{32'h2200_1100, 32'h0044_0000, 32'h4411_0000, 32'h1100_0000};
+ localparam logic [31:0] FLIP_MASK_R [4] =
+ '{32'h0088_0044, 32'h0000_2200, 32'h0000_8822, 32'h0000_0088};
+
+ logic [31:0] SHUFFLE_MASK_NOT [4];
+ for(genvar i = 0; i < 4; i++) begin : gen_shuffle_mask_not
+ assign SHUFFLE_MASK_NOT[i] = ~(SHUFFLE_MASK_L[i] | SHUFFLE_MASK_R[i]);
+ end
+
+ logic shuffle_flip;
+ assign shuffle_flip = operator_i == ALU_UNSHFL;
+
+ logic [3:0] shuffle_mode;
+
+ always_comb begin
+ shuffle_result = operand_a_i;
+
+ if (shuffle_flip) begin
+ shuffle_mode[3] = shift_amt[0];
+ shuffle_mode[2] = shift_amt[1];
+ shuffle_mode[1] = shift_amt[2];
+ shuffle_mode[0] = shift_amt[3];
+ end else begin
+ shuffle_mode = shift_amt[3:0];
+ end
+
+ if (shuffle_flip) begin
+ shuffle_result = (shuffle_result & 32'h8822_4411) |
+ ((shuffle_result << 6) & FLIP_MASK_L[0]) |
+ ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
+ ((shuffle_result << 9) & FLIP_MASK_L[1]) |
+ ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
+ ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+ ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+ ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+ ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+ end
+
+ if (shuffle_mode[3]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[0]) |
+ (((shuffle_result << 8) & SHUFFLE_MASK_L[0]) |
+ ((shuffle_result >> 8) & SHUFFLE_MASK_R[0]));
+ end
+ if (shuffle_mode[2]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[1]) |
+ (((shuffle_result << 4) & SHUFFLE_MASK_L[1]) |
+ ((shuffle_result >> 4) & SHUFFLE_MASK_R[1]));
+ end
+ if (shuffle_mode[1]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[2]) |
+ (((shuffle_result << 2) & SHUFFLE_MASK_L[2]) |
+ ((shuffle_result >> 2) & SHUFFLE_MASK_R[2]));
+ end
+ if (shuffle_mode[0]) begin
+ shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[3]) |
+ (((shuffle_result << 1) & SHUFFLE_MASK_L[3]) |
+ ((shuffle_result >> 1) & SHUFFLE_MASK_R[3]));
+ end
+
+ if (shuffle_flip) begin
+ shuffle_result = (shuffle_result & 32'h8822_4411) |
+ ((shuffle_result << 6) & FLIP_MASK_L[0]) |
+ ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
+ ((shuffle_result << 9) & FLIP_MASK_L[1]) |
+ ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
+ ((shuffle_result << 15) & FLIP_MASK_L[2]) |
+ ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
+ ((shuffle_result << 21) & FLIP_MASK_L[3]) |
+ ((shuffle_result >> 21) & FLIP_MASK_R[3]);
+ end
+ end
+
+ //////////////
+ // Crossbar //
+ //////////////
+ // The crossbar permutation instructions xperm.[nbh] (Zbp) can be implemented using 8
+ // parallel 4-bit-wide, 8-input crossbars. Basically, we permute the 8 nibbles of operand_a_i
+ // based on operand_b_i.
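+ //
+ // Illustrative example for xperm.n: with operand_a_i = 32'hfedc_ba98 and
+ // operand_b_i = 32'h0000_0013, output nibble 0 selects nibble 3 of operand_a_i (4'hb),
+ // output nibble 1 selects nibble 1 (4'h9), and the remaining nibbles select nibble 0 (4'h8),
+ // giving 32'h8888_889b. A selector nibble with bit 3 set is out of range and yields 4'h0.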
+
+ // Generate selector indices and valid signals.
+ // - sel_n[x] indicates which nibble of operand_a_i is selected for output nibble x.
+ // - vld_n[x] indicates if the selection is valid.
+ logic [7:0][2:0] sel_n; // nibbles
+ logic [7:0] vld_n; // nibbles
+ logic [3:0][1:0] sel_b; // bytes
+ logic [3:0] vld_b; // bytes
+ logic [1:0][0:0] sel_h; // half words
+ logic [1:0] vld_h; // half words
+
+ // Per nibble, 3 bits are needed for the selection. Other bits must be zero.
+ // sel_n bit mask: 32'b0111_0111_0111_0111_0111_0111_0111_0111
+ // vld_n bit mask: 32'b1000_1000_1000_1000_1000_1000_1000_1000
+ for (genvar i = 0; i < 8; i++) begin : gen_sel_vld_n
+ assign sel_n[i] = operand_b_i[i*4 +: 3];
+ assign vld_n[i] = ~|operand_b_i[i*4 + 3 +: 1];
+ end
+
+ // Per byte, 2 bits are needed for the selection. Other bits must be zero.
+ // sel_b bit mask: 32'b0000_0011_0000_0011_0000_0011_0000_0011
+ // vld_b bit mask: 32'b1111_1100_1111_1100_1111_1100_1111_1100
+ for (genvar i = 0; i < 4; i++) begin : gen_sel_vld_b
+ assign sel_b[i] = operand_b_i[i*8 +: 2];
+ assign vld_b[i] = ~|operand_b_i[i*8 + 2 +: 6];
+ end
+
+ // Per half word, only 1 bit is needed for the selection. All other bits must be zero.
+ // sel_h bit mask: 32'b0000_0000_0000_0001_0000_0000_0000_0001
+ // vld_h bit mask: 32'b1111_1111_1111_1110_1111_1111_1111_1110
+ for (genvar i = 0; i < 2; i++) begin : gen_sel_vld_h
+ assign sel_h[i] = operand_b_i[i*16 +: 1];
+ assign vld_h[i] = ~|operand_b_i[i*16 + 1 +: 15];
+ end
+
+ // Convert selector indices and valid signals to control the nibble-based
+ // crossbar logic.
+ logic [7:0][2:0] sel;
+ logic [7:0] vld;
+ always_comb begin
+ unique case (operator_i)
+ ALU_XPERM_N: begin
+ // No conversion needed.
+ sel = sel_n;
+ vld = vld_n;
+ end
+
+ ALU_XPERM_B: begin
+ // Convert byte to nibble indices.
+ for (int b = 0; b < 4; b++) begin
+ sel[b*2 + 0] = {sel_b[b], 1'b0};
+ sel[b*2 + 1] = {sel_b[b], 1'b1};
+ vld[b*2 +: 2] = {2{vld_b[b]}};
+ end
+ end
+
+ ALU_XPERM_H: begin
+ // Convert half-word to nibble indices.
+ for (int h = 0; h < 2; h++) begin
+ sel[h*4 + 0] = {sel_h[h], 2'b00};
+ sel[h*4 + 1] = {sel_h[h], 2'b01};
+ sel[h*4 + 2] = {sel_h[h], 2'b10};
+ sel[h*4 + 3] = {sel_h[h], 2'b11};
+ vld[h*4 +: 4] = {4{vld_h[h]}};
+ end
+ end
+
+ default: begin
+ // Tie valid to zero to disable the crossbar unless we need it.
+ sel = sel_n;
+ vld = '0;
+ end
+ endcase
+ end
+
+ // The actual nibble-based crossbar logic.
+ logic [7:0][3:0] val_n;
+ logic [7:0][3:0] xperm_n;
+ assign val_n = operand_a_i;
+ for (genvar i = 0; i < 8; i++) begin : gen_xperm_n
+ assign xperm_n[i] = vld[i] ? val_n[sel[i]] : '0;
+ end
+ assign xperm_result = xperm_n;
+
+ ///////////////////////////////////////////////////
+ // Carry-less Multiply + Cyclic Redundancy Check //
+ ///////////////////////////////////////////////////
+
+ // Carry-less multiplication can be understood as multiplication based on
+ // the addition interpreted as the bit-wise xor operation.
+ //
+ // Example: 1101 X 1011 = 1111111:
+ //
+ // 1011 X 1101
+ // -----------
+ // 1101
+ // xor 1101
+ // ---------
+ // 10111
+ // xor 0000
+ // ----------
+ // 010111
+ // xor 1101
+ // -----------
+ // 1111111
+ //
+ // Architectural details:
+ // A 32 x 32-bit array
+ // [ operand_b[i] ? (operand_a << i) : '0 for i in 0 ... 31 ]
+ // is generated. The entries of the array are pairwise 'xor-ed'
+ // together in a 5-stage binary tree.
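+ // For the 4-bit example above (a = 1101, b = 1011) the populated rows of the array are
+ // a << 0 = 0001101, a << 1 = 0011010 and a << 3 = 1101000 (the row for i = 2 is zero since
+ // b[2] = 0); xor-ing them gives 1111111, matching the result above.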
+ //
+ //
+ // Cyclic Redundancy Check:
+ //
+ // CRC-32 (CRC-32/ISO-HDLC) and CRC-32C (CRC-32/ISCSI) are directly implemented. For
+ // documentation of the crc configuration (crc-polynomials, initialization, reflection, etc.)
+ // see http://reveng.sourceforge.net/crc-catalogue/all.htm
+ // A useful guide to crc arithmetic and algorithms is given here:
+ // http://www.piclist.com/techref/method/math/crcguide.html.
+ //
+ // The CRC operation solves the following equation using binary polynomial arithmetic:
+ //
+ // rev(rd)(x) = rev(rs1)(x) * x**n mod {1, P}(x)
+ //
+ // where P denotes the lower 32 bits of the corresponding CRC polynomial, rev(a) the bit
+ // reversal of a, and n = 8, 16, or 32 for the .b, .h, .w variants. {a, b} denotes bit
+ // concatenation.
+ //
+ // Using Barrett reduction, one can show that
+ //
+ // M(x) mod P(x) = R(x) =
+ // (M(x) * x**n) & {deg(P(x)'{1'b1}}) ^ (M(x) x**-(deg(P(x) - n)) cx mu(x) cx P(x),
+ //
+ // Where mu(x) = polydiv(x**64, {1,P}) & 0xffffffff. Here, 'cx' refers to carry-less
+ // multiplication. Substituting rev(rd)(x) for R(x) and rev(rs1)(x) for M(x) and solving for
+ // rd(x) with P(x) a crc32 polynomial (deg(P(x)) = 32), we get
+ //
+ // rd = rev( (rev(rs1) << n) ^ ((rev(rs1) >> (32-n)) cx mu cx P)
+ // = (rs1 >> n) ^ rev(rev( (rs1 << (32-n)) cx rev(mu)) cx P)
+ // ^-- cycle 0--------------------^
+ // ^- cycle 1 -------------------------------------------^
+ //
+ // In the last step we used the fact that carry-less multiplication is bit-order agnostic:
+ // rev(a cx b) = rev(a) cx rev(b).
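+ //
+ // In the datapath below this maps onto two passes through the clmul array: cycle 0
+ // multiplies the (left-aligned) operand with the bit-reversed mu constant and stores the
+ // bit-reversed product in the intermediate register; cycle 1 multiplies that value with the
+ // CRC polynomial, and the multicycle result logic reverses it again and xors in the
+ // right-shifted operand for the .b/.h variants.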
+
+ logic clmul_rmode;
+ logic clmul_hmode;
+ logic [31:0] clmul_op_a;
+ logic [31:0] clmul_op_b;
+ logic [31:0] operand_b_rev;
+ logic [31:0] clmul_and_stage[32];
+ logic [31:0] clmul_xor_stage1[16];
+ logic [31:0] clmul_xor_stage2[8];
+ logic [31:0] clmul_xor_stage3[4];
+ logic [31:0] clmul_xor_stage4[2];
+
+ logic [31:0] clmul_result_raw;
+
+ for (genvar i = 0; i < 32; i++) begin : gen_rev_operand_b
+ assign operand_b_rev[i] = operand_b_i[31-i];
+ end
+
+ assign clmul_rmode = operator_i == ALU_CLMULR;
+ assign clmul_hmode = operator_i == ALU_CLMULH;
+
+ // CRC
+ localparam logic [31:0] CRC32_POLYNOMIAL = 32'h04c1_1db7;
+ localparam logic [31:0] CRC32_MU_REV = 32'hf701_1641;
+
+ localparam logic [31:0] CRC32C_POLYNOMIAL = 32'h1edc_6f41;
+ localparam logic [31:0] CRC32C_MU_REV = 32'hdea7_13f1;
+
+ logic crc_op;
+
+ logic crc_cpoly;
+
+ logic [31:0] crc_operand;
+ logic [31:0] crc_poly;
+ logic [31:0] crc_mu_rev;
+
+ assign crc_op = (operator_i == ALU_CRC32C_W) | (operator_i == ALU_CRC32_W) |
+ (operator_i == ALU_CRC32C_H) | (operator_i == ALU_CRC32_H) |
+ (operator_i == ALU_CRC32C_B) | (operator_i == ALU_CRC32_B);
+
+ assign crc_cpoly = (operator_i == ALU_CRC32C_W) |
+ (operator_i == ALU_CRC32C_H) |
+ (operator_i == ALU_CRC32C_B);
+
+ assign crc_hmode = (operator_i == ALU_CRC32_H) | (operator_i == ALU_CRC32C_H);
+ assign crc_bmode = (operator_i == ALU_CRC32_B) | (operator_i == ALU_CRC32C_B);
+
+ assign crc_poly = crc_cpoly ? CRC32C_POLYNOMIAL : CRC32_POLYNOMIAL;
+ assign crc_mu_rev = crc_cpoly ? CRC32C_MU_REV : CRC32_MU_REV;
+
+ always_comb begin
+ unique case (1'b1)
+ crc_bmode: crc_operand = {operand_a_i[7:0], 24'h0};
+ crc_hmode: crc_operand = {operand_a_i[15:0], 16'h0};
+ default: crc_operand = operand_a_i;
+ endcase
+ end
+
+ // Select clmul input
+ always_comb begin
+ if (crc_op) begin
+ clmul_op_a = instr_first_cycle_i ? crc_operand : imd_val_q_i[0];
+ clmul_op_b = instr_first_cycle_i ? crc_mu_rev : crc_poly;
+ end else begin
+ clmul_op_a = clmul_rmode | clmul_hmode ? operand_a_rev : operand_a_i;
+ clmul_op_b = clmul_rmode | clmul_hmode ? operand_b_rev : operand_b_i;
+ end
+ end
+
+ for (genvar i = 0; i < 32; i++) begin : gen_clmul_and_op
+ assign clmul_and_stage[i] = clmul_op_b[i] ? clmul_op_a << i : '0;
+ end
+
+ for (genvar i = 0; i < 16; i++) begin : gen_clmul_xor_op_l1
+ assign clmul_xor_stage1[i] = clmul_and_stage[2*i] ^ clmul_and_stage[2*i+1];
+ end
+
+ for (genvar i = 0; i < 8; i++) begin : gen_clmul_xor_op_l2
+ assign clmul_xor_stage2[i] = clmul_xor_stage1[2*i] ^ clmul_xor_stage1[2*i+1];
+ end
+
+ for (genvar i = 0; i < 4; i++) begin : gen_clmul_xor_op_l3
+ assign clmul_xor_stage3[i] = clmul_xor_stage2[2*i] ^ clmul_xor_stage2[2*i+1];
+ end
+
+ for (genvar i = 0; i < 2; i++) begin : gen_clmul_xor_op_l4
+ assign clmul_xor_stage4[i] = clmul_xor_stage3[2*i] ^ clmul_xor_stage3[2*i+1];
+ end
+
+ assign clmul_result_raw = clmul_xor_stage4[0] ^ clmul_xor_stage4[1];
+
+ for (genvar i = 0; i < 32; i++) begin : gen_rev_clmul_result
+ assign clmul_result_rev[i] = clmul_result_raw[31-i];
+ end
+
+ // clmulr_result = rev(clmul(rev(a), rev(b)))
+ // clmulh_result = clmulr_result >> 1
+ always_comb begin
+ case (1'b1)
+ clmul_rmode: clmul_result = clmul_result_rev;
+ clmul_hmode: clmul_result = {1'b0, clmul_result_rev[31:1]};
+ default: clmul_result = clmul_result_raw;
+ endcase
+ end
+ end else begin : gen_alu_rvb_not_otearlgrey_full
+ assign shuffle_result = '0;
+ assign xperm_result = '0;
+ assign clmul_result = '0;
+ // support signals
+ assign clmul_result_rev = '0;
+ assign crc_bmode = '0;
+ assign crc_hmode = '0;
+ end
+
+ if (RV32B == RV32BFull) begin : gen_alu_rvb_full
+
+ ///////////////
+ // Butterfly //
+ ///////////////
+
+ // The butterfly / inverse butterfly network executing bcompress/bdecompress (zbe)
+ // instructions. For bdecompress, the control bit mask of a local left region is generated
+ // by the inverse of an n-bit left rotate and complement upon wrap (LROTC) operation by the
+ // number of ones in the deposit bitmask to the right of the segment, where n denotes the
+ // width of the corresponding segment. The bitmask for the pertaining local right region is
+ // equal to the corresponding local left region. Bcompress uses the analogous inverse process.
+ // Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather,
+ // Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008).
+ //
+ // The bcompress/bdecompress instructions are completed in 2 cycles. In the first cycle, the
+ // control bitmask is prepared by executing the parallel prefix bit count. In the second
+ // cycle, the bit swapping is executed according to the control masks.
+
+ // 8-bit example: (Hilewitz et al.)
+ // Consider the instruction bdecompress operand_a_i deposit_mask
+ // Let operand_a_i = 8'babcd_efgh
+ // deposit_mask = 8'b1010_1101
+ //
+ // control bitmask for stage 1:
+ // - number of ones in the right half of the deposit bitmask: 3
+ // - width of the segment: 4
+ // - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000
+ //
+ // control bitmask: c3 c2 c1 c0 c3 c2 c1 c0
+ // 1 0 0 0 1 0 0 0
+ // <- L -----> <- R ----->
+ // operand_a_i a b c d e f g h
+ // :\ | | | /: | | |
+ // : +|---|--|-+ : | | |
+ // :/ | | | \: | | |
+ // stage 1 e b c d a f g h
+ // <L-> <R-> <L-> <R->
+ // control bitmask: c3 c2 c3 c2 c1 c0 c1 c0
+ // 1 1 1 1 1 0 1 0
+ // :\ :\ /: /: :\ | /: |
+ // : +:-+-:+ : : +|-+ : |
+ // :/ :/ \: \: :/ | \: |
+ // stage 2 c d e b g f a h
+ // L R L R L R L R
+ // control bitmask: c3 c3 c2 c2 c1 c1 c0 c0
+ // 1 1 0 0 1 1 0 0
+ // :\/: | | :\/: | |
+ // : : | | : : | |
+ // :/\: | | :/\: | |
+ // stage 3 d c e b f g a h
+ // & deposit bitmask: 1 0 1 0 1 1 0 1
+ // result: d 0 e 0 f g 0 h
+
+ logic [ 5:0] bitcnt_partial_q [32];
+
+ // first cycle
+ // Store partial bitcnts
+ for (genvar i = 0; i < 32; i++) begin : gen_bitcnt_reg_in_lsb
+ assign bitcnt_partial_lsb_d[i] = bitcnt_partial[i][0];
+ end
+
+ for (genvar i = 0; i < 16; i++) begin : gen_bitcnt_reg_in_b1
+ assign bitcnt_partial_msb_d[i] = bitcnt_partial[2*i+1][1];
+ end
+
+ for (genvar i = 0; i < 8; i++) begin : gen_bitcnt_reg_in_b2
+ assign bitcnt_partial_msb_d[16+i] = bitcnt_partial[4*i+3][2];
+ end
+
+ for (genvar i = 0; i < 4; i++) begin : gen_bitcnt_reg_in_b3
+ assign bitcnt_partial_msb_d[24+i] = bitcnt_partial[8*i+7][3];
+ end
+
+ for (genvar i = 0; i < 2; i++) begin : gen_bitcnt_reg_in_b4
+ assign bitcnt_partial_msb_d[28+i] = bitcnt_partial[16*i+15][4];
+ end
+
+ assign bitcnt_partial_msb_d[30] = bitcnt_partial[31][5];
+ assign bitcnt_partial_msb_d[31] = 1'b0; // unused
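+ // The packing above stores bit 0 of all 32 partial bit counts in the first intermediate
+ // register and the needed upper bits of selected partial counts in the second one; the
+ // always_comb block below unpacks them again in the second cycle.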
+
+ // Second cycle
+ // Load partial bitcnts
+ always_comb begin
+ bitcnt_partial_q = '{default: '0};
+
+ for (int unsigned i = 0; i < 32; i++) begin : gen_bitcnt_reg_out_lsb
+ bitcnt_partial_q[i][0] = imd_val_q_i[0][i];
+ end
+
+ for (int unsigned i = 0; i < 16; i++) begin : gen_bitcnt_reg_out_b1
+ bitcnt_partial_q[2*i+1][1] = imd_val_q_i[1][i];
+ end
+
+ for (int unsigned i = 0; i < 8; i++) begin : gen_bitcnt_reg_out_b2
+ bitcnt_partial_q[4*i+3][2] = imd_val_q_i[1][16+i];
+ end
+
+ for (int unsigned i = 0; i < 4; i++) begin : gen_bitcnt_reg_out_b3
+ bitcnt_partial_q[8*i+7][3] = imd_val_q_i[1][24+i];
+ end
+
+ for (int unsigned i = 0; i < 2; i++) begin : gen_bitcnt_reg_out_b4
+ bitcnt_partial_q[16*i+15][4] = imd_val_q_i[1][28+i];
+ end
+
+ bitcnt_partial_q[31][5] = imd_val_q_i[1][30];
+ end
+
+ logic [31:0] butterfly_mask_l[5];
+ logic [31:0] butterfly_mask_r[5];
+ logic [31:0] butterfly_mask_not[5];
+ logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap
+
+ // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage
+ `define _N(stg) (16 >> stg)
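+ // e.g. stage 0 operates on a single 32-bit segment (_N = 16), stage 4 on sixteen 2-bit
+ // segments (_N = 1).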
+
+ // bcompress / bdecompress control bit generation
+ for (genvar stg = 0; stg < 5; stg++) begin : gen_butterfly_ctrl_stage
+ // number of segs: 2** stg
+ for (genvar seg=0; seg<2**stg; seg++) begin : gen_butterfly_ctrl
+
+ assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] =
+ {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} <<
+ bitcnt_partial_q[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0];
+
+ assign butterfly_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]
+ = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+
+ assign butterfly_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]
+ = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
+
+ assign butterfly_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0;
+ assign butterfly_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0;
+ end
+ end
+ `undef _N
+
+ for (genvar stg = 0; stg < 5; stg++) begin : gen_butterfly_not
+ assign butterfly_mask_not[stg] =
+ ~(butterfly_mask_l[stg] | butterfly_mask_r[stg]);
+ end
+
+ always_comb begin
+ butterfly_result = operand_a_i;
+
+ butterfly_result = butterfly_result & butterfly_mask_not[0] |
+ ((butterfly_result & butterfly_mask_l[0]) >> 16)|
+ ((butterfly_result & butterfly_mask_r[0]) << 16);
+
+ butterfly_result = butterfly_result & butterfly_mask_not[1] |
+ ((butterfly_result & butterfly_mask_l[1]) >> 8)|
+ ((butterfly_result & butterfly_mask_r[1]) << 8);
+
+ butterfly_result = butterfly_result & butterfly_mask_not[2] |
+ ((butterfly_result & butterfly_mask_l[2]) >> 4)|
+ ((butterfly_result & butterfly_mask_r[2]) << 4);
+
+ butterfly_result = butterfly_result & butterfly_mask_not[3] |
+ ((butterfly_result & butterfly_mask_l[3]) >> 2)|
+ ((butterfly_result & butterfly_mask_r[3]) << 2);
+
+ butterfly_result = butterfly_result & butterfly_mask_not[4] |
+ ((butterfly_result & butterfly_mask_l[4]) >> 1)|
+ ((butterfly_result & butterfly_mask_r[4]) << 1);
+
+ butterfly_result = butterfly_result & operand_b_i;
+ end
+
+ always_comb begin
+ invbutterfly_result = operand_a_i & operand_b_i;
+
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] |
+ ((invbutterfly_result & butterfly_mask_l[4]) >> 1)|
+ ((invbutterfly_result & butterfly_mask_r[4]) << 1);
+
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] |
+ ((invbutterfly_result & butterfly_mask_l[3]) >> 2)|
+ ((invbutterfly_result & butterfly_mask_r[3]) << 2);
+
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] |
+ ((invbutterfly_result & butterfly_mask_l[2]) >> 4)|
+ ((invbutterfly_result & butterfly_mask_r[2]) << 4);
+
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] |
+ ((invbutterfly_result & butterfly_mask_l[1]) >> 8)|
+ ((invbutterfly_result & butterfly_mask_r[1]) << 8);
+
+ invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] |
+ ((invbutterfly_result & butterfly_mask_l[0]) >> 16)|
+ ((invbutterfly_result & butterfly_mask_r[0]) << 16);
+ end
+ end else begin : gen_alu_rvb_not_full
+ logic [31:0] unused_imd_val_q_1;
+ assign unused_imd_val_q_1 = imd_val_q_i[1];
+ assign butterfly_result = '0;
+ assign invbutterfly_result = '0;
+ // support signals
+ assign bitcnt_partial_lsb_d = '0;
+ assign bitcnt_partial_msb_d = '0;
+ end
+
+ //////////////////////////////////////
+ // Multicycle Bitmanip Instructions //
+ //////////////////////////////////////
+ // Ternary instructions + Shift Rotations + Bit Compress/Decompress + CRC
+ // For ternary instructions (zbt), operand_a_i is tied to rs1 in the first cycle and rs3 in the
+ // second cycle. operand_b_i is always tied to rs2.
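+ //
+ // Illustrative example for ALU_CMOV (rd = rs2 ? rs1 : rs3): the first cycle stores rs1 in
+ // the intermediate register; in the second cycle the mux below returns rs3 (= operand_a_i)
+ // when rs2 (= operand_b_i) is zero and the stored rs1 otherwise.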
+
+ always_comb begin
+ unique case (operator_i)
+ ALU_CMOV: begin
+ multicycle_result = (operand_b_i == 32'h0) ? operand_a_i : imd_val_q_i[0];
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+
+ ALU_CMIX: begin
+ multicycle_result = imd_val_q_i[0] | bwlogic_and_result;
+ imd_val_d_o = '{bwlogic_and_result, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+
+ ALU_FSR, ALU_FSL,
+ ALU_ROL, ALU_ROR: begin
+ if (shift_amt[4:0] == 5'h0) begin
+ multicycle_result = shift_amt[5] ? operand_a_i : imd_val_q_i[0];
+ end else begin
+ multicycle_result = imd_val_q_i[0] | shift_result;
+ end
+ imd_val_d_o = '{shift_result, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end
+
+ ALU_CRC32_W, ALU_CRC32C_W,
+ ALU_CRC32_H, ALU_CRC32C_H,
+ ALU_CRC32_B, ALU_CRC32C_B: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ unique case (1'b1)
+ crc_bmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 8);
+ crc_hmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 16);
+ default: multicycle_result = clmul_result_rev;
+ endcase
+ imd_val_d_o = '{clmul_result_rev, 32'h0};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b01;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end else begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ end
+
+ ALU_BCOMPRESS, ALU_BDECOMPRESS: begin
+ if (RV32B == RV32BFull) begin
+ multicycle_result = (operator_i == ALU_BDECOMPRESS) ? butterfly_result :
+ invbutterfly_result;
+ imd_val_d_o = '{bitcnt_partial_lsb_d, bitcnt_partial_msb_d};
+ if (instr_first_cycle_i) begin
+ imd_val_we_o = 2'b11;
+ end else begin
+ imd_val_we_o = 2'b00;
+ end
+ end else begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ end
+
+ default: begin
+ imd_val_d_o = '{operand_a_i, 32'h0};
+ imd_val_we_o = 2'b00;
+ multicycle_result = '0;
+ end
+ endcase
+ end
+
+
+ end else begin : g_no_alu_rvb
+ logic [31:0] unused_imd_val_q[2];
+ assign unused_imd_val_q = imd_val_q_i;
+ logic [31:0] unused_butterfly_result;
+ assign unused_butterfly_result = butterfly_result;
+ logic [31:0] unused_invbutterfly_result;
+ assign unused_invbutterfly_result = invbutterfly_result;
+ // RV32B result signals
+ assign bitcnt_result = '0;
+ assign minmax_result = '0;
+ assign pack_result = '0;
+ assign sext_result = '0;
+ assign singlebit_result = '0;
+ assign rev_result = '0;
+ assign shuffle_result = '0;
+ assign xperm_result = '0;
+ assign butterfly_result = '0;
+ assign invbutterfly_result = '0;
+ assign clmul_result = '0;
+ assign multicycle_result = '0;
+ // RV32B support signals
+ assign imd_val_d_o = '{default: '0};
+ assign imd_val_we_o = '{default: '0};
+ end
+
+ ////////////////
+ // Result mux //
+ ////////////////
+
+ always_comb begin
+ result_o = '0;
+
+ unique case (operator_i)
+ // Bitwise Logic Operations (negate: RV32B)
+ ALU_XOR, ALU_XNOR,
+ ALU_OR, ALU_ORN,
+ ALU_AND, ALU_ANDN: result_o = bwlogic_result;
+
+ // Adder Operations
+ ALU_ADD, ALU_SUB,
+ // RV32B
+ ALU_SH1ADD, ALU_SH2ADD,
+ ALU_SH3ADD: result_o = adder_result;
+
+ // Shift Operations
+ ALU_SLL, ALU_SRL,
+ ALU_SRA,
+ // RV32B
+ ALU_SLO, ALU_SRO: result_o = shift_result;
+
+ // Shuffle Operations (RV32B)
+ ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
+
+ // Crossbar Permutation Operations (RV32B)
+ ALU_XPERM_N, ALU_XPERM_B, ALU_XPERM_H: result_o = xperm_result;
+
+ // Comparison Operations
+ ALU_EQ, ALU_NE,
+ ALU_GE, ALU_GEU,
+ ALU_LT, ALU_LTU,
+ ALU_SLT, ALU_SLTU: result_o = {31'h0,cmp_result};
+
+ // MinMax Operations (RV32B)
+ ALU_MIN, ALU_MAX,
+ ALU_MINU, ALU_MAXU: result_o = minmax_result;
+
+ // Bitcount Operations (RV32B)
+ ALU_CLZ, ALU_CTZ,
+ ALU_CPOP: result_o = {26'h0, bitcnt_result};
+
+ // Pack Operations (RV32B)
+ ALU_PACK, ALU_PACKH,
+ ALU_PACKU: result_o = pack_result;
+
+ // Sign-Extend (RV32B)
+ ALU_SEXTB, ALU_SEXTH: result_o = sext_result;
+
+ // Ternary Bitmanip Operations (RV32B)
+ ALU_CMIX, ALU_CMOV,
+ ALU_FSL, ALU_FSR,
+ // Rotate Shift (RV32B)
+ ALU_ROL, ALU_ROR,
+ // Cyclic Redundancy Checks (RV32B)
+ ALU_CRC32_W, ALU_CRC32C_W,
+ ALU_CRC32_H, ALU_CRC32C_H,
+ ALU_CRC32_B, ALU_CRC32C_B,
+ // Bit Compress / Decompress (RV32B)
+ ALU_BCOMPRESS, ALU_BDECOMPRESS: result_o = multicycle_result;
+
+ // Single-Bit Bitmanip Operations (RV32B)
+ ALU_BSET, ALU_BCLR,
+ ALU_BINV, ALU_BEXT: result_o = singlebit_result;
+
+ // General Reverse / Or-combine (RV32B)
+ ALU_GREV, ALU_GORC: result_o = rev_result;
+
+ // Bit Field Place (RV32B)
+ ALU_BFP: result_o = bfp_result;
+
+ // Carry-less Multiply Operations (RV32B)
+ ALU_CLMUL, ALU_CLMULR,
+ ALU_CLMULH: result_o = clmul_result;
+
+ default: ;
+ endcase
+ end
+
+ logic unused_shift_amt_compl;
+ assign unused_shift_amt_compl = shift_amt_compl[5];
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv b/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv
new file mode 100644
index 0000000..e99089b
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv
@@ -0,0 +1,100 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Branch Predictor
+ *
+ * This implements static branch prediction. It takes an instruction and its PC and determines if
+ * it's a branch or a jump and calculates its target. For jumps it will always predict taken. For
+ * branches it will predict taken if the PC offset is negative.
+ *
+ * This handles both compressed and uncompressed instructions. Compressed instructions must be in
+ * the lower 16-bits of instr.
+ *
+ * The predictor is entirely combinational but takes clk/rst_n signals for use by assertions.
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_branch_predict (
+ input logic clk_i,
+ input logic rst_ni,
+
+ // Instruction from fetch stage
+ input logic [31:0] fetch_rdata_i,
+ input logic [31:0] fetch_pc_i,
+ input logic fetch_valid_i,
+
+ // Prediction for supplied instruction
+ output logic predict_branch_taken_o,
+ output logic [31:0] predict_branch_pc_o
+);
+ import cheriot_pkg::*;
+
+ logic [31:0] imm_j_type;
+ logic [31:0] imm_b_type;
+ logic [31:0] imm_cj_type;
+ logic [31:0] imm_cb_type;
+
+ logic [31:0] branch_imm;
+
+ logic [31:0] instr;
+
+ logic instr_j;
+ logic instr_b;
+ logic instr_cj;
+ logic instr_cb;
+
+ logic instr_b_taken;
+
+ // Provide a short internal name for fetch_rdata_i to reduce line wrapping
+ assign instr = fetch_rdata_i;
+
+ // Extract and sign-extend to 32-bit the various immediates that may be used to calculate the
+ // target
+
+ // Uncompressed immediates
+ assign imm_j_type = { {12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 };
+ assign imm_b_type = { {19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 };
+
+ // Compressed immediates
+ assign imm_cj_type = { {20{instr[12]}}, instr[12], instr[8], instr[10:9], instr[6], instr[7],
+ instr[2], instr[11], instr[5:3], 1'b0 };
+
+ assign imm_cb_type = { {23{instr[12]}}, instr[12], instr[6:5], instr[2], instr[11:10],
+ instr[4:3], 1'b0};
+
+ // Determine if the instruction is a branch or a jump
+
+ // Uncompressed branch/jump
+ assign instr_b = opcode_e'(instr[6:0]) == OPCODE_BRANCH;
+ assign instr_j = opcode_e'(instr[6:0]) == OPCODE_JAL;
+
+ // Compressed branch/jump
+ assign instr_cb = (instr[1:0] == 2'b01) & ((instr[15:13] == 3'b110) | (instr[15:13] == 3'b111));
+ assign instr_cj = (instr[1:0] == 2'b01) & ((instr[15:13] == 3'b101) | (instr[15:13] == 3'b001));
+
+ // Select out the branch offset for target calculation based upon the instruction type
+ always_comb begin
+ branch_imm = imm_b_type;
+
+ unique case (1'b1)
+ instr_j : branch_imm = imm_j_type;
+ instr_b : branch_imm = imm_b_type;
+ instr_cj : branch_imm = imm_cj_type;
+ instr_cb : branch_imm = imm_cb_type;
+ default : ;
+ endcase
+ end
+
+ `ASSERT_IF(BranchInsTypeOneHot, $onehot0({instr_j, instr_b, instr_cj, instr_cb}), fetch_valid_i)
+
+ // Determine branch prediction, taken if offset is negative
+ assign instr_b_taken = (instr_b & imm_b_type[31]) | (instr_cb & imm_cb_type[31]);
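+ // e.g. a loop-closing beq with a -8 offset (imm_b_type[31] set) is predicted taken, while a
+ // forward beq with a +8 offset is predicted not taken.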
+
+ // Always predict jumps taken, otherwise take the prediction from `instr_b_taken`
+ assign predict_branch_taken_o = fetch_valid_i & (instr_j | instr_cj | instr_b_taken);
+ // Calculate target
+ assign predict_branch_pc_o = fetch_pc_i + branch_imm;
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv
new file mode 100644
index 0000000..1ebcf3c
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv
@@ -0,0 +1,362 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Compressed instruction decoder
+ *
+ * Decodes RISC-V compressed instructions into their RV32 equivalent.
+ * This module is fully combinatorial, clock and reset are used for
+ * assertions only.
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_compressed_decoder # (
+ parameter bit CHERIoTEn = 1'b1
+) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic valid_i,
+ input logic [31:0] instr_i,
+ input logic cheri_pmode_i,
+ output logic [31:0] instr_o,
+ output logic is_compressed_o,
+ output logic illegal_instr_o
+);
+ import cheriot_pkg::*;
+
+ // valid_i indicates if instr_i is valid and is used for assertions only.
+ // The following signal is used to avoid possible lint errors.
+ logic unused_valid;
+ assign unused_valid = valid_i;
+
+ ////////////////////////
+ // Compressed decoder //
+ ////////////////////////
+
+ always_comb begin
+ // By default, forward incoming instruction, mark it as legal.
+ instr_o = instr_i;
+ illegal_instr_o = 1'b0;
+
+ // Check if incoming instruction is compressed.
+ unique case (instr_i[1:0])
+ // C0
+ 2'b00: begin
+ unique case (instr_i[15:13])
+ 3'b000: begin
+ if (CHERIoTEn & cheri_pmode_i)
+ // c.incaddr4cspn -> cincoffsetimm cd', csp, imm
+ instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
+ instr_i[6], 2'b00, 5'h02, 3'b001, 2'b01, instr_i[4:2], {OPCODE_CHERI}};
+ else
+ // c.addi4spn -> addi rd', x2, imm
+ instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
+ instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], {OPCODE_OP_IMM}};
+ if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
+ end
+
+ 3'b010: begin
+ // c.lw -> lw rd', imm(rs1')
+ instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6],
+ 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
+ end
+
+ 3'b011: begin
+ if (CHERIoTEn & cheri_pmode_i) begin
+ // CHERI: c.clc -> clc rd', imm(rs1'); reuse c.ld
+ instr_o = {4'b0, instr_i[6:5], instr_i[12:10],
+ 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
+ end else begin
+ instr_o = instr_i;
+ illegal_instr_o = 1'b1;
+ end
+ end
+
+ 3'b110: begin
+ // c.sw -> sw rs2', imm(rs1')
+ instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2],
+ 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6],
+ 2'b00, {OPCODE_STORE}};
+ end
+
+ 3'b001,
+ 3'b100,
+ 3'b101: begin
+ illegal_instr_o = 1'b1;
+ end
+
+ 3'b111: begin
+ if (CHERIoTEn & cheri_pmode_i) begin
+ // CHERI: c.csc -> csc rs2', imm(rs1'); reuse c.sd
+ instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2],
+ 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, {OPCODE_STORE}};
+ end else begin
+ instr_o = instr_i;
+ illegal_instr_o = 1'b1;
+ end
+
+ end
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ // C1
+ //
+ // Register address checks for RV32E are performed in the regular instruction decoder.
+ // If this check fails, an illegal instruction exception is triggered and the controller
+ // writes the actual faulting instruction to mtval.
+ 2'b01: begin
+ unique case (instr_i[15:13])
+ 3'b000: begin
+ // c.addi -> addi rd, rd, nzimm
+ // c.nop
+ instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2],
+ instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+ end
+
+ 3'b001, 3'b101: begin
+ // 001: c.jal -> jal x1, imm
+ // 101: c.j -> jal x0, imm
+ instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6],
+ instr_i[7], instr_i[2], instr_i[11], instr_i[5:3],
+ {9 {instr_i[12]}}, 4'b0, ~instr_i[15], {OPCODE_JAL}};
+ end
+
+ 3'b010: begin
+ // c.li -> addi rd, x0, nzimm
+ // (c.li hints are translated into an addi hint)
+ instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0,
+ 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+ end
+
+ 3'b011: begin
+ // c.lui -> lui rd, imm
+ // (c.lui hints are translated into a lui hint)
+ instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], {OPCODE_LUI}};
+
+ // c.incaddr16csp -> cincoffsetimm csp, csp, nzimm
+ if (CHERIoTEn & cheri_pmode_i && (instr_i[11:7] == 5'h02)) begin
+ instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
+ instr_i[6], 4'b0, 5'h02, 3'b001, 5'h02, {OPCODE_CHERI}};
+ end else if (instr_i[11:7] == 5'h02) begin
+ // c.addi16sp -> addi x2, x2, nzimm
+ instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
+ instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, {OPCODE_OP_IMM}};
+ end
+
+ if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
+ end
+
+ 3'b100: begin
+ unique case (instr_i[11:10])
+ 2'b00,
+ 2'b01: begin
+ // 00: c.srli -> srli rd, rd, shamt
+ // 01: c.srai -> srai rd, rd, shamt
+ // (c.srli/c.srai hints are translated into a srli/srai hint)
+ instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7],
+ 3'b101, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+ if (instr_i[12] == 1'b1) illegal_instr_o = 1'b1;
+ end
+
+ 2'b10: begin
+ // c.andi -> andi rd, rd, imm
+ instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7],
+ 3'b111, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+ end
+
+ 2'b11: begin
+ unique case ({instr_i[12], instr_i[6:5]})
+ 3'b000: begin
+ // c.sub -> sub rd', rd', rs2'
+ instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7],
+ 3'b000, 2'b01, instr_i[9:7], {OPCODE_OP}};
+ end
+
+ 3'b001: begin
+ // c.xor -> xor rd', rd', rs2'
+ instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100,
+ 2'b01, instr_i[9:7], {OPCODE_OP}};
+ end
+
+ 3'b010: begin
+ // c.or -> or rd', rd', rs2'
+ instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110,
+ 2'b01, instr_i[9:7], {OPCODE_OP}};
+ end
+
+ 3'b011: begin
+ // c.and -> and rd', rd', rs2'
+ instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111,
+ 2'b01, instr_i[9:7], {OPCODE_OP}};
+ end
+
+ 3'b100,
+ 3'b101,
+ 3'b110,
+ 3'b111: begin
+ // 100: c.subw
+ // 101: c.addw
+ illegal_instr_o = 1'b1;
+ end
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ 3'b110, 3'b111: begin
+ // 0: c.beqz -> beq rs1', x0, imm
+ // 1: c.bnez -> bne rs1', x0, imm
+ instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01,
+ instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3],
+ instr_i[12], {OPCODE_BRANCH}};
+ end
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ // C2
+ //
+ // Register address checks for RV32E are performed in the regular instruction decoder.
+ // If this check fails, an illegal instruction exception is triggered and the controller
+ // writes the actual faulting instruction to mtval.
+ 2'b10: begin
+ unique case (instr_i[15:13])
+ 3'b000: begin
+ // c.slli -> slli rd, rd, shamt
+ // (c.slli hints are translated into a slli hint)
+ instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], {OPCODE_OP_IMM}};
+ if (instr_i[12] == 1'b1) illegal_instr_o = 1'b1; // reserved for custom extensions
+ end
+
+ 3'b010: begin
+ // c.lwsp -> lw rd, imm(x2)
+ instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02,
+ 3'b010, instr_i[11:7], OPCODE_LOAD};
+ if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+ end
+
+ 3'b011: begin
+ if (CHERIoTEn & cheri_pmode_i) begin
+ // c.clcsp -> clc cd, imm(c2), reuse c.ldsp
+ instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02,
+ 3'b011, instr_i[11:7], OPCODE_LOAD};
+ if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+ end else begin
+ instr_o = instr_i;
+ illegal_instr_o = 1'b1;
+ end
+ end
+
+ 3'b100: begin
+ if (instr_i[12] == 1'b0) begin
+ if (instr_i[6:2] != 5'b0) begin
+ // c.mv -> add rd/rs1, x0, rs2
+ // (c.mv hints are translated into an add hint)
+ instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], {OPCODE_OP}};
+ end else begin
+ // c.jr -> jalr x0, rd/rs1, 0
+ instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, {OPCODE_JALR}};
+ if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+ end
+ end else begin
+ if (instr_i[6:2] != 5'b0) begin
+ // c.add -> add rd, rd, rs2
+ // (c.add hints are translated into an add hint)
+ instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP}};
+ end else begin
+ if (instr_i[11:7] == 5'b0) begin
+ // c.ebreak -> ebreak
+ instr_o = {32'h00_10_00_73};
+ end else begin
+ // c.jalr -> jalr x1, rs1, 0
+ instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, {OPCODE_JALR}};
+ end
+ end
+ end
+ end
+
+ 3'b110: begin
+ // c.swsp -> sw rs2, imm(x2)
+ instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010,
+ instr_i[11:9], 2'b00, {OPCODE_STORE}};
+ end
+
+ 3'b111: begin
+ if (CHERIoTEn & cheri_pmode_i) begin
+ // c.cscsp -> csc cs2, imm(c2), reuse c.sdsp
+ instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011,
+ instr_i[11:10], 3'b000, {OPCODE_STORE}};
+ end else begin
+ instr_o = instr_i;
+ illegal_instr_o = 1'b1;
+ end
+ end
+
+
+ 3'b001,
+ 3'b101: begin
+ illegal_instr_o = 1'b1;
+ end
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ // Incoming instruction is not compressed.
+ 2'b11:;
+
+ default: begin
+ illegal_instr_o = 1'b1;
+ end
+ endcase
+ end
+
+ assign is_compressed_o = (instr_i[1:0] != 2'b11);
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // The valid_i signal used to gate below assertions must be known.
+ `ASSERT_KNOWN(IbexInstrValidKnown, valid_i)
+
+ // Selectors must be known/valid.
+ `ASSERT(IbexInstrLSBsKnown, valid_i |->
+ !$isunknown(instr_i[1:0]))
+ `ASSERT(IbexC0Known1, (valid_i && (instr_i[1:0] == 2'b00)) |->
+ !$isunknown(instr_i[15:13]))
+ `ASSERT(IbexC1Known1, (valid_i && (instr_i[1:0] == 2'b01)) |->
+ !$isunknown(instr_i[15:13]))
+ `ASSERT(IbexC1Known2, (valid_i && (instr_i[1:0] == 2'b01) && (instr_i[15:13] == 3'b100)) |->
+ !$isunknown(instr_i[11:10]))
+ `ASSERT(IbexC1Known3, (valid_i &&
+ (instr_i[1:0] == 2'b01) && (instr_i[15:13] == 3'b100) && (instr_i[11:10] == 2'b11)) |->
+ !$isunknown({instr_i[12], instr_i[6:5]}))
+ `ASSERT(IbexC2Known1, (valid_i && (instr_i[1:0] == 2'b10)) |->
+ !$isunknown(instr_i[15:13]))
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv b/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv
new file mode 100644
index 0000000..6e2109e
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv
@@ -0,0 +1,962 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Main controller of the processor
+ */
+
+`include "prim_assert.sv"
+`include "dv_fcov_macros.svh"
+
+module cheriot_controller #(
+ parameter bit CHERIoTEn = 1'b1,
+ parameter bit WritebackStage = 0,
+ parameter bit BranchPredictor = 0
+ ) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic cheri_pmode_i,
+
+ output logic ctrl_busy_o, // core is busy processing instrs
+
+ // decoder related signals
+ input logic illegal_insn_i, // decoder has an invalid instr
+ input logic ecall_insn_i, // decoder has ECALL instr
+ input logic mret_insn_i, // decoder has MRET instr
+ input logic dret_insn_i, // decoder has DRET instr
+ input logic wfi_insn_i, // decoder has WFI instr
+ input logic ebrk_insn_i, // decoder has EBREAK instr
+ input logic csr_pipe_flush_i, // do CSR-related pipeline flush
+ input logic csr_access_i, // decoder has CSR access instr
+ input logic csr_cheri_always_ok_i, // cheri safe-listed CSR registers
+
+ // instr from IF-ID pipeline stage
+ input logic instr_valid_i, // instr is valid
+ input logic [31:0] instr_i, // uncompressed instr data for mtval
+ input logic [15:0] instr_compressed_i, // instr compressed data for mtval
+ input logic instr_is_compressed_i, // instr is compressed
+ input logic instr_bp_taken_i, // instr was predicted taken branch
+ input logic instr_fetch_err_i, // instr has error
+ input logic instr_fetch_err_plus2_i, // instr error is x32
+ input logic instr_fetch_cheri_acc_vio_i,
+ input logic instr_fetch_cheri_bound_vio_i,
+
+ input logic [31:0] pc_id_i, // instr address
+
+ // to IF-ID pipeline stage
+ output logic instr_valid_clear_o, // kill instr in IF-ID reg
+ output logic id_in_ready_o, // ID stage is ready for new instr
+ output logic controller_run_o, // Controller is in standard instruction
+ // run mode
+
+ // to prefetcher
+ output logic instr_req_o, // start fetching instructions
+ output logic pc_set_o, // jump to address set by pc_mux
+ output cheriot_pkg::pc_sel_e pc_mux_o, // IF stage fetch address selector
+ // (boot, normal, exception...)
+ output logic nt_branch_mispredict_o, // Not-taken branch in ID/EX was
+ // mispredicted (predicted taken)
+ output cheriot_pkg::exc_pc_sel_e exc_pc_mux_o, // IF stage selector for exception PC
+ output cheriot_pkg::exc_cause_e exc_cause_o, // for IF stage, CSRs
+
+ // LSU
+ input logic [31:0] lsu_addr_last_i, // for mtval
+ input logic load_err_i,
+ input logic store_err_i,
+ input logic lsu_err_is_cheri_i,
+ output logic wb_exception_o, // Instruction in WB taking an exception
+ output logic id_exception_o, // Instruction in ID taking an exception
+ output logic id_exception_nc_o, // no-cheri
+
+ // jump/branch signals
+ input logic branch_set_i, // branch set signal (branch definitely
+ // taken)
+ input logic branch_not_set_i, // branch is definitely not taken
+ input logic jump_set_i, // jump taken set signal
+
+ // interrupt signals
+ input logic csr_mstatus_mie_i, // M-mode interrupt enable bit
+ input logic irq_pending_i, // interrupt request pending
+ input cheriot_pkg::irqs_t irqs_i, // interrupt requests qualified with
+ // mie CSR
+ input logic irq_nm_i, // non-maskable interrupt
+ output logic nmi_mode_o, // core executing NMI handler
+
+ // debug signals
+ input logic debug_req_i,
+ output cheriot_pkg::dbg_cause_e debug_cause_o,
+ output logic debug_csr_save_o,
+ output logic debug_mode_o,
+ input logic debug_single_step_i,
+ input logic debug_ebreakm_i,
+ input logic debug_ebreaku_i,
+ input logic trigger_match_i,
+
+ output logic csr_save_if_o,
+ output logic csr_save_id_o,
+ output logic csr_save_wb_o,
+ output logic csr_restore_mret_id_o,
+ output logic csr_restore_dret_id_o,
+ output logic csr_save_cause_o,
+ output logic csr_mepcc_clrtag_o,
+
+ output logic [31:0] csr_mtval_o,
+ input cheriot_pkg::priv_lvl_e priv_mode_i,
+ input logic csr_mstatus_tw_i,
+ input logic csr_pcc_perm_sr_i,
+
+ // stall & flush signals
+ input logic stall_id_i,
+ input logic stall_wb_i,
+ output logic flush_id_o,
+ input logic ready_wb_i,
+
+ // performance monitors
+ output logic perf_jump_o, // we are executing a jump
+ // instruction (j, jr, jal, jalr)
+ output logic perf_tbranch_o, // we are executing a taken branch
+ // instruction
+ input logic instr_is_cheri_i, // from decoder
+ input logic cheri_ex_valid_i, // from cheri EX
+ input logic cheri_ex_err_i,
+ input logic cheri_wb_err_i,
+ input logic [11:0] cheri_ex_err_info_i,
+ input logic [15:0] cheri_wb_err_info_i,
+ input logic cheri_branch_req_i,
+ input logic [31:0] cheri_branch_target_i
+);
+ import cheriot_pkg::*;
+
+ // FSM state encoding
+ //typedef enum logic [3:0] {
+ // RESET, BOOT_SET, WAIT_SLEEP, SLEEP, FIRST_FETCH, DECODE, FLUSH,
+ // IRQ_TAKEN, DBG_TAKEN_IF, DBG_TAKEN_ID
+ //} ctrl_fsm_e;
+
+ ctrl_fsm_e ctrl_fsm_cs, ctrl_fsm_ns;
+
+ logic nmi_mode_q, nmi_mode_d;
+ logic debug_mode_q, debug_mode_d;
+ logic load_err_q, load_err_d;
+ logic store_err_q, store_err_d;
+ logic lsu_err_is_cheri_q;
+ logic exc_req_q, exc_req_d, exc_req_nc, exc_req_wb;
+ logic illegal_insn_q, illegal_insn_d;
+ logic cheri_ex_err_q, cheri_ex_err_d;
+ logic cheri_wb_err_q;
+ logic cheri_asr_err_q, cheri_asr_err_d;
+
+ // Of the various exception/fault signals, which one takes priority in FLUSH and hence controls
+ // what happens next (setting exc_cause, csr_mtval etc)
+ logic instr_fetch_err_prio;
+ logic illegal_insn_prio;
+ logic ecall_insn_prio;
+ logic ebrk_insn_prio;
+ logic store_err_prio;
+ logic load_err_prio;
+ logic cheri_ex_err_prio;
+ logic cheri_wb_err_prio;
+ logic cheri_asr_err_prio;
+
+ logic stall;
+ logic halt_if;
+ logic retain_id;
+ logic flush_id;
+ logic illegal_dret;
+ logic illegal_umode;
+ logic exc_req_lsu;
+ logic special_req;
+ logic special_req_pc_change;
+ logic special_req_flush_only;
+ logic do_single_step_d;
+ logic do_single_step_q;
+ logic enter_debug_mode_prio_d;
+ logic enter_debug_mode_prio_q;
+ logic enter_debug_mode;
+ logic ebreak_into_debug;
+ logic handle_irq;
+ logic id_wb_pending;
+
+ logic [3:0] mfip_id;
+ logic unused_irq_timer;
+
+ logic ecall_insn;
+ logic mret_insn;
+ logic dret_insn;
+ logic wfi_insn;
+ logic ebrk_insn;
+ logic csr_pipe_flush;
+ logic instr_fetch_err;
+ logic cheri_ex_err;
+ logic mret_cheri_asr_err;
+ logic csr_cheri_asr_err;
+
+`ifndef SYNTHESIS
+`ifndef DII_SIM
+ // synopsys translate_off
+ // make sure we are called later so that we do not generate messages for
+ // glitches
+ always_ff @(negedge clk_i) begin
+ // print warning in case of decoding errors
+ if ((ctrl_fsm_cs == DECODE) && instr_valid_i && !instr_fetch_err_i && !wb_exception_o && illegal_insn_d) begin
+ $display("%t: Illegal instruction (hart %0x) at PC 0x%h: 0x%h", $time, cheriot_core.hart_id_i,
+ cheriot_id_stage.pc_id_i,
+ (instr_is_compressed_i ? instr_compressed_i : instr_i));
+ // cheriot_id_stage.instr_rdata_i);
+ end
+ end
+ // synopsys translate_on
+`endif
+`endif
+
+ ////////////////
+ // Exceptions //
+ ////////////////
+
+ assign load_err_d = load_err_i;
+ assign store_err_d = store_err_i;
+
+ // Decoder doesn't take instr_valid into account, factor it in here.
+ assign ecall_insn = ecall_insn_i & instr_valid_i;
+ assign mret_insn = mret_insn_i & instr_valid_i;
+ assign dret_insn = dret_insn_i & instr_valid_i;
+ assign wfi_insn = wfi_insn_i & instr_valid_i;
+ assign ebrk_insn = ebrk_insn_i & instr_valid_i;
+ assign csr_pipe_flush = csr_pipe_flush_i & instr_valid_i;
+ assign instr_fetch_err = instr_fetch_err_i & instr_valid_i;
+ assign cheri_ex_err = cheri_ex_err_i & instr_is_cheri_i & instr_valid_i;
+
+ // "Executing DRET outside of Debug Mode causes an illegal instruction exception."
+ // [Debug Spec v0.13.2, p.41]
+ assign illegal_dret = dret_insn & ~debug_mode_q;
+
+ // Some instructions can only be executed in M-Mode
+ assign illegal_umode = (priv_mode_i != PRIV_LVL_M) &
+ // MRET must be in M-Mode. TW means trap WFI to M-Mode.
+ (mret_insn | (csr_mstatus_tw_i & wfi_insn));
+
+ assign mret_cheri_asr_err = CHERIoTEn & cheri_pmode_i & ~csr_pcc_perm_sr_i & mret_insn;
+ assign csr_cheri_asr_err = CHERIoTEn & cheri_pmode_i & ~csr_pcc_perm_sr_i & instr_valid_i &
+ csr_access_i & ~illegal_insn_i & ~csr_cheri_always_ok_i;
+
+ // This is recorded in the illegal_insn_q flop to help timing. Specifically
+ // it is needed to break the path from cheriot_cs_registers/illegal_csr_insn_o
+ // to pc_set_o. Clear when controller is in FLUSH so it won't remain set
+ // once illegal instruction is handled.
+ // All terms in this expression are qualified by instr_valid_i
+ assign illegal_insn_d = illegal_insn_i | illegal_dret | illegal_umode;
+ assign cheri_ex_err_d = cheri_pmode_i & cheri_ex_err & (ctrl_fsm_cs != FLUSH);
+
+ assign cheri_asr_err_d = (~illegal_insn_i & csr_cheri_asr_err) | mret_cheri_asr_err;
+
+ // exception requests
+ // requests are flopped in exc_req_q. This is cleared when controller is in
+ // the FLUSH state so the cycle following exc_req_q won't remain set for an
+ // exception request that has just been handled.
+ // All terms in this expression are qualified by instr_valid_i
+ assign exc_req_d = (ecall_insn | ebrk_insn | illegal_insn_d | instr_fetch_err | (cheri_pmode_i & cheri_ex_err) |
+ cheri_asr_err_d) & (ctrl_fsm_cs != FLUSH);
+ assign exc_req_nc = (ecall_insn | ebrk_insn | illegal_insn_d | instr_fetch_err | cheri_asr_err_d) &
+ (ctrl_fsm_cs != FLUSH);
+
+ // LSU exception requests
+ assign exc_req_lsu = store_err_i | load_err_i;
+ assign exc_req_wb = exc_req_lsu | (cheri_pmode_i & cheri_wb_err_i);
+
+ assign id_exception_o = exc_req_d;
+ assign id_exception_nc_o = exc_req_nc;
+
+ // special requests: special instructions, pipeline flushes, exceptions...
+ // All terms in these expressions are qualified by instr_valid_i except exc_req_lsu which can come
+ // from the Writeback stage with no instr_valid_i from the ID stage
+
+ // These special requests only cause a pipeline flush and in particular don't cause a PC change
+ // that is outside the normal execution flow
+ assign special_req_flush_only = wfi_insn | csr_pipe_flush;
+
+ // These special requests cause a change in PC
+ assign special_req_pc_change = mret_insn | dret_insn | exc_req_d | exc_req_wb;
+
+ // generic special request signal, applies to all instructions
+ assign special_req = special_req_pc_change | special_req_flush_only;
+
+ // Is there an instruction in ID or WB that has yet to complete?
+ assign id_wb_pending = instr_valid_i | ~ready_wb_i;
+
+ // Exception/fault prioritisation is taken from Table 3.7 of Privileged Spec v1.11
+ if (WritebackStage) begin : g_wb_exceptions
+ always_comb begin
+ instr_fetch_err_prio = 0;
+ illegal_insn_prio = 0;
+ ecall_insn_prio = 0;
+ ebrk_insn_prio = 0;
+ store_err_prio = 0;
+ load_err_prio = 0;
+ cheri_ex_err_prio = 0;
+ cheri_wb_err_prio = 0;
+ cheri_asr_err_prio = 0;
+
+ // Note that with the writeback stage store/load errors occur on the instruction in writeback,
+ // all other exceptions/faults occur on the instruction in ID/EX. The faults from writeback
+ // must take priority as that instruction is architecturally ordered before the one in ID/EX.
+ if (store_err_q) begin
+ store_err_prio = 1'b1;
+ end else if (load_err_q) begin
+ load_err_prio = 1'b1;
+ end else if (cheri_pmode_i & cheri_wb_err_q) begin
+ cheri_wb_err_prio = 1'b1;
+ end else if (instr_fetch_err) begin
+ instr_fetch_err_prio = 1'b1;
+ end else if (illegal_insn_q) begin
+ illegal_insn_prio = 1'b1;
+ end else if (ecall_insn) begin
+ ecall_insn_prio = 1'b1;
+ end else if (ebrk_insn) begin
+ ebrk_insn_prio = 1'b1;
+ end else if (cheri_pmode_i & cheri_ex_err_q) begin
+ cheri_ex_err_prio = 1'b1;
+ end else if (cheri_asr_err_q) begin
+ cheri_asr_err_prio = 1'b1;
+ end
+ end
+
+ // Instruction in writeback is generating an exception so instruction in ID must not execute
+ assign wb_exception_o = load_err_q | store_err_q | load_err_i | store_err_i | (cheri_pmode_i & cheri_wb_err_i);
+ end else begin : g_no_wb_exceptions
+ always_comb begin
+ instr_fetch_err_prio = 0;
+ illegal_insn_prio = 0;
+ ecall_insn_prio = 0;
+ ebrk_insn_prio = 0;
+ store_err_prio = 0;
+ load_err_prio = 0;
+ cheri_wb_err_prio = 0;
+ cheri_ex_err_prio = 0;
+ cheri_asr_err_prio = 0;
+
+ if (instr_fetch_err) begin
+ instr_fetch_err_prio = 1'b1;
+ end else if (illegal_insn_q) begin
+ illegal_insn_prio = 1'b1;
+ end else if (ecall_insn) begin
+ ecall_insn_prio = 1'b1;
+ end else if (ebrk_insn) begin
+ ebrk_insn_prio = 1'b1;
+ end else if (cheri_ex_err_q) begin
+ cheri_ex_err_prio = 1'b1;
+ end else if (store_err_q) begin
+ store_err_prio = 1'b1;
+ end else if (load_err_q) begin
+ load_err_prio = 1'b1;
+ end else if (cheri_wb_err_q) begin
+ cheri_wb_err_prio = 1'b1;
+ end else if (cheri_asr_err_q) begin
+ cheri_asr_err_prio = 1'b1;
+ end
+ end
+ assign wb_exception_o = 1'b0;
+ end
+
+ `ASSERT_IF(IbexExceptionPrioOnehot,
+ $onehot({instr_fetch_err_prio,
+ illegal_insn_prio,
+ ecall_insn_prio,
+ ebrk_insn_prio,
+ store_err_prio,
+ load_err_prio,
+ cheri_wb_err_prio,
+ cheri_ex_err_prio,
+ cheri_asr_err_prio}),
+ (ctrl_fsm_cs == FLUSH) & csr_save_cause_o)
+
+ ////////////////
+ // Interrupts //
+ ////////////////
+
+ // Enter debug mode due to an external debug_req_i or because the core is in
+ // single step mode (dcsr.step == 1). Single step must be qualified with
+ // instruction valid otherwise the core will immediately enter debug mode
+ // due to a recently flushed IF (or a delay in an instruction returning from
+ // memory) before it has had anything to single step.
+ // Also enter debug mode on a trigger match (hardware breakpoint)
+
+ // Set `do_single_step_q` when a valid instruction is seen outside of debug mode and core is in
+ // single step mode. The first valid instruction on debug mode entry will clear it. Hold its value
+ // when there is no valid instruction so `do_single_step_d` remains asserted until debug mode is
+ // entered.
+ assign do_single_step_d = instr_valid_i ? ~debug_mode_q & debug_single_step_i : do_single_step_q;
+ // Enter debug mode due to:
+ // * external `debug_req_i`
+ // * core in single step mode (dcsr.step == 1).
+ // * trigger match (hardware breakpoint)
+ //
+ // `debug_req_i` and `do_single_step_d` request debug mode with priority. This results in a debug
+ // mode entry even if the controller goes to `FLUSH` in preparation for handling an exception or
+ // interrupt. `trigger_match_i` is not a priority entry into debug mode as it must be ignored
+ // where control flow changes such that the instruction causing the trigger is no longer being
+ // executed.
+ assign enter_debug_mode_prio_d = (debug_req_i | do_single_step_d) & ~debug_mode_q;
+ assign enter_debug_mode = enter_debug_mode_prio_d | (trigger_match_i & ~debug_mode_q);
+
+ // Set when an ebreak should enter debug mode rather than jump to exception
+ // handler
+ assign ebreak_into_debug = priv_mode_i == PRIV_LVL_M ? debug_ebreakm_i :
+ priv_mode_i == PRIV_LVL_U ? debug_ebreaku_i :
+ 1'b0;
+
+ // Interrupts including NMI are ignored,
+ // - while in debug mode [Debug Spec v0.13.2, p.39],
+ // - while in NMI mode (nested NMIs are not supported, NMI has highest priority and
+ // cannot be interrupted by regular interrupts).
+ assign handle_irq = ~debug_mode_q & ~nmi_mode_q &
+ (irq_nm_i | (irq_pending_i & csr_mstatus_mie_i));
+
+ // generate ID of fast interrupts, highest priority to lowest ID
+ always_comb begin : gen_mfip_id
+ mfip_id = 4'd0;
+
+ for (int i = 14; i >= 0; i--) begin
+ if (irqs_i.irq_fast[i]) begin
+ mfip_id = i[3:0];
+ end
+ end
+ end
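+ // e.g. with irq_fast[3] and irq_fast[7] both pending, the final iteration of the loop above
+ // leaves mfip_id = 4'd3, so the lower-numbered fast interrupt is taken first.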
+
+ assign unused_irq_timer = irqs_i.irq_timer;
+
+ /////////////////////
+ // Core controller //
+ /////////////////////
+
+ always_comb begin
+ // Default values
+ instr_req_o = 1'b1;
+
+ csr_save_if_o = 1'b0;
+ csr_save_id_o = 1'b0;
+ csr_save_wb_o = 1'b0;
+ csr_restore_mret_id_o = 1'b0;
+ csr_restore_dret_id_o = 1'b0;
+ csr_save_cause_o = 1'b0;
+ csr_mepcc_clrtag_o = 1'b0;
+ csr_mtval_o = '0;
+
+ // The values of pc_mux and exc_pc_mux are only relevant if pc_set is set. Some of the states
+ // below always set pc_mux and exc_pc_mux but only set pc_set if certain conditions are met.
+ // This avoids having to factor those conditions into the pc_mux and exc_pc_mux select signals,
+ // which helps timing.
+ pc_mux_o = PC_BOOT;
+ pc_set_o = 1'b0;
+ nt_branch_mispredict_o = 1'b0;
+
+ exc_pc_mux_o = EXC_PC_IRQ;
+ exc_cause_o = EXC_CAUSE_INSN_ADDR_MISA; // = 6'h00
+
+ ctrl_fsm_ns = ctrl_fsm_cs;
+
+ ctrl_busy_o = 1'b1;
+
+ halt_if = 1'b0;
+ retain_id = 1'b0;
+ flush_id = 1'b0;
+
+ debug_csr_save_o = 1'b0;
+ debug_cause_o = DBG_CAUSE_EBREAK;
+ debug_mode_d = debug_mode_q;
+ nmi_mode_d = nmi_mode_q;
+
+ perf_tbranch_o = 1'b0;
+ perf_jump_o = 1'b0;
+
+ controller_run_o = 1'b0;
+
+ unique case (ctrl_fsm_cs)
+ RESET: begin
+ instr_req_o = 1'b0;
+ pc_mux_o = PC_BOOT;
+ pc_set_o = 1'b1;
+ ctrl_fsm_ns = BOOT_SET;
+ end
+
+ BOOT_SET: begin
+ // copy boot address to instr fetch address
+ instr_req_o = 1'b1;
+ pc_mux_o = PC_BOOT;
+ pc_set_o = 1'b1;
+
+ ctrl_fsm_ns = FIRST_FETCH;
+ end
+
+ WAIT_SLEEP: begin
+ ctrl_busy_o = 1'b0;
+ instr_req_o = 1'b0;
+ halt_if = 1'b1;
+ flush_id = 1'b1;
+ ctrl_fsm_ns = SLEEP;
+ end
+
+ SLEEP: begin
+ // instruction in IF stage is already valid
+ // we begin execution when an interrupt has arrived
+ instr_req_o = 1'b0;
+ halt_if = 1'b1;
+ flush_id = 1'b1;
+
+ // normal execution flow
+ // in debug mode or single step mode we leave immediately (wfi=nop)
+ if (irq_nm_i || irq_pending_i || debug_req_i || debug_mode_q || debug_single_step_i) begin
+ ctrl_fsm_ns = FIRST_FETCH;
+ end else begin
+ // Make sure clock remains disabled.
+ ctrl_busy_o = 1'b0;
+ end
+ end
+
+ FIRST_FETCH: begin
+ // Stall because of IF miss
+ if (id_in_ready_o) begin
+ ctrl_fsm_ns = DECODE;
+ end
+
+ // handle interrupts
+ if (handle_irq) begin
+ // We are handling an interrupt. Set halt_if to tell IF not to give
+ // us any more instructions before it redirects to the handler, but
+ // don't set flush_id: we must allow this instruction to complete
+ // (since it might have outstanding loads or stores).
+ ctrl_fsm_ns = IRQ_TAKEN;
+ halt_if = 1'b1;
+ end
+
+ // enter debug mode
+ if (enter_debug_mode) begin
+ ctrl_fsm_ns = DBG_TAKEN_IF;
+ // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+ // ID state is needed for correct debug mode entry
+ halt_if = 1'b1;
+ end
+ end
+
+ DECODE: begin
+ // normal operating mode of the ID stage, in case of debug and interrupt requests,
+ // priorities are as follows (lower number == higher priority)
+ // 1. currently running (multicycle) instructions and exceptions caused by these
+ // 2. debug requests
+ // 3. interrupt requests
+
+ controller_run_o = 1'b1;
+
+ // Set PC mux for branch and jump here to ease timing. Value is only relevant if pc_set_o is
+ // also set. Setting the mux value here avoids factoring in special_req and instr_valid_i
+ // which helps timing.
+ pc_mux_o = PC_JUMP;
+
+
+ // Get ready for special instructions, exceptions, pipeline flushes
+ if (special_req) begin
+ // Halt IF but don't flush ID. This leaves a valid instruction in
+ // ID so controller can determine appropriate action in the
+ // FLUSH state.
+ retain_id = 1'b1;
+
+ // Wait for the writeback stage to either be ready for a new instruction or raise its own
+ // exception before going to FLUSH. If the instruction in writeback raises an exception it
+ // must take priority over any exception from an instruction in ID/EX. Only once the
+ // writeback stage is ready can we be certain that won't happen. Without a writeback
+ // stage ready_wb_i == 1 so the FSM will always go directly to FLUSH.
+
+ if (ready_wb_i | wb_exception_o) begin
+ ctrl_fsm_ns = FLUSH;
+ end
+ end
+
+ if (branch_set_i || jump_set_i || (cheri_pmode_i & cheri_branch_req_i)) begin
+ // Only set the PC if the branch predictor hasn't already done the branch for us
+ pc_set_o = BranchPredictor ? ~instr_bp_taken_i : 1'b1;
+
+ perf_tbranch_o = branch_set_i;
+ perf_jump_o = jump_set_i;
+ end
+
+ if (BranchPredictor) begin
+ if (instr_bp_taken_i & branch_not_set_i) begin
+ // If the instruction is a branch that was predicted to be taken but was not taken
+ // signal a mispredict.
+ nt_branch_mispredict_o = 1'b1;
+ end
+ end
+
+ // If entering debug mode or handling an IRQ the core needs to wait until any instruction in
+ // ID or WB has finished executing. Stall IF during that time.
+ if ((enter_debug_mode || handle_irq) && (stall || id_wb_pending)) begin
+ halt_if = 1'b1;
+ end
+
+ if (!stall && !special_req && !id_wb_pending) begin
+ if (enter_debug_mode) begin
+ // enter debug mode
+ ctrl_fsm_ns = DBG_TAKEN_IF;
+ // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the
+ // ID state is needed for correct debug mode entry
+ halt_if = 1'b1;
+ end else if (handle_irq) begin
+ // handle interrupt (not in debug mode)
+ ctrl_fsm_ns = IRQ_TAKEN;
+ // We are handling an interrupt (not in debug mode). Set halt_if to
+ // tell IF not to give us any more instructions before it redirects
+ // to the handler, but don't set flush_id: we must allow this
+ // instruction to complete (since it might have outstanding loads
+ // or stores).
+ halt_if = 1'b1;
+ end
+ end
+
+ end // DECODE
+
+ IRQ_TAKEN: begin
+ pc_mux_o = PC_EXC;
+ exc_pc_mux_o = EXC_PC_IRQ;
+
+ if (handle_irq) begin
+ pc_set_o = 1'b1;
+
+ csr_save_if_o = 1'b1;
+ csr_save_cause_o = 1'b1;
+
+ // interrupt priorities according to Privileged Spec v1.11 p.31
+ if (irq_nm_i && !nmi_mode_q) begin
+ exc_cause_o = EXC_CAUSE_IRQ_NM;
+ nmi_mode_d = 1'b1; // enter NMI mode
+ end else if (irqs_i.irq_fast != 15'b0) begin
+ // generate exception cause ID from fast interrupt ID:
+ // - first bit distinguishes interrupts from exceptions,
+ // - second bit adds 16 to fast interrupt ID
+ // for example EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16}
+ exc_cause_o = exc_cause_e'({2'b11, mfip_id});
+ end else if (irqs_i.irq_external) begin
+ exc_cause_o = EXC_CAUSE_IRQ_EXTERNAL_M;
+ end else if (irqs_i.irq_software) begin
+ exc_cause_o = EXC_CAUSE_IRQ_SOFTWARE_M;
+ end else begin // irqs_i.irq_timer
+ exc_cause_o = EXC_CAUSE_IRQ_TIMER_M;
+ end
+ end
+
+ ctrl_fsm_ns = DECODE;
+ end
+
+ DBG_TAKEN_IF: begin
+ pc_mux_o = PC_EXC;
+ exc_pc_mux_o = EXC_PC_DBD;
+
+ // enter debug mode and save PC in IF to dpc
+ // jump to debug exception handler in debug memory
+ flush_id = 1'b1;
+ pc_set_o = 1'b1;
+
+ csr_save_if_o = 1'b1;
+ debug_csr_save_o = 1'b1;
+
+ csr_save_cause_o = 1'b1;
+ if (trigger_match_i) begin
+ debug_cause_o = DBG_CAUSE_TRIGGER;
+ end else if (debug_single_step_i) begin
+ debug_cause_o = DBG_CAUSE_STEP;
+ end else begin
+ debug_cause_o = DBG_CAUSE_HALTREQ;
+ end
+
+ // enter debug mode
+ debug_mode_d = 1'b1;
+
+ ctrl_fsm_ns = DECODE;
+ end
+
+ DBG_TAKEN_ID: begin
+ // enter debug mode and save PC in ID to dpc, used when encountering
+ // 1. EBREAK during debug mode
+ // 2. EBREAK with forced entry into debug mode (ebreakm or ebreaku set).
+        //    regular ebreaks go through FLUSH.
+ //
+ // for 1. do not update dcsr and dpc, for 2. do so [Debug Spec v0.13.2, p.39]
+ // jump to debug exception handler in debug memory
+ flush_id = 1'b1;
+ pc_mux_o = PC_EXC;
+ pc_set_o = 1'b1;
+ exc_pc_mux_o = EXC_PC_DBD;
+
+ // update dcsr and dpc
+ if (ebreak_into_debug && !debug_mode_q) begin // ebreak with forced entry
+
+ // dpc (set to the address of the EBREAK, i.e. set to PC in ID stage)
+ csr_save_cause_o = 1'b1;
+ csr_save_id_o = 1'b1;
+
+ // dcsr
+ debug_csr_save_o = 1'b1;
+ debug_cause_o = DBG_CAUSE_EBREAK;
+ end
+
+ // enter debug mode
+ debug_mode_d = 1'b1;
+
+ ctrl_fsm_ns = DECODE;
+ end
+
+ FLUSH: begin
+ // flush the pipeline
+ halt_if = 1'b1;
+ flush_id = 1'b1;
+ ctrl_fsm_ns = DECODE;
+
+ // As pc_mux and exc_pc_mux can take various values in this state they aren't set early
+ // here.
+
+ // exceptions: set exception PC, save PC and exception cause
+ // exc_req_lsu is high for one clock cycle only (in DECODE)
+ if (exc_req_q || store_err_q || load_err_q || (cheri_pmode_i & cheri_wb_err_q)) begin
+ pc_set_o = 1'b1;
+ pc_mux_o = PC_EXC;
+ exc_pc_mux_o = debug_mode_q ? EXC_PC_DBG_EXC : EXC_PC_EXC;
+
+ if (WritebackStage) begin : g_writeback_mepc_save
+ // With the writeback stage present whether an instruction accessing memory will cause
+ // an exception is only known when it is in writeback. So when taking such an exception
+ // epc must come from writeback.
+ csr_save_id_o = ~(store_err_q | load_err_q | (cheri_pmode_i & cheri_wb_err_q));
+ csr_save_wb_o = store_err_q | load_err_q | (cheri_pmode_i & cheri_wb_err_q);
+ end else begin : g_no_writeback_mepc_save
+ csr_save_id_o = 1'b0;
+ end
+
+ csr_save_cause_o = 1'b1;
+
+ // Exception/fault prioritisation logic will have set exactly 1 X_prio signal
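+          // For CHERI faults below, csr_mtval_o is packed as {21'h0, s, cap_idx[4:0], cause[4:0]}
+          // (see the per-case s / cap_idx / err annotations).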
+ unique case (1'b1)
+ instr_fetch_err_prio: begin
+ if (instr_fetch_cheri_acc_vio_i) begin // tag violation
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, 1'b1, 5'h0, 5'h2}; // s=1, cap_idx=0
+ end else if (instr_fetch_cheri_bound_vio_i) begin // bound violation
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, 1'b1, 5'h0, 5'h1}; // s=1, cap_idx=0
+ csr_mepcc_clrtag_o = 1'b1;
+ end else begin // ext memory error
+ exc_cause_o = EXC_CAUSE_INSTR_ACCESS_FAULT;
+ csr_mtval_o = instr_fetch_err_plus2_i ? (pc_id_i + 32'd2) : pc_id_i;
+ end
+ end
+ illegal_insn_prio: begin
+ exc_cause_o = EXC_CAUSE_ILLEGAL_INSN;
+ csr_mtval_o = (CHERIoTEn & cheri_pmode_i) ? 32'h0 :
+ (instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i);
+ end
+ ecall_insn_prio: begin
+ exc_cause_o = (priv_mode_i == PRIV_LVL_M) ? EXC_CAUSE_ECALL_MMODE :
+ EXC_CAUSE_ECALL_UMODE;
+ end
+ ebrk_insn_prio: begin
+ if (debug_mode_q | ebreak_into_debug) begin
+ /*
+ * EBREAK in debug mode re-enters debug mode
+ *
+ * "The only exception is EBREAK. When that is executed in Debug
+ * Mode, it halts the hart again but without updating dpc or
+ * dcsr." [Debug Spec v0.13.2, p.39]
+ */
+
+ /*
+ * dcsr.ebreakm == 1:
+ * "EBREAK instructions in M-mode enter Debug Mode."
+ * [Debug Spec v0.13.2, p.42]
+ */
+ pc_set_o = 1'b0;
+ csr_save_id_o = 1'b0;
+ csr_save_cause_o = 1'b0;
+ ctrl_fsm_ns = DBG_TAKEN_ID;
+ flush_id = 1'b0;
+ end else begin
+ /*
+ * "The EBREAK instruction is used by debuggers to cause control
+ * to be transferred back to a debugging environment. It
+ * generates a breakpoint exception and performs no other
+ * operation. [...] ECALL and EBREAK cause the receiving
+ * privilege mode's epc register to be set to the address of the
+ * ECALL or EBREAK instruction itself, not the address of the
+ * following instruction." [Privileged Spec v1.11, p.40]
+ */
+ exc_cause_o = EXC_CAUSE_BREAKPOINT;
+ if (CHERIoTEn && cheri_pmode_i) csr_mtval_o = pc_id_i; // kliu added to match sail
+ end
+ end
+ store_err_prio: begin
+ if (cheri_pmode_i & lsu_err_is_cheri_q) begin
+ if (cheri_wb_err_info_i[11]) begin
+ exc_cause_o = EXC_CAUSE_STORE_ADDR_MISALIGN;
+ csr_mtval_o = lsu_addr_last_i;
+ end else begin
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]};
+ end
+ end else begin
+ exc_cause_o = EXC_CAUSE_STORE_ACCESS_FAULT;
+ csr_mtval_o = lsu_addr_last_i;
+ end
+ end
+ load_err_prio: begin
+ if (cheri_pmode_i & lsu_err_is_cheri_q) begin
+ if (cheri_wb_err_info_i[11]) begin
+ exc_cause_o = EXC_CAUSE_LOAD_ADDR_MISALIGN;
+ csr_mtval_o = lsu_addr_last_i;
+ end else begin
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]};
+ end
+ end else begin
+ exc_cause_o = EXC_CAUSE_LOAD_ACCESS_FAULT;
+ csr_mtval_o = lsu_addr_last_i;
+ end
+ end
+ cheri_ex_err_prio: begin
+ if (cheri_pmode_i) begin
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, cheri_ex_err_info_i[10:0]};
+ end
+ end
+ cheri_wb_err_prio: begin
+ if (cheri_pmode_i) begin
+ if (cheri_wb_err_info_i[12]) begin // illegal SCR addr
+ exc_cause_o = EXC_CAUSE_ILLEGAL_INSN;
+ csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]};
+ end else begin
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]};
+ end
+ end
+ end
+ cheri_asr_err_prio: begin
+ exc_cause_o = EXC_CAUSE_CHERI_FAULT;
+ //csr_mtval_o = instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i;
+ csr_mtval_o = {21'b0, 1'b1, 5'h0, 5'h18}; // S=1, cap_idx=0 (pcc), err=0x18
+ end
+
+ default: ;
+ endcase
+ end else begin
+ // special instructions and pipeline flushes
+ if (mret_insn) begin
+ pc_mux_o = PC_ERET;
+ pc_set_o = 1'b1;
+ csr_restore_mret_id_o = 1'b1;
+ if (nmi_mode_q) begin
+ nmi_mode_d = 1'b0; // exit NMI mode
+ end
+ end else if (dret_insn) begin
+ pc_mux_o = PC_DRET;
+ pc_set_o = 1'b1;
+ debug_mode_d = 1'b0;
+ csr_restore_dret_id_o = 1'b1;
+ end else if (wfi_insn) begin
+ ctrl_fsm_ns = WAIT_SLEEP;
+ end else if (csr_pipe_flush && handle_irq) begin
+ // start handling IRQs when doing CSR-related pipeline flushes
+ ctrl_fsm_ns = IRQ_TAKEN;
+ end
+ end // exc_req_q
+
+ // Entering debug mode due to either single step or debug_req. Ensure
+ // registers are set for exception but then enter debug handler rather
+ // than exception handler [Debug Spec v0.13.2, p.44]
+ // Leave all other signals as is to ensure CSRs and PC get set as if
+ // core was entering exception handler, entry to debug mode will then
+ // see the appropriate state and setup dpc correctly.
+ // If an EBREAK instruction is causing us to enter debug mode on the
+ // same cycle as a debug_req or single step, honor the EBREAK and
+ // proceed to DBG_TAKEN_ID.
+ if (enter_debug_mode_prio_q && !(ebrk_insn_prio && ebreak_into_debug)) begin
+ ctrl_fsm_ns = DBG_TAKEN_IF;
+ end
+ end // FLUSH
+
+ default: begin
+ instr_req_o = 1'b0;
+ ctrl_fsm_ns = RESET;
+ end
+ endcase
+ end
+
+ assign flush_id_o = flush_id;
+
+ // signal to CSR when in debug mode
+ assign debug_mode_o = debug_mode_q;
+
+ // signal to CSR when in an NMI handler (for nested exception handling)
+ assign nmi_mode_o = nmi_mode_q;
+
+ ///////////////////
+ // Stall control //
+ ///////////////////
+
+  // If high, the current instruction cannot complete this cycle, either because it needs more
+  // cycles to finish (stall_id_i) or because the writeback stage cannot accept it yet
+  // (stall_wb_i). If there is no writeback stage, stall_wb_i is a constant 0.
+ assign stall = stall_id_i | stall_wb_i;
+
+ // signal to IF stage that ID stage is ready for next instr
+ assign id_in_ready_o = ~stall & ~halt_if & ~retain_id;
+
+  // Kill the instruction in the IF-ID pipeline register once it is done, or if, for example, a
+  // multicycle instruction causes an exception.
+  // retain_id is another kind of stall, where the instr_valid bit must remain
+  // set (unless flush_id is also set). It cannot be factored directly into
+  // stall as that would cause a combinational loop.
+ assign instr_valid_clear_o = ~(stall | retain_id) | flush_id;
+
+ // update registers
+ always_ff @(posedge clk_i or negedge rst_ni) begin : update_regs
+ if (!rst_ni) begin
+ ctrl_fsm_cs <= RESET;
+ nmi_mode_q <= 1'b0;
+ do_single_step_q <= 1'b0;
+ debug_mode_q <= 1'b0;
+ enter_debug_mode_prio_q <= 1'b0;
+ load_err_q <= 1'b0;
+ store_err_q <= 1'b0;
+ lsu_err_is_cheri_q <= 1'b0;
+ exc_req_q <= 1'b0;
+ illegal_insn_q <= 1'b0;
+ cheri_ex_err_q <= 1'b0;
+ cheri_wb_err_q <= 1'b0;
+ cheri_asr_err_q <= 1'b0;
+ end else begin
+ ctrl_fsm_cs <= ctrl_fsm_ns;
+ nmi_mode_q <= nmi_mode_d;
+ do_single_step_q <= do_single_step_d;
+ debug_mode_q <= debug_mode_d;
+ enter_debug_mode_prio_q <= enter_debug_mode_prio_d;
+ load_err_q <= load_err_d;
+ store_err_q <= store_err_d;
+ lsu_err_is_cheri_q <= lsu_err_is_cheri_i;
+ exc_req_q <= exc_req_d;
+ illegal_insn_q <= illegal_insn_d;
+ cheri_ex_err_q <= cheri_ex_err_d;
+ cheri_wb_err_q <= cheri_wb_err_i;
+ cheri_asr_err_q <= cheri_asr_err_d;
+ end
+ end
+
+ `ifdef RVFI
+  // Workaround for an internal Verilator error when using hierarchical references to calculate
+  // this directly in cheriot_core
+ logic rvfi_flush_next;
+
+ assign rvfi_flush_next = ctrl_fsm_ns == FLUSH;
+ `endif
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_core.sv b/hw/ip/cheriot-ibex/rtl/cheriot_core.sv
new file mode 100644
index 0000000..7069082
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_core.sv
@@ -0,0 +1,2255 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`ifdef RISCV_FORMAL
+ `define RVFI
+`endif
+
+`include "prim_assert.sv"
+
+/**
+ * Top level module of the ibex RISC-V core
+ */
+
+//import cheri_pkg::*;
+
+module cheriot_core import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter rv32m_e RV32M = RV32MFast,
+ parameter rv32b_e RV32B = RV32BNone,
+ parameter bit BranchTargetALU = 1'b0,
+ parameter bit WritebackStage = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter int unsigned BusSizeECC = BUS_SIZE,
+ parameter int unsigned TagSizeECC = IC_TAG_SIZE,
+ parameter int unsigned LineSizeECC = IC_LINE_SIZE,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit ResetAll = 1'b0,
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault,
+ parameter bit SecureIbex = 1'b0,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit RegFileECC = 1'b0,
+ parameter int unsigned RegFileDataWidth = 32,
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+  // CHERIoT parameters
+ parameter bit CHERIoTEn = 1'b1,
+ parameter int unsigned DataWidth = 33,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2002_f000,
+ parameter int unsigned TSMapSize = 1024,
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64,
+ parameter bit CheriCapIT8 = 1'b0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+
+ // Data memory interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [DataWidth-1:0] data_wdata_o,
+ input logic [DataWidth-1:0] data_rdata_i,
+ input logic data_err_i,
+
+ // Register file interface
+ output logic dummy_instr_id_o,
+ output logic [4:0] rf_raddr_a_o,
+ output logic [4:0] rf_raddr_b_o,
+ output logic [4:0] rf_waddr_wb_o,
+ output logic rf_we_wb_o,
+ output logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc_o,
+ output reg_cap_t rf_wcap_wb_o,
+ input logic [RegFileDataWidth-1:0] rf_rdata_a_ecc_i,
+ input logic [RegFileDataWidth-1:0] rf_rdata_b_ecc_i,
+ input reg_cap_t rf_rcap_a_i,
+ input reg_cap_t rf_rcap_b_i,
+ input logic [31:0] rf_reg_rdy_i,
+
+ output logic rf_trsv_en_o,
+ output logic [4:0] rf_trsv_addr_o,
+ output logic [6:0] rf_trsv_par_o,
+ output logic [4:0] rf_trvk_addr_o,
+ output logic rf_trvk_en_o,
+ output logic rf_trvk_clrtag_o,
+ output logic [6:0] rf_trvk_par_o,
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i,
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+ output logic cheri_fatal_err_o,
+
+ // RAMs interface
+ output logic [IC_NUM_WAYS-1:0] ic_tag_req_o,
+ output logic ic_tag_write_o,
+ output logic [IC_INDEX_W-1:0] ic_tag_addr_o,
+ output logic [TagSizeECC-1:0] ic_tag_wdata_o,
+ input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS],
+ output logic [IC_NUM_WAYS-1:0] ic_data_req_o,
+ output logic ic_data_write_o,
+ output logic [IC_INDEX_W-1:0] ic_data_addr_o,
+ output logic [LineSizeECC-1:0] ic_data_wdata_o,
+ input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS],
+ input logic ic_scr_key_valid_i,
+
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+  input  logic                         irq_nm_i,       // non-maskable interrupt
+ output logic irq_pending_o,
+
+ // Debug Interface
+ input logic debug_req_i,
+ output crash_dump_t crash_dump_o,
+ output logic double_fault_seen_o,
+
+ // RISC-V Formal Interface
+ // Does not comply with the coding standards of _i/_o suffixes, but follows
+ // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+ output logic rvfi_valid,
+ output logic [63:0] rvfi_order,
+ output logic [31:0] rvfi_insn,
+ output logic rvfi_trap,
+ output logic rvfi_halt,
+ output logic rvfi_intr,
+ output logic [ 1:0] rvfi_mode,
+ output logic [ 1:0] rvfi_ixl,
+ output logic [ 4:0] rvfi_rs1_addr,
+ output logic [ 4:0] rvfi_rs2_addr,
+ output logic [ 4:0] rvfi_rs3_addr,
+ output logic [31:0] rvfi_rs1_rdata,
+ output reg_cap_t rvfi_rs1_rcap,
+ output logic [31:0] rvfi_rs2_rdata,
+ output reg_cap_t rvfi_rs2_rcap,
+ output logic [31:0] rvfi_rs3_rdata,
+ output logic [ 4:0] rvfi_rd_addr,
+ output logic [31:0] rvfi_rd_wdata,
+ output reg_cap_t rvfi_rd_wcap,
+ output logic [31:0] rvfi_pc_rdata,
+ output logic [31:0] rvfi_pc_wdata,
+ output logic rvfi_mem_is_cap,
+ output logic [31:0] rvfi_mem_addr,
+ output logic [ 3:0] rvfi_mem_rmask,
+ output logic [ 3:0] rvfi_mem_wmask,
+ output logic [DataWidth-1:0] rvfi_mem_rdata,
+ output reg_cap_t rvfi_mem_rcap,
+ output logic [DataWidth-1:0] rvfi_mem_wdata,
+ output reg_cap_t rvfi_mem_wcap,
+ output logic [31:0] rvfi_ext_mip,
+ output logic rvfi_ext_nmi,
+ output logic rvfi_ext_debug_req,
+ output logic [63:0] rvfi_ext_mcycle,
+`endif
+
+ // CPU Control Signals
+ input fetch_enable_t fetch_enable_i,
+ output logic alert_minor_o,
+ output logic alert_major_o,
+ output logic icache_inval_o,
+ output logic core_busy_o
+);
+
+ localparam int unsigned PMP_NUM_CHAN = 3;
+ localparam bit DataIndTiming = SecureIbex;
+ localparam bit PCIncrCheck = SecureIbex;
+ localparam bit ShadowCSR = 1'b0;
+
+ // IF/ID signals
+ logic dummy_instr_id;
+ logic instr_valid_id;
+ logic instr_executing_id;
+ logic instr_new_id;
+ logic [31:0] instr_rdata_id; // Instruction sampled inside IF stage
+ logic [31:0] instr_rdata_alu_id; // Instruction sampled inside IF stage (replicated to
+ // ease fan-out)
+ logic [15:0] instr_rdata_c_id; // Compressed instruction sampled inside IF stage
+ logic instr_is_compressed_id;
+ logic instr_perf_count_id;
+ logic instr_bp_taken_id;
+ logic instr_fetch_err; // Bus error on instr fetch
+ logic instr_fetch_err_plus2; // Instruction error is misaligned
+ logic instr_fetch_cheri_acc_vio;
+ logic instr_fetch_cheri_bound_vio;
+ logic illegal_c_insn_id; // Illegal compressed instruction sent to ID stage
+
+ logic [31:0] pc_if; // Program counter in IF stage
+ logic [31:0] pc_id; // Program counter in ID stage
+ logic [31:0] pc_wb; // Program counter in WB stage
+ logic [33:0] imd_val_d_ex[2]; // Intermediate register for multicycle Ops
+ logic [33:0] imd_val_q_ex[2]; // Intermediate register for multicycle Ops
+ logic [1:0] imd_val_we_ex;
+
+ logic data_ind_timing;
+ logic dummy_instr_en;
+ logic [2:0] dummy_instr_mask;
+ logic dummy_instr_seed_en;
+ logic [31:0] dummy_instr_seed;
+ logic icache_enable;
+ logic icache_inval;
+ logic pc_mismatch_alert;
+ logic csr_shadow_err;
+
+ logic instr_first_cycle_id;
+ logic instr_valid_clear;
+ logic pc_set;
+ logic nt_branch_mispredict;
+ logic [31:0] nt_branch_addr;
+ pc_sel_e pc_mux_id; // Mux selector for next PC
+ exc_pc_sel_e exc_pc_mux_id; // Mux selector for exception PC
+ exc_cause_e exc_cause; // Exception cause
+
+ logic lsu_load_err;
+ logic lsu_store_err;
+ logic lsu_err_is_cheri;
+
+ // LSU signals
+ logic lsu_addr_incr_req;
+ logic [31:0] lsu_addr_last;
+ logic [31:0] lsu_addr;
+
+ // Jump and branch target and decision (EX->IF)
+ logic [31:0] branch_target_ex_rv32;
+ logic [31:0] branch_target_ex_cheri;
+ logic [31:0] branch_target_ex;
+ logic branch_decision;
+
+ // Core busy signals
+ logic ctrl_busy;
+ logic if_busy;
+ logic lsu_busy;
+
+ logic lsu_busy_tbre;
+
+ // Register File
+ logic [4:0] rf_raddr_a;
+ logic [31:0] rf_rdata_a;
+ logic [4:0] rf_raddr_b;
+ logic [31:0] rf_rdata_b;
+ logic rf_ren_a;
+ logic rf_ren_b;
+ logic [4:0] rf_waddr_wb;
+ logic [31:0] rf_wdata_wb;
+
+ reg_cap_t rf_wcap_wb;
+
+ // Writeback register write data that can be used on the forwarding path (doesn't factor in memory
+ // read data as this is too late for the forwarding path)
+ logic [31:0] rf_wdata_fwd_wb;
+
+ reg_cap_t rf_wcap_fwd_wb;
+
+ logic [32:0] rf_wdata_lsu;
+ reg_cap_t rf_wcap_lsu;
+ logic rf_we_wb;
+ logic rf_we_lsu;
+ logic rf_ecc_err_comb;
+
+ logic [4:0] rf_waddr_id;
+ logic [31:0] rf_wdata_id;
+ logic rf_we_id;
+ logic rf_rd_a_wb_match;
+ logic rf_rd_b_wb_match;
+
+ // ALU Control
+ alu_op_e alu_operator_ex;
+ logic [31:0] alu_operand_a_ex;
+ logic [31:0] alu_operand_b_ex;
+
+ logic [31:0] bt_a_operand;
+ logic [31:0] bt_b_operand;
+
+ logic [31:0] alu_adder_result_ex; // Used to forward computed address to LSU
+ logic [31:0] result_ex;
+
+ // Multiplier Control
+ logic mult_en_ex;
+ logic div_en_ex;
+ logic mult_sel_ex;
+ logic div_sel_ex;
+ md_op_e multdiv_operator_ex;
+ logic [1:0] multdiv_signed_mode_ex;
+ logic [31:0] multdiv_operand_a_ex;
+ logic [31:0] multdiv_operand_b_ex;
+ logic multdiv_ready_id;
+
+ // CSR control
+ logic csr_access;
+ csr_op_e csr_op;
+ logic csr_op_en;
+ csr_num_e csr_addr;
+ logic [31:0] csr_rdata;
+ logic [31:0] csr_wdata;
+  logic        illegal_csr_insn_id;    // CSR access to non-existent register,
+                                       // with wrong privilege level,
+                                       // or missing write permissions
+
+ // Data Memory Control
+ logic lsu_we;
+ logic [1:0] lsu_type;
+ logic lsu_sign_ext;
+ logic lsu_req;
+ logic [32:0] lsu_wdata;
+ reg_cap_t lsu_wcap;
+ logic lsu_req_done;
+
+ // stall control
+ logic id_in_ready;
+ logic ex_valid;
+
+ logic lsu_resp_valid;
+ logic lsu_resp_err;
+
+ // Signals between instruction core interface and pipe (if and id stages)
+ logic instr_req_int; // Id stage asserts a req to instruction core interface
+ logic instr_req_gated;
+
+ // Writeback stage
+ logic en_wb;
+ wb_instr_type_e instr_type_wb;
+ logic ready_wb;
+ logic rf_write_wb;
+ logic outstanding_load_wb;
+ logic outstanding_store_wb;
+
+ // Interrupts
+ logic nmi_mode;
+ irqs_t irqs;
+ logic csr_mstatus_mie;
+ logic [31:0] csr_mepc, csr_depc;
+
+ // PMP signals
+ logic [33:0] csr_pmp_addr [PMPNumRegions];
+ pmp_cfg_t csr_pmp_cfg [PMPNumRegions];
+ pmp_mseccfg_t csr_pmp_mseccfg;
+ logic pmp_req_err [PMP_NUM_CHAN];
+ logic data_req_out;
+
+ logic csr_save_if;
+ logic csr_save_id;
+ logic csr_save_wb;
+ logic csr_restore_mret_id;
+ logic csr_restore_dret_id;
+ logic csr_save_cause;
+ logic csr_mepcc_clrtag;
+ logic csr_mtvec_init;
+ logic [31:0] csr_mtvec;
+ logic [31:0] csr_mtval;
+ logic csr_mstatus_tw;
+ priv_lvl_e priv_mode_id;
+ priv_lvl_e priv_mode_lsu;
+
+ // debug mode and dcsr configuration
+ logic debug_mode;
+ dbg_cause_e debug_cause;
+ logic debug_csr_save;
+ logic debug_single_step;
+ logic debug_ebreakm;
+ logic debug_ebreaku;
+ logic trigger_match;
+
+ // signals relating to instruction movements between pipeline stages
+ // used by performance counters and RVFI
+ logic instr_id_done;
+ logic instr_done_wb;
+
+ logic perf_instr_ret_wb;
+ logic perf_instr_ret_compressed_wb;
+ logic perf_instr_ret_wb_spec;
+ logic perf_instr_ret_compressed_wb_spec;
+ logic perf_iside_wait;
+ logic perf_dside_wait;
+ logic perf_mul_wait;
+ logic perf_div_wait;
+ logic perf_jump;
+ logic perf_branch;
+ logic perf_tbranch;
+ logic perf_load;
+ logic perf_store;
+
+ // for RVFI
+ logic illegal_insn_id, unused_illegal_insn_id; // ID stage sees an illegal instruction
+
+ pcc_cap_t pcc_cap_r, pcc_cap_w;
+
+ logic cheri_branch_req;
+ logic cheri_branch_req_spec;
+ logic instr_is_cheri_id;
+ logic instr_is_rv32lsu_id;
+ logic cheri_exec_id;
+ logic [11:0] cheri_imm12;
+ logic [19:0] cheri_imm20;
+ logic [20:0] cheri_imm21;
+ logic [4:0] cheri_cs2_dec;
+ logic cheri_load_id;
+ logic cheri_store_id;
+ logic cheri_rf_we;
+ logic [31:0] cheri_result_data;
+ reg_cap_t cheri_result_cap;
+ logic cheri_ex_valid;
+ logic cheri_ex_err;
+ logic [11:0] cheri_ex_err_info;
+ logic cheri_wb_err;
+ logic [15:0] cheri_wb_err_info;
+ logic [OPDW-1:0] cheri_operator;
+
+ logic rv32_lsu_req;
+ logic rv32_lsu_we;
+ logic [1:0] rv32_lsu_type;
+ logic [31:0] rv32_lsu_wdata;
+ logic rv32_lsu_sign_ext;
+ logic rv32_lsu_addr_incr_req;
+ logic [31:0] rv32_lsu_addr_last;
+ logic rv32_lsu_resp_valid;
+
+ logic cheri_csr_access;
+ logic [4:0] cheri_csr_addr;
+ logic [31:0] cheri_csr_wdata;
+ reg_cap_t cheri_csr_wcap;
+ cheri_csr_op_e cheri_csr_op;
+ logic cheri_csr_op_en;
+ logic [31:0] cheri_csr_rdata;
+ reg_cap_t cheri_csr_rcap;
+ logic cheri_csr_set_mie;
+ logic cheri_csr_clr_mie;
+
+ logic lsu_is_cap, lsu_cheri_err;
+ logic [3:0] lsu_lc_clrperm;
+
+ logic csr_dbg_tclr_fault;
+
+ logic [31:0] csr_mshwm;
+ logic [31:0] csr_mshwmb;
+ logic csr_mshwm_set;
+ logic [31:0] csr_mshwm_new;
+ logic ztop_wr;
+ logic [31:0] ztop_wdata;
+ full_cap_t ztop_wfcap;
+ logic [31:0] ztop_rdata;
+ reg_cap_t ztop_rcap;
+
+ logic stkz_active;
+ logic stkz_abort;
+ logic [31:0] stkz_ptr;
+ logic [31:0] stkz_base;
+
+ logic lsu_tbre_resp_valid;
+ logic lsu_tbre_resp_err;
+ logic lsu_resp_is_wr;
+ logic [32:0] lsu_tbre_raw_lsw;
+ logic lsu_tbre_req_done;
+ logic lsu_tbre_addr_incr;
+ logic tbre_lsu_req;
+ logic tbre_lsu_is_cap;
+ logic tbre_lsu_we;
+ logic [31:0] tbre_lsu_addr;
+ logic [32:0] tbre_lsu_wdata;
+ logic tbre_trvk_en;
+ logic tbre_trvk_clrtag;
+
+ logic lsu_tbre_sel, cpu_lsu_dec;
+ logic rf_trsv_en;
+
+ logic cpu_stall_by_stkz, cpu_grant_to_stkz;
+
+
+ //////////////////////
+ // Clock management //
+ //////////////////////
+
+ // Before going to sleep, wait for I- and D-side
+ // interfaces to finish ongoing operations.
+ assign core_busy_o = ctrl_busy | if_busy | lsu_busy;
+
+ //////////////
+ // IF stage //
+ //////////////
+
+ cheriot_if_stage #(
+ .DmHaltAddr (DmHaltAddr),
+ .DmExceptionAddr (DmExceptionAddr),
+ .DummyInstructions(DummyInstructions),
+ .ICache (ICache),
+ .ICacheECC (ICacheECC),
+ .BusSizeECC (BusSizeECC),
+ .TagSizeECC (TagSizeECC),
+ .LineSizeECC (LineSizeECC),
+ .PCIncrCheck (PCIncrCheck),
+ .ResetAll (ResetAll ),
+ .RndCnstLfsrSeed (RndCnstLfsrSeed ),
+ .RndCnstLfsrPerm (RndCnstLfsrPerm ),
+ .BranchPredictor (BranchPredictor),
+ .CHERIoTEn (CHERIoTEn)
+ ) if_stage_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+ .boot_addr_i (boot_addr_i),
+ .req_i (instr_req_gated), // instruction request control
+ .debug_mode_i (debug_mode),
+
+ // instruction cache interface
+ .instr_req_o (instr_req_o),
+ .instr_addr_o (instr_addr_o),
+ .instr_gnt_i (instr_gnt_i),
+ .instr_rvalid_i (instr_rvalid_i),
+ .instr_rdata_i (instr_rdata_i),
+ .instr_err_i (instr_err_i),
+
+ .ic_tag_req_o (ic_tag_req_o),
+ .ic_tag_write_o (ic_tag_write_o),
+ .ic_tag_addr_o (ic_tag_addr_o),
+ .ic_tag_wdata_o (ic_tag_wdata_o),
+ .ic_tag_rdata_i (ic_tag_rdata_i),
+ .ic_data_req_o (ic_data_req_o),
+ .ic_data_write_o (ic_data_write_o),
+ .ic_data_addr_o (ic_data_addr_o),
+ .ic_data_wdata_o (ic_data_wdata_o),
+ .ic_data_rdata_i (ic_data_rdata_i),
+ .ic_scr_key_valid_i(ic_scr_key_valid_i),
+
+ // outputs to ID stage
+ .instr_valid_id_o (instr_valid_id),
+ .instr_new_id_o (instr_new_id),
+ .instr_rdata_id_o (instr_rdata_id),
+ .instr_rdata_alu_id_o (instr_rdata_alu_id),
+ .instr_rdata_c_id_o (instr_rdata_c_id),
+ .instr_is_compressed_id_o(instr_is_compressed_id),
+ .instr_bp_taken_o (instr_bp_taken_id),
+ .instr_fetch_err_o (instr_fetch_err),
+ .instr_fetch_err_plus2_o (instr_fetch_err_plus2),
+ .instr_fetch_cheri_acc_vio_o (instr_fetch_cheri_acc_vio),
+ .instr_fetch_cheri_bound_vio_o (instr_fetch_cheri_bound_vio),
+
+ .illegal_c_insn_id_o (illegal_c_insn_id),
+ .dummy_instr_id_o (dummy_instr_id),
+ .pc_if_o (pc_if),
+ .pc_id_o (pc_id),
+ .pmp_err_if_i (pmp_req_err[PMP_I]),
+ .pmp_err_if_plus2_i (pmp_req_err[PMP_I2]),
+
+ // control signals
+ .instr_valid_clear_i (instr_valid_clear),
+ .pc_set_i (pc_set),
+ .pc_mux_i (pc_mux_id),
+ .nt_branch_mispredict_i(nt_branch_mispredict),
+ .exc_pc_mux_i (exc_pc_mux_id),
+ .exc_cause (exc_cause),
+ .dummy_instr_en_i (dummy_instr_en),
+ .dummy_instr_mask_i (dummy_instr_mask),
+ .dummy_instr_seed_en_i (dummy_instr_seed_en),
+ .dummy_instr_seed_i (dummy_instr_seed),
+ .icache_enable_i (icache_enable),
+ .icache_inval_i (icache_inval),
+
+ // branch targets
+ .branch_target_ex_i(branch_target_ex),
+ .nt_branch_addr_i (nt_branch_addr),
+
+ // CSRs
+ .csr_mepc_i (csr_mepc), // exception return address
+ .csr_depc_i (csr_depc), // debug return address
+ .csr_mtvec_i (csr_mtvec), // trap-vector base address
+ .csr_mtvec_init_o(csr_mtvec_init),
+
+ // pipeline stalls
+ .id_in_ready_i(id_in_ready),
+
+ .pc_mismatch_alert_o(pc_mismatch_alert),
+ .if_busy_o (if_busy),
+ .pcc_cap_i (pcc_cap_r)
+ );
+
+ // Core is waiting for the ISide when ID/EX stage is ready for a new instruction but none are
+ // available
+ assign perf_iside_wait = id_in_ready & ~instr_valid_id;
+
+ // Multi-bit fetch enable used when SecureIbex == 1. When SecureIbex == 0 only use the bottom-bit
+ // of fetch_enable_i. Ensure the multi-bit encoding has the bottom bit set for on and unset for
+ // off so FetchEnableOn/FetchEnableOff can be used without needing to know the value of
+ // SecureIbex.
+ `ASSERT_INIT(FetchEnableSecureOnBottomBitSet, FetchEnableOn[0] == 1'b1)
+ `ASSERT_INIT(FetchEnableSecureOffBottomBitClear, FetchEnableOff[0] == 1'b0)
+
+ // fetch_enable_i can be used to stop the core fetching new instructions
+ if (SecureIbex) begin : g_instr_req_gated_secure
+ // For secure Ibex fetch_enable_i must be a specific multi-bit pattern to enable instruction
+ // fetch
+ assign instr_req_gated = instr_req_int & (fetch_enable_i == FetchEnableOn);
+ end else begin : g_instr_req_gated_non_secure
+ // For non secure Ibex only the bottom bit of fetch enable is considered
+ logic unused_fetch_enable;
+ assign unused_fetch_enable = ^fetch_enable_i[$bits(fetch_enable_t)-1:1];
+
+ assign instr_req_gated = instr_req_int & fetch_enable_i[0];
+ end
+
+ //////////////
+ // ID stage //
+ //////////////
+
+ cheriot_id_stage #(
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .BranchTargetALU(BranchTargetALU),
+ .DataIndTiming (DataIndTiming),
+ .WritebackStage (WritebackStage),
+ .BranchPredictor(BranchPredictor),
+ .CHERIoTEn (CHERIoTEn),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2)
+ ) id_stage_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+ .cheri_tsafe_en_i (cheri_tsafe_en_i),
+
+ // Processor Enable
+ .ctrl_busy_o (ctrl_busy),
+ .illegal_insn_o(illegal_insn_id),
+
+ // from/to IF-ID pipeline register
+ .instr_valid_i (instr_valid_id),
+ .instr_rdata_i (instr_rdata_id),
+ .instr_rdata_alu_i (instr_rdata_alu_id),
+ .instr_rdata_c_i (instr_rdata_c_id),
+ .instr_is_compressed_i(instr_is_compressed_id),
+ .instr_bp_taken_i (instr_bp_taken_id),
+
+ // Jumps and branches
+ .branch_decision_i(branch_decision),
+
+ // IF and ID control signals
+ .instr_first_cycle_id_o(instr_first_cycle_id),
+ .instr_valid_clear_o (instr_valid_clear),
+ .id_in_ready_o (id_in_ready),
+ .instr_req_o (instr_req_int),
+ .pc_set_o (pc_set),
+ .pc_mux_o (pc_mux_id),
+ .nt_branch_mispredict_o(nt_branch_mispredict),
+ .nt_branch_addr_o (nt_branch_addr),
+ .exc_pc_mux_o (exc_pc_mux_id),
+ .exc_cause_o (exc_cause),
+ .icache_inval_o (icache_inval),
+
+ .instr_fetch_err_i (instr_fetch_err),
+ .instr_fetch_err_plus2_i(instr_fetch_err_plus2),
+ .instr_fetch_cheri_acc_vio_i (instr_fetch_cheri_acc_vio),
+ .instr_fetch_cheri_bound_vio_i (instr_fetch_cheri_bound_vio),
+
+ .illegal_c_insn_i (illegal_c_insn_id),
+
+ .pc_id_i(pc_id),
+
+ // Stalls
+ .ex_valid_i (ex_valid),
+ .lsu_resp_valid_i(lsu_resp_valid),
+
+ .alu_operator_ex_o (alu_operator_ex),
+ .alu_operand_a_ex_o(alu_operand_a_ex),
+ .alu_operand_b_ex_o(alu_operand_b_ex),
+
+ .imd_val_q_ex_o (imd_val_q_ex),
+ .imd_val_d_ex_i (imd_val_d_ex),
+ .imd_val_we_ex_i(imd_val_we_ex),
+
+ .bt_a_operand_o(bt_a_operand),
+ .bt_b_operand_o(bt_b_operand),
+
+ .mult_en_ex_o (mult_en_ex),
+ .div_en_ex_o (div_en_ex),
+ .mult_sel_ex_o (mult_sel_ex),
+ .div_sel_ex_o (div_sel_ex),
+ .multdiv_operator_ex_o (multdiv_operator_ex),
+ .multdiv_signed_mode_ex_o(multdiv_signed_mode_ex),
+ .multdiv_operand_a_ex_o (multdiv_operand_a_ex),
+ .multdiv_operand_b_ex_o (multdiv_operand_b_ex),
+ .multdiv_ready_id_o (multdiv_ready_id),
+
+ // CSR ID/EX
+ .csr_access_o (csr_access),
+ .csr_op_o (csr_op),
+ .csr_op_en_o (csr_op_en),
+ .csr_save_if_o (csr_save_if), // control signal to save PC
+ .csr_save_id_o (csr_save_id), // control signal to save PC
+ .csr_save_wb_o (csr_save_wb), // control signal to save PC
+ .csr_restore_mret_id_o(csr_restore_mret_id), // restore mstatus upon MRET
+    .csr_restore_dret_id_o(csr_restore_dret_id), // restore CSR state upon DRET
+ .csr_save_cause_o (csr_save_cause),
+ .csr_mepcc_clrtag_o (csr_mepcc_clrtag),
+ .csr_mtval_o (csr_mtval),
+ .priv_mode_i (priv_mode_id),
+ .csr_mstatus_tw_i (csr_mstatus_tw),
+ .illegal_csr_insn_i (illegal_csr_insn_id),
+ .data_ind_timing_i (data_ind_timing),
+ .csr_pcc_perm_sr_i (pcc_cap_r.perms[PERM_SR]),
+
+ // LSU
+ .lsu_req_o (rv32_lsu_req), // to load store unit
+ .lsu_we_o (rv32_lsu_we), // to load store unit
+ .lsu_type_o (rv32_lsu_type), // to load store unit
+ .lsu_sign_ext_o (rv32_lsu_sign_ext), // to load store unit
+ .lsu_wdata_o (rv32_lsu_wdata), // to load store unit
+ .lsu_req_done_i (lsu_req_done), // from load store unit
+
+ .lsu_addr_incr_req_i(rv32_lsu_addr_incr_req),
+ .lsu_addr_last_i (rv32_lsu_addr_last),
+
+ .lsu_load_err_i (lsu_load_err),
+ .lsu_store_err_i(lsu_store_err),
+ .lsu_err_is_cheri_i(lsu_err_is_cheri),
+
+ // Interrupt Signals
+ .csr_mstatus_mie_i(csr_mstatus_mie),
+ .irq_pending_i (irq_pending_o),
+ .irqs_i (irqs),
+ .irq_nm_i (irq_nm_i),
+ .nmi_mode_o (nmi_mode),
+
+ // Debug Signal
+ .debug_mode_o (debug_mode),
+ .debug_cause_o (debug_cause),
+ .debug_csr_save_o (debug_csr_save),
+ .debug_req_i (debug_req_i),
+ .debug_single_step_i(debug_single_step),
+ .debug_ebreakm_i (debug_ebreakm),
+ .debug_ebreaku_i (debug_ebreaku),
+ .trigger_match_i (trigger_match),
+
+ // write data to commit in the register file
+ .result_ex_i(result_ex),
+ .csr_rdata_i(csr_rdata),
+
+ .rf_raddr_a_o (rf_raddr_a),
+ .rf_rdata_a_i (rf_rdata_a),
+ .rf_raddr_b_o (rf_raddr_b),
+ .rf_rdata_b_i (rf_rdata_b),
+ .rf_ren_a_o (rf_ren_a),
+ .rf_ren_b_o (rf_ren_b),
+ .rf_waddr_id_o (rf_waddr_id),
+ .rf_wdata_id_o (rf_wdata_id),
+ .rf_we_id_o (rf_we_id),
+ .rf_rd_a_wb_match_o(rf_rd_a_wb_match),
+ .rf_rd_b_wb_match_o(rf_rd_b_wb_match),
+
+ .rf_waddr_wb_i (rf_waddr_wb),
+ .rf_wdata_fwd_wb_i(rf_wdata_fwd_wb),
+ .rf_write_wb_i (rf_write_wb),
+ .rf_reg_rdy_i (rf_reg_rdy_i),
+
+ .en_wb_o (en_wb),
+ .instr_type_wb_o (instr_type_wb),
+ .instr_perf_count_id_o (instr_perf_count_id),
+ .ready_wb_i (ready_wb),
+ .outstanding_load_wb_i (outstanding_load_wb),
+ .outstanding_store_wb_i(outstanding_store_wb),
+
+ // Performance Counters
+ .perf_jump_o (perf_jump),
+ .perf_branch_o (perf_branch),
+ .perf_tbranch_o (perf_tbranch),
+ .perf_dside_wait_o(perf_dside_wait),
+ .perf_mul_wait_o (perf_mul_wait),
+ .perf_div_wait_o (perf_div_wait),
+ .instr_id_done_o (instr_id_done),
+
+ .cheri_exec_id_o (cheri_exec_id),
+ .instr_is_cheri_id_o (instr_is_cheri_id),
+ .instr_is_rv32lsu_id_o (instr_is_rv32lsu_id),
+ .cheri_imm12_o (cheri_imm12),
+ .cheri_imm20_o (cheri_imm20),
+ .cheri_imm21_o (cheri_imm21),
+ .cheri_operator_o (cheri_operator),
+ .cheri_cs2_dec_o (cheri_cs2_dec),
+ .cheri_load_o (cheri_load_id),
+ .cheri_store_o (cheri_store_id),
+ .cheri_ex_valid_i (cheri_ex_valid),
+ .cheri_ex_err_i (cheri_ex_err),
+ .cheri_ex_err_info_i (cheri_ex_err_info),
+ .cheri_wb_err_i (cheri_wb_err),
+ .cheri_wb_err_info_i (cheri_wb_err_info),
+ .cheri_branch_req_i (cheri_branch_req_spec),
+ .cheri_branch_target_i (branch_target_ex_cheri)
+ );
+
+
+ assign icache_inval_o = icache_inval;
+ // for RVFI only
+ assign unused_illegal_insn_id = illegal_insn_id;
+
+ cheriot_ex_block #(
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .BranchTargetALU(BranchTargetALU)
+ ) ex_block_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ // ALU signal from ID stage
+ .alu_operator_i (alu_operator_ex),
+ .alu_operand_a_i (alu_operand_a_ex),
+ .alu_operand_b_i (alu_operand_b_ex),
+ .alu_instr_first_cycle_i(instr_first_cycle_id),
+
+ // Branch target ALU signal from ID stage
+ .bt_a_operand_i(bt_a_operand),
+ .bt_b_operand_i(bt_b_operand),
+
+    // Multiplier/Divider signal from ID stage
+ .multdiv_operator_i (multdiv_operator_ex),
+ .mult_en_i (mult_en_ex),
+ .div_en_i (div_en_ex),
+ .mult_sel_i (mult_sel_ex),
+ .div_sel_i (div_sel_ex),
+ .multdiv_signed_mode_i(multdiv_signed_mode_ex),
+ .multdiv_operand_a_i (multdiv_operand_a_ex),
+ .multdiv_operand_b_i (multdiv_operand_b_ex),
+ .multdiv_ready_id_i (multdiv_ready_id),
+ .data_ind_timing_i (data_ind_timing),
+
+ // Intermediate value register
+ .imd_val_we_o(imd_val_we_ex),
+ .imd_val_d_o (imd_val_d_ex),
+ .imd_val_q_i (imd_val_q_ex),
+
+ // Outputs
+ .alu_adder_result_ex_o(alu_adder_result_ex), // to LSU
+ .result_ex_o (result_ex), // to ID
+
+ .branch_target_o (branch_target_ex_rv32), // to IF
+ .branch_decision_o(branch_decision), // to ID
+
+ .ex_valid_o(ex_valid)
+ );
+
+ //////////////
+ // cheri EX //
+ //////////////
+ if (CHERIoTEn) begin : g_cheri_ex
+ cheri_ex #(
+ .WritebackStage (WritebackStage),
+ .MemCapFmt (MemCapFmt),
+ .HeapBase (HeapBase),
+ .TSMapBase (TSMapBase),
+ .TSMapSize (TSMapSize),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2),
+ .CheriStkZ (CheriStkZ),
+ .CheriCapIT8 (CheriCapIT8)
+ ) u_cheri_ex (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .cheri_pmode_i (cheri_pmode_i),
+ .cheri_tsafe_en_i (cheri_tsafe_en_i),
+ .debug_mode_i (debug_mode),
+ .fwd_we_i (rf_write_wb),
+ .fwd_waddr_i (rf_waddr_wb),
+ .fwd_wdata_i (rf_wdata_fwd_wb),
+ .fwd_wcap_i (rf_wcap_fwd_wb),
+ .rf_raddr_a_i (rf_raddr_a),
+ .rf_rdata_a_i (rf_rdata_a),
+ .rf_rcap_a_i (rf_rcap_a_i),
+ .rf_raddr_b_i (rf_raddr_b),
+ .rf_rdata_b_i (rf_rdata_b),
+ .rf_rcap_b_i (rf_rcap_b_i),
+ .rf_trsv_en_o (rf_trsv_en),
+ .rf_waddr_i (rf_waddr_id),
+ .pcc_cap_i (pcc_cap_r),
+ .pcc_cap_o (pcc_cap_w),
+ .pc_id_i (pc_id),
+ .branch_req_o (cheri_branch_req),
+ .branch_req_spec_o (cheri_branch_req_spec),
+ .branch_target_o (branch_target_ex_cheri),
+ .cheri_exec_id_i (cheri_exec_id),
+ .instr_valid_i (instr_valid_id),
+ .instr_first_cycle_i (instr_first_cycle_id),
+ .instr_is_cheri_i (instr_is_cheri_id),
+ .instr_is_rv32lsu_i (instr_is_rv32lsu_id),
+ .instr_is_compressed_i(instr_is_compressed_id),
+ .cheri_imm12_i (cheri_imm12),
+ .cheri_imm20_i (cheri_imm20),
+ .cheri_imm21_i (cheri_imm21),
+ .cheri_operator_i (cheri_operator),
+ .cheri_cs2_dec_i (cheri_cs2_dec),
+ .cheri_rf_we_o (cheri_rf_we),
+ .result_data_o (cheri_result_data),
+ .result_cap_o (cheri_result_cap),
+ .cheri_ex_valid_o (cheri_ex_valid),
+ .cheri_ex_err_o (cheri_ex_err),
+ .cheri_ex_err_info_o (cheri_ex_err_info),
+ .cheri_wb_err_o (cheri_wb_err),
+ .cheri_wb_err_info_o (cheri_wb_err_info),
+ .lsu_req_o (lsu_req),
+ .lsu_is_cap_o (lsu_is_cap),
+ .lsu_lc_clrperm_o (lsu_lc_clrperm),
+ .lsu_cheri_err_o (lsu_cheri_err),
+ .lsu_we_o (lsu_we),
+ .lsu_addr_o (lsu_addr),
+ .lsu_type_o (lsu_type),
+ .lsu_wdata_o (lsu_wdata),
+ .lsu_wcap_o (lsu_wcap),
+ .cpu_stall_by_stkz_o (cpu_stall_by_stkz),
+ .cpu_grant_to_stkz_o (cpu_grant_to_stkz),
+ .lsu_sign_ext_o (lsu_sign_ext),
+ .addr_incr_req_i (lsu_addr_incr_req),
+ .addr_last_i (lsu_addr_last),
+ .lsu_req_done_i (lsu_req_done),
+ .lsu_rdata_i (rf_wdata_lsu),
+ .lsu_rcap_i (rf_wcap_lsu),
+ .rv32_lsu_req_i (rv32_lsu_req),
+ .rv32_lsu_we_i (rv32_lsu_we),
+ .rv32_lsu_type_i (rv32_lsu_type),
+ .rv32_lsu_wdata_i (rv32_lsu_wdata),
+ .rv32_lsu_sign_ext_i (rv32_lsu_sign_ext),
+ .rv32_lsu_addr_i (alu_adder_result_ex),
+ .rv32_addr_incr_req_o (rv32_lsu_addr_incr_req),
+ .rv32_addr_last_o (rv32_lsu_addr_last),
+ .lsu_tbre_sel_i (lsu_tbre_sel),
+ .tbre_lsu_req_i (tbre_lsu_req),
+ .tbre_lsu_is_cap_i (tbre_lsu_is_cap),
+ .tbre_lsu_we_i (tbre_lsu_we),
+ .tbre_lsu_addr_i (tbre_lsu_addr),
+ .tbre_lsu_wdata_i (tbre_lsu_wdata),
+ .cpu_lsu_dec_o (cpu_lsu_dec),
+ .csr_rdata_i (cheri_csr_rdata),
+ .csr_rcap_i (cheri_csr_rcap),
+ .csr_mstatus_mie_i (csr_mstatus_mie),
+ .csr_access_o (cheri_csr_access),
+ .csr_addr_o (cheri_csr_addr),
+ .csr_wdata_o (cheri_csr_wdata),
+ .csr_wcap_o (cheri_csr_wcap),
+ .csr_op_o (cheri_csr_op),
+ .csr_op_en_o (cheri_csr_op_en),
+ .csr_set_mie_o (cheri_csr_set_mie),
+ .csr_clr_mie_o (cheri_csr_clr_mie),
+ .csr_mshwm_i (csr_mshwm),
+ .csr_mshwmb_i (csr_mshwmb),
+ .csr_mshwm_set_o (csr_mshwm_set),
+ .csr_mshwm_new_o (csr_mshwm_new),
+ .stkz_active_i (stkz_active),
+ .stkz_abort_i (stkz_abort),
+ .stkz_ptr_i (stkz_ptr),
+ .stkz_base_i (stkz_base),
+ .ztop_wr_o (ztop_wr),
+ .ztop_wdata_o (ztop_wdata),
+ .ztop_wfcap_o (ztop_wfcap),
+ .ztop_rdata_i (ztop_rdata),
+ .ztop_rcap_i (ztop_rcap),
+ .csr_dbg_tclr_fault_i (csr_dbg_tclr_fault)
+ );
+
+ assign rf_trsv_en_o = rf_trsv_en;
+ assign rf_trsv_addr_o = rf_waddr_id;
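+    // A valid CHERI instruction in ID selects the CHERI branch target; otherwise the
+    // RV32 EX-block branch target is forwarded to the IF stage.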
+ assign branch_target_ex = (instr_valid_id & instr_is_cheri_id) ? branch_target_ex_cheri : branch_target_ex_rv32;
+ end else begin : gen_no_cheri_ex
+ assign rf_trsv_en_o = 1'b0;
+ assign rf_trsv_addr_o = 5'h0;
+
+ assign cheri_branch_req = 1'b0;
+ assign cheri_branch_req_spec = 1'b0;
+ assign branch_target_ex = branch_target_ex_rv32;
+ assign pcc_cap_w = NULL_PCC_CAP;
+
+ assign cheri_rf_we = 1'b0;
+ assign cheri_result_data = 32'h0;
+ assign cheri_result_cap = NULL_REG_CAP;
+
+ assign cheri_ex_valid = 1'b0;
+ assign cheri_ex_err = 1'b0;
+    assign cheri_ex_err_info = 12'h0;
+ assign cheri_wb_err = 1'b0;
+ assign cheri_wb_err_info = 16'h0;
+
+ assign lsu_req = rv32_lsu_req;
+ assign lsu_is_cap = 1'b0;
+ assign lsu_lc_clrperm = 4'h0;
+ assign lsu_cheri_err = 1'b0;
+ assign lsu_we = rv32_lsu_we;
+ assign lsu_addr = alu_adder_result_ex;
+ assign lsu_type = rv32_lsu_type;
+ assign lsu_wdata = rv32_lsu_wdata;
+ assign lsu_wcap = NULL_REG_CAP;
+ assign lsu_sign_ext = rv32_lsu_sign_ext;
+ assign rv32_lsu_addr_incr_req = lsu_addr_incr_req;
+ assign rv32_lsu_addr_last = lsu_addr_last;
+
+ assign cpu_lsu_dec = 1'b0;
+ assign cheri_csr_access = 1'b0;
+ assign cheri_csr_addr = 5'h0;
+ assign cheri_csr_wdata = 32'h0;
+ assign cheri_csr_wcap = NULL_REG_CAP;
+ assign cheri_csr_op = CHERI_CSR_NULL;
+ assign cheri_csr_op_en = 1'b0;
+ assign cheri_csr_set_mie = 1'b0;
+ assign cheri_csr_clr_mie = 1'b0;
+
+ assign csr_mshwm_set = 1'b0;
+    assign csr_mshwm_new = 32'h0;
+
+ end
+
+ /////////////////////////////
+ // cheri TS pipeline stage //
+ /////////////////////////////
+
+ if (CHERIoTEn & CheriPPLBC) begin : g_trvk_stage
+ cheri_trvk_stage #(
+ .HeapBase (HeapBase),
+ .TSMapSize (TSMapSize)
+ ) cheri_trvk_stage_i (
+ // Clock and Reset
+ .clk_i (clk_i ),
+ .rst_ni (rst_ni ),
+ .rf_trsv_en_i (rf_trsv_en ),
+ .rf_trsv_addr_i (rf_trsv_addr_o ),
+ .lsu_resp_valid_i (lsu_resp_valid ),
+ .lsu_load_err_i (lsu_load_err ),
+ .rf_wdata_lsu_i (rf_wdata_lsu[31:0]),
+ .rf_wcap_lsu_i (rf_wcap_lsu ),
+ .lsu_resp_is_wr_i (lsu_resp_is_wr),
+ .lsu_tbre_resp_valid_i (lsu_tbre_resp_valid),
+ .lsu_tbre_resp_err_i (lsu_tbre_resp_err),
+ .rf_trvk_addr_o (rf_trvk_addr_o ),
+ .rf_trvk_en_o (rf_trvk_en_o ),
+ .rf_trvk_clrtag_o (rf_trvk_clrtag_o),
+ .tbre_trvk_en_o (tbre_trvk_en ),
+ .tbre_trvk_clrtag_o(tbre_trvk_clrtag),
+ .tsmap_cs_o (tsmap_cs_o ),
+ .tsmap_addr_o (tsmap_addr_o ),
+ .tsmap_rdata_i (tsmap_rdata_i )
+ );
+ end else begin
+ assign rf_trvk_addr_o = 0;
+ assign rf_trvk_en_o = 1'b0;
+ assign rf_trvk_clrtag_o = 1'b0;
+ assign tsmap_cs_o = 1'b0;
+ assign tsmap_addr_o = 0;
+ end
+
+ //////////////////////////////////////////
+  // cheri TS background revocation engine //
+ //////////////////////////////////////////
+
+ logic snoop_lsu_req_done;
+ logic unmasked_intr;
+
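+  // These feed the TBRE wrapper below: it snoops CPU LSU requests (snoop_* inputs) and
+  // observes whether an unmasked interrupt is pending (unmasked_intr_i).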
+ assign snoop_lsu_req_done = lsu_req_done;
+ assign unmasked_intr = irq_pending_o & csr_mstatus_mie;
+
+ cheri_tbre_wrapper #(
+ .CHERIoTEn (CHERIoTEn),
+ .CheriTBRE (CheriTBRE),
+ .CheriStkZ (CheriStkZ),
+ .MMRegDinW (MMRegDinW),
+ .MMRegDoutW (MMRegDoutW)
+ ) cheri_tbre_wrapper_i (
+ // Clock and Reset
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .mmreg_corein_i (mmreg_corein_i),
+ .mmreg_coreout_o (mmreg_coreout_o),
+ .lsu_tbre_resp_valid_i (lsu_tbre_resp_valid),
+ .lsu_tbre_resp_err_i (lsu_tbre_resp_err),
+ .lsu_tbre_resp_is_wr_i (lsu_resp_is_wr),
+ .lsu_tbre_raw_lsw_i (lsu_tbre_raw_lsw),
+ .lsu_tbre_req_done_i (lsu_tbre_req_done),
+ .lsu_tbre_addr_incr_i (lsu_tbre_addr_incr),
+ .lsu_tbre_sel_i (lsu_tbre_sel),
+ .tbre_lsu_req_o (tbre_lsu_req),
+ .tbre_lsu_is_cap_o (tbre_lsu_is_cap),
+ .tbre_lsu_we_o (tbre_lsu_we),
+ .tbre_lsu_addr_o (tbre_lsu_addr),
+ .tbre_lsu_wdata_o (tbre_lsu_wdata),
+ .snoop_lsu_req_done_i (snoop_lsu_req_done),
+ .snoop_lsu_req_i (lsu_req),
+ .snoop_lsu_is_cap_i (lsu_is_cap),
+ .snoop_lsu_we_i (lsu_we),
+ .snoop_lsu_cheri_err_i (lsu_cheri_err),
+ .snoop_lsu_addr_i (lsu_addr),
+ .trvk_en_i (tbre_trvk_en),
+ .trvk_clrtag_i (tbre_trvk_clrtag),
+ .ztop_wr_i (ztop_wr),
+ .ztop_wdata_i (ztop_wdata),
+ .ztop_wfcap_i (ztop_wfcap),
+ .ztop_rdata_o (ztop_rdata),
+ .ztop_rcap_o (ztop_rcap),
+ .unmasked_intr_i (unmasked_intr),
+ .stkz_active_o (stkz_active),
+ .stkz_abort_o (stkz_abort),
+ .stkz_ptr_o (stkz_ptr),
+ .stkz_base_o (stkz_base)
+  );
+
+
+ /////////////////////
+ // Load/store unit //
+ /////////////////////
+ logic [32:0] data_wdata33, data_rdata33;
+
+ assign data_req_o = data_req_out & ~pmp_req_err[PMP_D];
+ assign lsu_resp_err = lsu_load_err | lsu_store_err;
+ assign data_wdata_o = data_wdata33[DataWidth-1:0];
+
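+  // The LSU operates on a 33-bit data word internally; a 32-bit external data bus is
+  // zero-extended on reads and truncated on writes (via data_wdata_o above).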
+ if (DataWidth == 33) begin
+ assign data_rdata33 = data_rdata_i;
+ end else begin
+ assign data_rdata33 = {1'b0, data_rdata_i};
+ end
+
+ cheriot_load_store_unit #(
+ .CHERIoTEn(CHERIoTEn),
+ .MemCapFmt(MemCapFmt),
+ .CheriTBRE(CheriTBRE),
+ .CheriCapIT8(CheriCapIT8)
+ ) load_store_unit_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+ // data interface
+ .data_req_o (data_req_out),
+ .data_is_cap_o (data_is_cap_o),
+ .data_gnt_i (data_gnt_i),
+ .data_rvalid_i (data_rvalid_i),
+ .data_err_i (data_err_i),
+ .data_pmp_err_i(pmp_req_err[PMP_D]),
+
+ .data_addr_o (data_addr_o),
+ .data_we_o (data_we_o),
+ .data_be_o (data_be_o),
+ .data_wdata_o(data_wdata33),
+ .data_rdata_i(data_rdata33),
+
+ // signals to/from ID/EX stage
+ .lsu_we_i (lsu_we),
+ .lsu_type_i (lsu_type),
+ .lsu_wdata_i (lsu_wdata),
+ .lsu_wcap_i (lsu_wcap),
+ .lsu_sign_ext_i(lsu_sign_ext),
+ .cpu_stall_by_stkz_i (cpu_stall_by_stkz),
+ .cpu_grant_to_stkz_i (cpu_grant_to_stkz),
+
+ .lsu_rdata_o (rf_wdata_lsu),
+ .lsu_rcap_o (rf_wcap_lsu),
+ .lsu_rdata_valid_o(rf_we_lsu),
+ .lsu_req_i (lsu_req),
+ .lsu_is_cap_i (lsu_is_cap),
+ .lsu_lc_clrperm_i (lsu_lc_clrperm),
+ .lsu_cheri_err_i (lsu_cheri_err),
+ .lsu_addr_i (lsu_addr),
+
+ .lsu_addr_incr_req_o(lsu_addr_incr_req),
+ .addr_last_o (lsu_addr_last),
+
+ .lsu_req_done_o (lsu_req_done),
+ .lsu_resp_valid_o (lsu_resp_valid),
+ .lsu_resp_is_wr_o (lsu_resp_is_wr),
+
+ .tbre_lsu_req_i (tbre_lsu_req),
+ .cpu_lsu_dec_i (cpu_lsu_dec),
+ .lsu_tbre_sel_o (lsu_tbre_sel),
+ .lsu_tbre_raw_lsw_o (lsu_tbre_raw_lsw),
+ .lsu_tbre_req_done_o (lsu_tbre_req_done),
+ .lsu_tbre_resp_valid_o (lsu_tbre_resp_valid),
+ .lsu_tbre_resp_err_o (lsu_tbre_resp_err),
+ .lsu_tbre_addr_incr_req_o(lsu_tbre_addr_incr),
+
+ // exception signals
+ .load_err_o (lsu_load_err),
+ .store_err_o(lsu_store_err),
+ .lsu_err_is_cheri_o(lsu_err_is_cheri),
+
+ .busy_o(lsu_busy),
+
+ .busy_tbre_o(lsu_busy_tbre),
+
+ .perf_load_o (perf_load),
+ .perf_store_o(perf_store)
+ );
+
+ cheriot_wb_stage #(
+ .ResetAll ( ResetAll ),
+ .WritebackStage(WritebackStage)
+ ) wb_stage_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .en_wb_i (en_wb),
+ .instr_type_wb_i (instr_type_wb),
+ .pc_id_i (pc_id),
+ .instr_is_compressed_id_i(instr_is_compressed_id),
+ .instr_perf_count_id_i (instr_perf_count_id),
+ .instr_is_cheri_i (instr_is_cheri_id),
+ .cheri_load_i (cheri_load_id),
+ .cheri_store_i (cheri_store_id),
+
+ .ready_wb_o (ready_wb),
+ .rf_write_wb_o (rf_write_wb),
+ .outstanding_load_wb_o (outstanding_load_wb),
+ .outstanding_store_wb_o (outstanding_store_wb),
+ .pc_wb_o (pc_wb),
+ .perf_instr_ret_wb_o (perf_instr_ret_wb),
+ .perf_instr_ret_compressed_wb_o (perf_instr_ret_compressed_wb),
+ .perf_instr_ret_wb_spec_o (perf_instr_ret_wb_spec),
+ .perf_instr_ret_compressed_wb_spec_o(perf_instr_ret_compressed_wb_spec),
+
+ .rf_waddr_id_i(rf_waddr_id),
+ .rf_wdata_id_i(rf_wdata_id),
+ .rf_we_id_i (rf_we_id),
+
+ .cheri_rf_we_i (cheri_rf_we),
+ .cheri_rf_wdata_i (cheri_result_data),
+ .cheri_rf_wcap_i (cheri_result_cap),
+
+ .rf_wdata_lsu_i(rf_wdata_lsu[31:0]),
+ .rf_wcap_lsu_i(rf_wcap_lsu),
+ .rf_we_lsu_i (rf_we_lsu),
+
+ .rf_wdata_fwd_wb_o(rf_wdata_fwd_wb),
+ .rf_wcap_fwd_wb_o (rf_wcap_fwd_wb),
+
+ .rf_waddr_wb_o(rf_waddr_wb),
+ .rf_wdata_wb_o(rf_wdata_wb),
+ .rf_wcap_wb_o (rf_wcap_wb),
+ .rf_we_wb_o (rf_we_wb),
+
+ .lsu_resp_valid_i(lsu_resp_valid),
+ .lsu_resp_err_i (lsu_resp_err),
+
+ .instr_done_wb_o(instr_done_wb)
+ );
+
+ /////////////////////////////
+ // Register file interface //
+ /////////////////////////////
+
+ assign dummy_instr_id_o = dummy_instr_id;
+ assign rf_raddr_a_o = rf_raddr_a;
+ assign rf_waddr_wb_o = rf_waddr_wb;
+ assign rf_we_wb_o = rf_we_wb;
+ assign rf_raddr_b_o = rf_raddr_b;
+
+ assign rf_wcap_wb_o = rf_wcap_wb;
+
+ if (RegFileECC & CHERIoTEn) begin : gen_ecc_cheriot
+ logic [37:0] rf_wcap_vec, rf_rcap_a_vec, rf_rcap_b_vec;
+ logic [1:0] rf_ecc_err_a, rf_ecc_err_b;
+ logic rf_ecc_err_a_id, rf_ecc_err_b_id;
+ logic [31:0] wdata_tmp, rdata_a_tmp, rdata_b_tmp;
+ logic [31:0] unused_sig32_0, unused_sig32_1;
+ logic [38:0] wdata_ecc_tmp;
+
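+    // reg2vec flattens the capability metadata of the WB write capability into a 38-bit
+    // vector so it can be folded into the ECC calculation below.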
+ assign rf_wcap_vec = reg2vec(rf_wcap_wb);
+
+ // ECC checkbit generation
+    // -- for simplicity, just linearly fold (XOR) the parity contributions together.
+    // This is not as good as a full SECDED implementation (some double errors won't be detected),
+    // but it is probably OK for protection against random fault injection.
+
+ // include waddr and we in the ECC calculation
+ assign wdata_tmp = rf_wdata_wb ^ rf_wcap_vec[31:0] ^ {20'h0, rf_we_wb, rf_waddr_wb, rf_wcap_vec[37:32]};
+ // assign wdata_tmp = rf_wdata_wb ^ rf_wcap_vec[31:0] ^ {26'h0, rf_wcap_vec[37:32]};
+ assign rf_wdata_wb_ecc_o = {wdata_ecc_tmp[38:32], rf_wdata_wb};
+ prim_secded_inv_39_32_enc regfile_ecc_enc (
+ .data_i(wdata_tmp),
+ .data_o(wdata_ecc_tmp)
+ );
+
+ // generate parity bits for the TRSV/TRVK interface
+ prim_secded_inv_39_32_enc trsv_ecc_enc (
+ .data_i({26'h0, rf_trsv_en_o, rf_trsv_addr_o}),
+ .data_o({rf_trsv_par_o, unused_sig32_0})
+ );
+
+ prim_secded_inv_39_32_enc trvk_ecc_enc (
+ .data_i({25'h0, rf_trvk_en_o, rf_trvk_clrtag_o, rf_trvk_addr_o}),
+ .data_o({rf_trvk_par_o, unused_sig32_1})
+ );
+
+ // ECC checking on register file rdata
+ assign rf_rcap_a_vec = reg2vec(rf_rcap_a_i);
+ assign rf_rcap_b_vec = reg2vec(rf_rcap_b_i);
+
+ assign rdata_a_tmp = rf_rdata_a_ecc_i[31:0] ^ rf_rcap_a_vec[31:0] ^ {20'h0, 1'b1, rf_raddr_a, rf_rcap_a_vec[37:32]};
+ assign rdata_b_tmp = rf_rdata_b_ecc_i[31:0] ^ rf_rcap_b_vec[31:0] ^ {20'h0, 1'b1, rf_raddr_b, rf_rcap_b_vec[37:32]};
+
+ //assign rdata_a_tmp = rf_rdata_a_ecc_i[31:0] ^ rf_rcap_a_vec[31:0] ^ {26'h0, rf_rcap_a_vec[37:32]};
+ //assign rdata_b_tmp = rf_rdata_b_ecc_i[31:0] ^ rf_rcap_b_vec[31:0] ^ {26'h0, rf_rcap_b_vec[37:32]};
+ prim_secded_inv_39_32_dec regfile_ecc_dec_a (
+ .data_i ({rf_rdata_a_ecc_i[38:32], rdata_a_tmp}),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (rf_ecc_err_a)
+ );
+ prim_secded_inv_39_32_dec regfile_ecc_dec_b (
+ .data_i ({rf_rdata_b_ecc_i[38:32], rdata_b_tmp}),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (rf_ecc_err_b)
+ );
+
+ // Assign read outputs - no error correction, just trigger an alert
+ assign rf_rdata_a = rf_rdata_a_ecc_i[31:0];
+ assign rf_rdata_b = rf_rdata_b_ecc_i[31:0];
+
+ // Calculate errors - qualify with WB forwarding to avoid xprop into the alert signal
+ assign rf_ecc_err_a_id = |rf_ecc_err_a & rf_ren_a & ~rf_rd_a_wb_match;
+ assign rf_ecc_err_b_id = |rf_ecc_err_b & rf_ren_b & ~rf_rd_b_wb_match;
+
+ // Combined error
+ assign rf_ecc_err_comb = instr_valid_id & (rf_ecc_err_a_id | rf_ecc_err_b_id);
+
+ end else if (RegFileECC) begin : gen_regfile_ecc
+
+ logic [1:0] rf_ecc_err_a, rf_ecc_err_b;
+ logic rf_ecc_err_a_id, rf_ecc_err_b_id;
+
+    // ECC checkbit generation for register file wdata
+ prim_secded_inv_39_32_enc regfile_ecc_enc (
+ .data_i(rf_wdata_wb),
+ .data_o(rf_wdata_wb_ecc_o)
+ );
+
+ // ECC checking on register file rdata
+ prim_secded_inv_39_32_dec regfile_ecc_dec_a (
+ .data_i (rf_rdata_a_ecc_i),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (rf_ecc_err_a)
+ );
+ prim_secded_inv_39_32_dec regfile_ecc_dec_b (
+ .data_i (rf_rdata_b_ecc_i),
+ .data_o (),
+ .syndrome_o(),
+ .err_o (rf_ecc_err_b)
+ );
+
+ // Assign read outputs - no error correction, just trigger an alert
+ assign rf_rdata_a = rf_rdata_a_ecc_i[31:0];
+ assign rf_rdata_b = rf_rdata_b_ecc_i[31:0];
+
+ // Calculate errors - qualify with WB forwarding to avoid xprop into the alert signal
+ assign rf_ecc_err_a_id = |rf_ecc_err_a & rf_ren_a & ~rf_rd_a_wb_match;
+ assign rf_ecc_err_b_id = |rf_ecc_err_b & rf_ren_b & ~rf_rd_b_wb_match;
+
+ // Combined error
+ assign rf_ecc_err_comb = instr_valid_id & (rf_ecc_err_a_id | rf_ecc_err_b_id);
+
+ assign rf_trvk_par_o = 7'h0;
+ assign rf_trsv_par_o = 7'h0;
+
+ end else begin : gen_no_regfile_ecc
+
+ logic unused_rf_ren_a, unused_rf_ren_b;
+ logic unused_rf_rd_a_wb_match, unused_rf_rd_b_wb_match;
+
+ assign unused_rf_ren_a = rf_ren_a;
+ assign unused_rf_ren_b = rf_ren_b;
+ assign unused_rf_rd_a_wb_match = rf_rd_a_wb_match;
+ assign unused_rf_rd_b_wb_match = rf_rd_b_wb_match;
+ assign rf_wdata_wb_ecc_o = rf_wdata_wb;
+ assign rf_rdata_a = rf_rdata_a_ecc_i;
+ assign rf_rdata_b = rf_rdata_b_ecc_i;
+ assign rf_ecc_err_comb = 1'b0;
+
+ assign rf_trvk_par_o = 7'h0;
+ assign rf_trsv_par_o = 7'h0;
+  end
+
+ ///////////////////////
+ // Crash dump output //
+ ///////////////////////
+
+ assign crash_dump_o.current_pc = pc_id;
+ assign crash_dump_o.next_pc = pc_if;
+ assign crash_dump_o.last_data_addr = lsu_addr_last;
+ assign crash_dump_o.exception_addr = csr_mepc;
+
+ ///////////////////
+ // Alert outputs //
+ ///////////////////
+
+ // Minor alert - core is in a recoverable state
+ // TODO add I$ ECC errors here
+ assign alert_minor_o = 1'b0;
+
+ // Major alert - core is unrecoverable
+ assign alert_major_o = rf_ecc_err_comb | pc_mismatch_alert | csr_shadow_err;
+
+ ////////////////////////
+ // RF (Register File) //
+ ////////////////////////
+`ifdef RVFI
+`endif
+
+
+ /////////////////////////////////////////
+ // CSRs (Control and Status Registers) //
+ /////////////////////////////////////////
+
+ assign csr_wdata = alu_operand_a_ex;
+ assign csr_addr = csr_num_e'(csr_access ? alu_operand_b_ex[11:0] : 12'b0);
+
+ cheriot_cs_registers #(
+ .DbgTriggerEn (DbgTriggerEn),
+ .DbgHwBreakNum (DbgHwBreakNum),
+ .DataIndTiming (DataIndTiming),
+ .DummyInstructions(DummyInstructions),
+ .ShadowCSR (ShadowCSR),
+ .ICache (ICache),
+ .MHPMCounterNum (MHPMCounterNum),
+ .MHPMCounterWidth (MHPMCounterWidth),
+ .PMPEnable (PMPEnable),
+ .PMPGranularity (PMPGranularity),
+ .PMPNumRegions (PMPNumRegions),
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .CHERIoTEn (CHERIoTEn)
+ ) cs_registers_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+
+ // Hart ID from outside
+ .hart_id_i (hart_id_i),
+ .priv_mode_id_o (priv_mode_id),
+ .priv_mode_lsu_o(priv_mode_lsu),
+
+ // mtvec
+ .csr_mtvec_o (csr_mtvec),
+ .csr_mtvec_init_i(csr_mtvec_init),
+ .boot_addr_i (boot_addr_i),
+
+ // Interface to CSRs ( SRAM like )
+ .csr_access_i(csr_access),
+ .csr_addr_i (csr_addr),
+ .csr_wdata_i (csr_wdata),
+ .csr_op_i (csr_op),
+ .csr_op_en_i (csr_op_en),
+ .csr_rdata_o (csr_rdata),
+
+ .cheri_csr_access_i (cheri_csr_access),
+ .cheri_csr_addr_i (cheri_csr_addr),
+ .cheri_csr_wdata_i (cheri_csr_wdata),
+ .cheri_csr_wcap_i (cheri_csr_wcap),
+ .cheri_csr_op_i (cheri_csr_op),
+ .cheri_csr_op_en_i (cheri_csr_op_en),
+ .cheri_csr_set_mie_i (cheri_csr_set_mie),
+ .cheri_csr_clr_mie_i (cheri_csr_clr_mie),
+ .cheri_csr_rdata_o (cheri_csr_rdata),
+ .cheri_csr_rcap_o (cheri_csr_rcap),
+
+ .csr_mshwm_o (csr_mshwm),
+ .csr_mshwmb_o (csr_mshwmb),
+ .csr_mshwm_set_i (csr_mshwm_set),
+ .csr_mshwm_new_i (csr_mshwm_new),
+
+ // Interrupt related control signals
+ .irq_software_i (irq_software_i),
+ .irq_timer_i (irq_timer_i),
+ .irq_external_i (irq_external_i),
+ .irq_fast_i (irq_fast_i),
+ .nmi_mode_i (nmi_mode),
+ .irq_pending_o (irq_pending_o),
+ .irqs_o (irqs),
+ .csr_mstatus_mie_o(csr_mstatus_mie),
+ .csr_mstatus_tw_o (csr_mstatus_tw),
+ .csr_mepc_o (csr_mepc),
+
+ // PMP
+ .csr_pmp_cfg_o (csr_pmp_cfg),
+ .csr_pmp_addr_o (csr_pmp_addr),
+ .csr_pmp_mseccfg_o(csr_pmp_mseccfg),
+
+ // debug
+ .csr_depc_o (csr_depc),
+ .debug_mode_i (debug_mode),
+ .debug_cause_i (debug_cause),
+ .debug_csr_save_i (debug_csr_save),
+ .debug_single_step_o(debug_single_step),
+ .debug_ebreakm_o (debug_ebreakm),
+ .debug_ebreaku_o (debug_ebreaku),
+ .trigger_match_o (trigger_match),
+
+ .pc_if_i(pc_if),
+ .pc_id_i(pc_id),
+ .pc_wb_i(pc_wb),
+
+ .data_ind_timing_o (data_ind_timing),
+ .dummy_instr_en_o (dummy_instr_en),
+ .dummy_instr_mask_o (dummy_instr_mask),
+ .dummy_instr_seed_en_o(dummy_instr_seed_en),
+ .dummy_instr_seed_o (dummy_instr_seed),
+ .icache_enable_o (icache_enable),
+ .csr_shadow_err_o (csr_shadow_err),
+
+ .csr_save_if_i (csr_save_if),
+ .csr_save_id_i (csr_save_id),
+ .csr_save_wb_i (csr_save_wb),
+ .csr_restore_mret_i(csr_restore_mret_id),
+ .csr_restore_dret_i(csr_restore_dret_id),
+ .csr_save_cause_i (csr_save_cause),
+ .csr_mepcc_clrtag_i (csr_mepcc_clrtag),
+ .csr_mcause_i (exc_cause),
+ .csr_mtval_i (csr_mtval),
+ .illegal_csr_insn_o(illegal_csr_insn_id),
+
+ .double_fault_seen_o,
+
+ // performance counter related signals
+ .instr_ret_i (perf_instr_ret_wb),
+ .instr_ret_compressed_i (perf_instr_ret_compressed_wb),
+ .instr_ret_spec_i (perf_instr_ret_wb_spec),
+ .instr_ret_compressed_spec_i(perf_instr_ret_compressed_wb_spec),
+ .iside_wait_i (perf_iside_wait),
+ .jump_i (perf_jump),
+ .branch_i (perf_branch),
+ .branch_taken_i (perf_tbranch),
+ .mem_load_i (perf_load),
+ .mem_store_i (perf_store),
+ .dside_wait_i (perf_dside_wait),
+ .mul_wait_i (perf_mul_wait),
+ .div_wait_i (perf_div_wait),
+
+ .cheri_branch_req_i (cheri_branch_req),
+ .cheri_branch_target_i (branch_target_ex_cheri),
+ .pcc_cap_i (pcc_cap_w),
+ .pcc_cap_o (pcc_cap_r),
+ .csr_dbg_tclr_fault_o (csr_dbg_tclr_fault),
+ .cheri_fatal_err_o (cheri_fatal_err_o)
+ );
+
+
+ if (PMPEnable) begin : g_pmp
+ logic [33:0] pmp_req_addr [PMP_NUM_CHAN];
+ pmp_req_e pmp_req_type [PMP_NUM_CHAN];
+ priv_lvl_e pmp_priv_lvl [PMP_NUM_CHAN];
+
+ assign pmp_req_addr[PMP_I] = {2'b00, pc_if};
+ assign pmp_req_type[PMP_I] = PMP_ACC_EXEC;
+ assign pmp_priv_lvl[PMP_I] = priv_mode_id;
+ assign pmp_req_addr[PMP_I2] = {2'b00, (pc_if + 32'd2)};
+ assign pmp_req_type[PMP_I2] = PMP_ACC_EXEC;
+ assign pmp_priv_lvl[PMP_I2] = priv_mode_id;
+ assign pmp_req_addr[PMP_D] = {2'b00, data_addr_o[31:0]};
+ assign pmp_req_type[PMP_D] = data_we_o ? PMP_ACC_WRITE : PMP_ACC_READ;
+ assign pmp_priv_lvl[PMP_D] = priv_mode_lsu;
+
+ cheriot_pmp #(
+ .PMPGranularity(PMPGranularity),
+ .PMPNumChan (PMP_NUM_CHAN),
+ .PMPNumRegions (PMPNumRegions)
+ ) pmp_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ // Interface to CSRs
+ .csr_pmp_cfg_i (csr_pmp_cfg),
+ .csr_pmp_addr_i (csr_pmp_addr),
+ .csr_pmp_mseccfg_i(csr_pmp_mseccfg),
+ .priv_mode_i (pmp_priv_lvl),
+ // Access checking channels
+ .pmp_req_addr_i (pmp_req_addr),
+ .pmp_req_type_i (pmp_req_type),
+ .pmp_req_err_o (pmp_req_err)
+ );
+ end else begin : g_no_pmp
+ // Unused signal tieoff
+ priv_lvl_e unused_priv_lvl_ls;
+ logic [33:0] unused_csr_pmp_addr [PMPNumRegions];
+ pmp_cfg_t unused_csr_pmp_cfg [PMPNumRegions];
+ pmp_mseccfg_t unused_csr_pmp_mseccfg;
+ assign unused_priv_lvl_ls = priv_mode_lsu;
+ assign unused_csr_pmp_addr = csr_pmp_addr;
+ assign unused_csr_pmp_cfg = csr_pmp_cfg;
+ assign unused_csr_pmp_mseccfg = csr_pmp_mseccfg;
+
+ // Output tieoff
+ assign pmp_req_err[PMP_I] = 1'b0;
+ assign pmp_req_err[PMP_I2] = 1'b0;
+ assign pmp_req_err[PMP_D] = 1'b0;
+ end
+
+`ifdef RVFI
+  // When the writeback stage is present, RVFI information is emitted when the instruction
+  // finishes in the third stage, but some information must be captured whilst the instruction
+  // is in the second stage. Without the writeback stage, all RVFI information is emitted when
+  // the instruction retires in the second stage. RVFI outputs are all driven straight from
+  // flops, so the 2-stage pipeline requires a single set of flops (instr_info => RVFI_out) and
+  // the 3-stage pipeline requires two sets (instr_info => wb => RVFI_out).
+ localparam int RVFI_STAGES = WritebackStage ? 2 : 1;
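+
+  // Illustration: with WritebackStage = 1, RVFI_STAGES = 2, so stage [0] holds the ID/EX
+  // snapshot while the instruction sits in WB and stage [1] drives the rvfi_* outputs the
+  // cycle after it retires; with WritebackStage = 0 the single stage [0] drives the outputs
+  // directly.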
+
+ logic rvfi_stage_valid [RVFI_STAGES];
+ logic [63:0] rvfi_stage_order [RVFI_STAGES];
+ logic [31:0] rvfi_stage_insn [RVFI_STAGES];
+ logic rvfi_stage_trap [RVFI_STAGES];
+ logic rvfi_stage_halt [RVFI_STAGES];
+ logic rvfi_stage_intr [RVFI_STAGES];
+ logic [ 1:0] rvfi_stage_mode [RVFI_STAGES];
+ logic [ 1:0] rvfi_stage_ixl [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs1_addr [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs2_addr [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rs3_addr [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs1_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs2_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rs3_rdata [RVFI_STAGES];
+ reg_cap_t rvfi_stage_rs1_rcap [RVFI_STAGES];
+ reg_cap_t rvfi_stage_rs2_rcap [RVFI_STAGES];
+ logic [ 4:0] rvfi_stage_rd_addr [RVFI_STAGES];
+ logic [31:0] rvfi_stage_rd_wdata [RVFI_STAGES];
+ reg_cap_t rvfi_stage_rd_wcap [RVFI_STAGES];
+ logic [31:0] rvfi_stage_pc_rdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_pc_wdata [RVFI_STAGES];
+ logic [31:0] rvfi_stage_mem_addr [RVFI_STAGES];
+ logic [ 3:0] rvfi_stage_mem_rmask [RVFI_STAGES];
+ logic [ 3:0] rvfi_stage_mem_wmask [RVFI_STAGES];
+ logic [DataWidth-1:0] rvfi_stage_mem_rdata [RVFI_STAGES];
+ reg_cap_t rvfi_stage_mem_rcap [RVFI_STAGES];
+ logic [DataWidth-1:0] rvfi_stage_mem_wdata [RVFI_STAGES];
+ reg_cap_t rvfi_stage_mem_wcap [RVFI_STAGES];
+ logic rvfi_stage_mem_is_cap [RVFI_STAGES];
+
+ logic rvfi_instr_new_wb;
+ logic rvfi_intr_d;
+ logic rvfi_intr_q;
+ logic rvfi_set_trap_pc_d;
+ logic rvfi_set_trap_pc_q;
+ logic [31:0] rvfi_insn_id;
+ logic [4:0] rvfi_rs1_addr_d;
+ logic [4:0] rvfi_rs1_addr_q;
+ logic [4:0] rvfi_rs2_addr_d;
+ logic [4:0] rvfi_rs2_addr_q;
+ logic [4:0] rvfi_rs3_addr_d;
+ logic [31:0] rvfi_rs1_data_d;
+ logic [31:0] rvfi_rs1_data_q;
+ logic [31:0] rvfi_rs2_data_d;
+ logic [31:0] rvfi_rs2_data_q;
+ reg_cap_t rvfi_rs1_cap_d;
+ reg_cap_t rvfi_rs1_cap_q;
+ reg_cap_t rvfi_rs2_cap_d;
+ reg_cap_t rvfi_rs2_cap_q;
+ reg_cap_t rvfi_rd_cap_d;
+ reg_cap_t rvfi_rd_cap_q;
+ logic [31:0] rvfi_rs3_data_d;
+ logic [4:0] rvfi_rd_addr_wb;
+ logic [4:0] rvfi_rd_addr_q;
+ logic [4:0] rvfi_rd_addr_d;
+ logic [31:0] rvfi_rd_wdata_wb;
+ logic [31:0] rvfi_rd_wdata_d;
+ logic [31:0] rvfi_rd_wdata_q;
+ logic rvfi_rd_we_wb;
+ logic [3:0] rvfi_mem_mask_int;
+ logic [DataWidth-1:0] rvfi_mem_rdata_d;
+ logic [DataWidth-1:0] rvfi_mem_rdata_q;
+ logic [DataWidth-1:0] rvfi_mem_wdata_d;
+ logic [DataWidth-1:0] rvfi_mem_wdata_q;
+ logic [31:0] rvfi_mem_addr_d;
+ logic [31:0] rvfi_mem_addr_q;
+ logic rvfi_mem_is_cap_d;
+ logic rvfi_mem_is_cap_q;
+ reg_cap_t rvfi_mem_rcap_d;
+ reg_cap_t rvfi_mem_rcap_q;
+ reg_cap_t rvfi_mem_wcap_d;
+ reg_cap_t rvfi_mem_wcap_q;
+ logic rvfi_trap_id;
+ logic rvfi_trap_wb;
+ logic [63:0] rvfi_stage_order_d;
+ logic rvfi_id_done;
+ logic rvfi_wb_done;
+
+ logic new_debug_req;
+ logic new_nmi;
+ logic new_irq;
+ cheriot_pkg::irqs_t captured_mip;
+ logic captured_nmi;
+ logic captured_debug_req;
+ logic captured_valid;
+
+ // RVFI extension for co-simulation support
+  // debug_req and MIP are captured at the IF -> ID transition, so one extra stage is needed
+ cheriot_pkg::irqs_t rvfi_ext_stage_mip [RVFI_STAGES+1];
+ logic rvfi_ext_stage_nmi [RVFI_STAGES+1];
+ logic rvfi_ext_stage_debug_req [RVFI_STAGES+1];
+ logic [63:0] rvfi_ext_stage_mcycle [RVFI_STAGES];
+
+ logic rvfi_stage_valid_d [RVFI_STAGES];
+
+ logic insn_c_hint;
+
+ assign rvfi_valid = rvfi_stage_valid [RVFI_STAGES-1];
+ assign rvfi_order = rvfi_stage_order [RVFI_STAGES-1];
+ assign rvfi_insn = rvfi_stage_insn [RVFI_STAGES-1];
+ assign rvfi_trap = rvfi_stage_trap [RVFI_STAGES-1];
+ assign rvfi_halt = rvfi_stage_halt [RVFI_STAGES-1];
+ assign rvfi_intr = rvfi_stage_intr [RVFI_STAGES-1];
+ assign rvfi_mode = rvfi_stage_mode [RVFI_STAGES-1];
+ assign rvfi_ixl = rvfi_stage_ixl [RVFI_STAGES-1];
+ assign rvfi_rs1_addr = rvfi_stage_rs1_addr [RVFI_STAGES-1];
+ assign rvfi_rs2_addr = rvfi_stage_rs2_addr [RVFI_STAGES-1];
+ assign rvfi_rs3_addr = rvfi_stage_rs3_addr [RVFI_STAGES-1];
+ assign rvfi_rs1_rdata = rvfi_stage_rs1_rdata[RVFI_STAGES-1];
+ assign rvfi_rs2_rdata = rvfi_stage_rs2_rdata[RVFI_STAGES-1];
+ assign rvfi_rs1_rcap = rvfi_stage_rs1_rcap [RVFI_STAGES-1];
+ assign rvfi_rs2_rcap = rvfi_stage_rs2_rcap [RVFI_STAGES-1];
+ assign rvfi_rs3_rdata = rvfi_stage_rs3_rdata[RVFI_STAGES-1];
+ assign rvfi_rd_wdata = rvfi_stage_rd_wdata [RVFI_STAGES-1];
+ assign rvfi_rd_wcap = rvfi_stage_rd_wcap [RVFI_STAGES-1];
+ assign rvfi_pc_rdata = rvfi_stage_pc_rdata [RVFI_STAGES-1];
+ assign rvfi_pc_wdata = rvfi_stage_pc_wdata [RVFI_STAGES-1];
+ assign rvfi_mem_addr = rvfi_stage_mem_addr [RVFI_STAGES-1];
+ assign rvfi_mem_rmask = rvfi_stage_mem_rmask[RVFI_STAGES-1];
+ assign rvfi_mem_wmask = rvfi_stage_mem_wmask[RVFI_STAGES-1];
+ assign rvfi_mem_rdata = rvfi_stage_mem_rdata[RVFI_STAGES-1];
+ assign rvfi_mem_wdata = rvfi_stage_mem_wdata[RVFI_STAGES-1];
+ assign rvfi_mem_is_cap = rvfi_stage_mem_is_cap[RVFI_STAGES-1];
+ assign rvfi_mem_rcap = rvfi_stage_mem_rcap[RVFI_STAGES-1];
+ assign rvfi_mem_wcap = rvfi_stage_mem_wcap[RVFI_STAGES-1];
+
+  // For HINT instructions like c.srli64/c.srai64, force the rvfi_rd_addr output to 0 to match the Sail implementation
+ assign rvfi_rd_addr = insn_c_hint ? 0 : rvfi_stage_rd_addr [RVFI_STAGES-1];
+
+ always_comb begin
+ if ((rvfi_insn[1:0] == 2'b01) && (rvfi_insn[15:13] == 3'b100) && (rvfi_insn[11:10] == 2'b00) && // c.srli64
+ ({rvfi_insn[12], rvfi_insn[6:2]} == 6'h0) &&
+ (rvfi_rs1_addr == rvfi_rd_addr) && (rvfi_rs1_rdata == rvfi_rd_wdata))
+ insn_c_hint = 1'b1;
+ else if ((rvfi_insn[1:0] == 2'b01) && (rvfi_insn[15:13] == 3'b100) && (rvfi_insn[11:10] == 2'b01) && // c.srai64
+ ({rvfi_insn[12], rvfi_insn[6:2]} == 6'h0) &&
+ (rvfi_rs1_addr == rvfi_rd_addr) && (rvfi_rs1_rdata == rvfi_rd_wdata))
+ insn_c_hint = 1'b1;
+ else
+ insn_c_hint = 1'b0;
+
+
+ end
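+
+  // Worked example (illustrative encodings): 16'h8001 is c.srli64 x8 (funct3 = 100,
+  // insn[12] = 0, insn[11:10] = 00, shamt = 0) and 16'h8401 is the matching c.srai64 x8.
+  // A shift by zero leaves rd equal to rs1, so the address/data comparisons above hold and
+  // the instruction is reported as a HINT.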
+
+ assign rvfi_rd_addr_wb = rf_waddr_wb;
+ assign rvfi_rd_wdata_wb = rf_we_wb ? rf_wdata_wb : rf_wdata_lsu; // this doesn't look right but ok
+ assign rvfi_rd_we_wb = rf_we_wb | rf_we_lsu;
+
+ always_comb begin
+    // Use always_comb instead of a continuous assign so the first assignment can set 0 as the
+    // default everywhere, which is then overridden by the more specific settings below.
+ rvfi_ext_mip = '0;
+ rvfi_ext_mip[CSR_MSIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_software;
+ rvfi_ext_mip[CSR_MTIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_timer;
+ rvfi_ext_mip[CSR_MEIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_external;
+ rvfi_ext_mip[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = rvfi_ext_stage_mip[RVFI_STAGES].irq_fast;
+ end
+
+ assign rvfi_ext_nmi = rvfi_ext_stage_nmi[RVFI_STAGES];
+ assign rvfi_ext_debug_req = rvfi_ext_stage_debug_req[RVFI_STAGES];
+ assign rvfi_ext_mcycle = rvfi_ext_stage_mcycle[RVFI_STAGES-1];
+
+ // When an instruction takes a trap the `rvfi_trap` signal will be set. Instructions that take
+  // traps flush the pipeline so ordinarily wouldn't be seen to retire. The RVFI tracking
+ // pipeline is kept going for flushed instructions that trapped so they are still visible on the
+ // RVFI interface.
+
+ // Factor in exceptions taken in ID so RVFI tracking picks up flushed instructions that took
+ // a trap
+  // kliu 05082024: add the ~wb_exception_o term to handle the corner case where
+  //   ID and WB both faulted, e.g., illegal_insn in ID and cheri_wb_err in WB.
+  //   The previous behavior was 2 RVFI items in the trace (both traps),
+  //   even though the instruction in ID is never executed.
+  //   The new behavior only generates 1 RVFI item, for the WB-stage fault.
+ assign rvfi_id_done = instr_id_done | (id_stage_i.controller_i.rvfi_flush_next &
+ id_stage_i.controller_i.id_exception_o &
+ ~id_stage_i.controller_i.wb_exception_o);
+
+ if (WritebackStage) begin : gen_rvfi_wb_stage
+ logic unused_instr_new_id;
+
+ assign unused_instr_new_id = instr_new_id;
+
+    // With the writeback stage, the first RVFI stage buffers instruction information captured
+    // in ID/EX, awaiting instruction retirement, plus the RF write data/memory read data whilst
+    // the instruction is in WB. So the first stage becomes valid when the instruction leaves the
+    // ID/EX stage and remains valid until the instruction leaves WB.
+ assign rvfi_stage_valid_d[0] = (rvfi_id_done & ~dummy_instr_id) |
+ (rvfi_stage_valid[0] & ~rvfi_wb_done);
+    // The second stage is the output stage, so it is simply valid the cycle after the
+    // instruction leaves WB (and so has retired).
+ assign rvfi_stage_valid_d[1] = rvfi_wb_done;
+
+ // Signal new instruction in WB cycle after instruction leaves ID/EX (to enter WB)
+ logic rvfi_instr_new_wb_q;
+
+ // Signal new instruction in WB either when one has just entered or when a trap is progressing
+ // through the tracking pipeline
+ assign rvfi_instr_new_wb = rvfi_instr_new_wb_q | (rvfi_stage_valid[0] & rvfi_stage_trap[0]);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_instr_new_wb_q <= 0;
+ end else begin
+ rvfi_instr_new_wb_q <= rvfi_id_done;
+ end
+ end
+
+ assign rvfi_trap_id = id_stage_i.controller_i.id_exception_o;
+ assign rvfi_trap_wb = id_stage_i.controller_i.exc_req_wb ;
+    // WB is instantly done in the tracking pipeline when a trap is progressing through the pipeline
+ assign rvfi_wb_done = instr_done_wb | (rvfi_stage_valid[0] & rvfi_stage_trap[0]);
+ end else begin : gen_rvfi_no_wb_stage
+    // Without the writeback stage, the first RVFI stage is the output stage, so it is simply
+    // valid the cycle after the instruction leaves ID/EX (and so has retired).
+ assign rvfi_stage_valid_d[0] = rvfi_id_done & ~dummy_instr_id;
+ // Without writeback stage signal new instr_new_wb when instruction enters ID/EX to correctly
+ // setup register write signals
+ assign rvfi_instr_new_wb = instr_new_id;
+ assign rvfi_trap_id = id_stage_i.controller_i.exc_req_d | id_stage_i.controller_i.exc_req_lsu;
+ assign rvfi_trap_wb = 1'b0;
+ assign rvfi_wb_done = instr_done_wb;
+ end
+
+ assign rvfi_stage_order_d = dummy_instr_id ? rvfi_stage_order[0] : rvfi_stage_order[0] + 64'd1;
+
+ // For interrupts and debug Ibex will take the relevant trap as soon as whatever instruction in ID
+ // finishes or immediately if the ID stage is empty. The rvfi_ext interface provides the DV
+ // environment with information about the irq/debug_req/nmi state that applies to a particular
+ // instruction.
+ //
+ // When a irq/debug_req/nmi appears the ID stage will finish whatever instruction it is currently
+ // executing (if any) then take the trap the cycle after that instruction leaves the ID stage. The
+ // trap taken depends upon the state of irq/debug_req/nmi on that cycle. In the cycles following
+ // that before the first instruction of the trap handler enters the ID stage the state of
+ // irq/debug_req/nmi could change but this has no effect on the trap handler (e.g. a higher
+ // priority interrupt might appear but this wouldn't stop the lower priority interrupt trap
+ // handler executing first as it's already being fetched). To provide the DV environment with the
+ // correct information for it to verify execution we need to capture the irq/debug_req/nmi state
+  // the cycle the trap decision is made, which is what the captured_X signals below do.
+ //
+ // The new_X signals take the raw irq/debug_req/nmi inputs and factor in the enable terms required
+ // to determine if a trap will actually happen.
+ //
+ // These signals and the comment above are referred to in the documentation (cosim.rst). If
+ // altering the names or meanings of these signals or this comment please adjust the documentation
+ // appropriately.
+ assign new_debug_req = (debug_req_i & ~debug_mode);
+ assign new_nmi = irq_nm_i & ~nmi_mode & ~debug_mode;
+ assign new_irq = irq_pending_o & csr_mstatus_mie & ~nmi_mode & ~debug_mode;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ captured_valid <= 1'b0;
+ captured_mip <= '0;
+ captured_nmi <= 1'b0;
+ captured_debug_req <= 1'b0;
+ end else begin
+ // Capture when ID stage has emptied out and something occurs that will cause a trap and we
+ // haven't yet captured
+ if (~instr_valid_id & (new_debug_req | new_irq | new_nmi) & ~captured_valid) begin
+ captured_valid <= 1'b1;
+ captured_nmi <= irq_nm_i;
+ captured_mip <= cs_registers_i.mip;
+ captured_debug_req <= debug_req_i;
+ end
+
+ // Capture cleared out as soon as a new instruction appears in ID
+ if (if_stage_i.instr_valid_id_d) begin
+ captured_valid <= 1'b0;
+ end
+ end
+ end
+
+ // Pass the captured irq/debug_req/nmi state to the rvfi_ext interface tracking pipeline.
+ //
+  // To capture correctly we need to factor in various enable terms; should there be a fault in
+  // this logic we won't tell the DV environment about a trap that should have been taken. So if
+  // there's no valid capture we pass on the raw values of the irq/debug_req/nmi inputs, whatever
+  // they are, and the DV environment can check whether a trap should have been taken but wasn't.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_ext_stage_mip[0] <= '0;
+ rvfi_ext_stage_nmi[0] <= '0;
+ rvfi_ext_stage_debug_req[0] <= '0;
+ end else if (if_stage_i.instr_valid_id_d & if_stage_i.instr_new_id_d) begin
+ rvfi_ext_stage_mip[0] <= instr_valid_id | ~captured_valid ? cs_registers_i.mip :
+ captured_mip;
+ rvfi_ext_stage_nmi[0] <= instr_valid_id | ~captured_valid ? irq_nm_i :
+ captured_nmi;
+ rvfi_ext_stage_debug_req[0] <= instr_valid_id | ~captured_valid ? debug_req_i :
+ captured_debug_req;
+ end
+ end
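+
+  // Illustrative sequence (assuming a single external interrupt and an otherwise idle core):
+  //   cycle N   : ID is empty and irq_external_i rises -> new_irq = 1, captured_* latched
+  //   cycle N+k : the first handler instruction enters ID -> captured_valid clears and the
+  //               flop above forwards the captured MIP/NMI/debug_req state into
+  //               rvfi_ext_stage_*[0] for that instruction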
+
+ logic is_mem_rd, is_mem_wr;
+ assign is_mem_rd = lsu_req & ~lsu_we;
+ assign is_mem_wr = lsu_req & lsu_we;
+
+ for (genvar i = 0; i < RVFI_STAGES; i = i + 1) begin : g_rvfi_stages
+ int im1;
+
+ // this is just to get rid of the VCS elab warning (i-1 out of range when i==0)
+ if (i == 0)
+ assign im1 = 0;
+ else
+ assign im1 = i-1;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_stage_halt[i] <= '0;
+ rvfi_stage_trap[i] <= '0;
+ rvfi_stage_intr[i] <= '0;
+ rvfi_stage_order[i] <= '0;
+ rvfi_stage_insn[i] <= '0;
+ rvfi_stage_mode[i] <= {PRIV_LVL_M};
+ rvfi_stage_ixl[i] <= CSR_MISA_MXL;
+ rvfi_stage_rs1_addr[i] <= '0;
+ rvfi_stage_rs2_addr[i] <= '0;
+ rvfi_stage_rs3_addr[i] <= '0;
+ rvfi_stage_pc_rdata[i] <= '0;
+ rvfi_stage_pc_wdata[i] <= '0;
+ rvfi_stage_mem_rmask[i] <= '0;
+ rvfi_stage_mem_wmask[i] <= '0;
+ rvfi_stage_valid[i] <= '0;
+ rvfi_stage_rs1_rdata[i] <= '0;
+ rvfi_stage_rs2_rdata[i] <= '0;
+ rvfi_stage_rs3_rdata[i] <= '0;
+ rvfi_stage_rs1_rcap[i] <= NULL_REG_CAP;
+        rvfi_stage_rs2_rcap[i]   <= NULL_REG_CAP;
+ rvfi_stage_rd_wdata[i] <= '0;
+        rvfi_stage_rd_wcap[i]    <= NULL_REG_CAP;
+ rvfi_stage_rd_addr[i] <= '0;
+ rvfi_stage_mem_rdata[i] <= '0;
+ rvfi_stage_mem_wdata[i] <= '0;
+ rvfi_stage_mem_addr[i] <= '0;
+ rvfi_ext_stage_mip[i+1] <= '0;
+ rvfi_ext_stage_nmi[i+1] <= '0;
+ rvfi_ext_stage_debug_req[i+1] <= '0;
+ rvfi_ext_stage_mcycle[i] <= '0;
+ end else begin
+ rvfi_stage_valid[i] <= rvfi_stage_valid_d[i];
+
+ if (i == 0) begin
+ if (rvfi_id_done) begin
+ rvfi_stage_halt[i] <= '0;
+ // TODO: Sort this out for writeback stage
+ rvfi_stage_trap[i] <= rvfi_trap_id;
+ rvfi_stage_intr[i] <= rvfi_intr_d;
+ rvfi_stage_order[i] <= rvfi_stage_order_d;
+ rvfi_stage_insn[i] <= rvfi_insn_id;
+ rvfi_stage_mode[i] <= {priv_mode_id};
+ rvfi_stage_ixl[i] <= CSR_MISA_MXL;
+ rvfi_stage_rs1_addr[i] <= rvfi_rs1_addr_d;
+ rvfi_stage_rs2_addr[i] <= rvfi_rs2_addr_d;
+ rvfi_stage_rs3_addr[i] <= rvfi_rs3_addr_d;
+ rvfi_stage_pc_rdata[i] <= pc_id;
+ rvfi_stage_pc_wdata[i] <= pc_set ? branch_target_ex : pc_if;
+ rvfi_stage_mem_rmask[i] <= is_mem_rd ? rvfi_mem_mask_int : 4'b0000; // kliu
+ rvfi_stage_mem_wmask[i] <= is_mem_wr ? rvfi_mem_mask_int : 4'b0000;
+ rvfi_stage_rs1_rdata[i] <= rvfi_rs1_data_d;
+ rvfi_stage_rs2_rdata[i] <= rvfi_rs2_data_d;
+ rvfi_stage_rs3_rdata[i] <= rvfi_rs3_data_d;
+ rvfi_stage_rs1_rcap[i] <= rvfi_rs1_cap_d;
+ rvfi_stage_rs2_rcap[i] <= rvfi_rs2_cap_d;
+ rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d;
+ rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d;
+ rvfi_stage_rd_wcap[i] <= rvfi_rd_cap_d;
+ rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+ rvfi_stage_mem_wdata[i] <= rvfi_mem_wdata_d;
+ rvfi_stage_mem_rcap[i] <= rvfi_mem_rcap_d;
+ rvfi_stage_mem_wcap[i] <= rvfi_mem_wcap_d;
+ rvfi_stage_mem_is_cap[i] <= rvfi_mem_is_cap_d;
+ rvfi_stage_mem_addr[i] <= rvfi_mem_addr_d;
+ rvfi_ext_stage_mip[i+1] <= rvfi_ext_stage_mip[i];
+ rvfi_ext_stage_nmi[i+1] <= rvfi_ext_stage_nmi[i];
+ rvfi_ext_stage_debug_req[i+1] <= rvfi_ext_stage_debug_req[i];
+ rvfi_ext_stage_mcycle[i] <= cs_registers_i.mcycle_counter_i.counter_val_o;
+ end
+ end else begin
+ if (rvfi_wb_done) begin
+ rvfi_stage_halt[i] <= rvfi_stage_halt[im1];
+ rvfi_stage_trap[i] <= rvfi_stage_trap[im1] | rvfi_trap_wb;
+ rvfi_stage_intr[i] <= rvfi_stage_intr[im1];
+ rvfi_stage_order[i] <= rvfi_stage_order[im1];
+ rvfi_stage_insn[i] <= rvfi_stage_insn[im1];
+ rvfi_stage_mode[i] <= rvfi_stage_mode[im1];
+ rvfi_stage_ixl[i] <= rvfi_stage_ixl[im1];
+ rvfi_stage_rs1_addr[i] <= rvfi_stage_rs1_addr[im1];
+ rvfi_stage_rs2_addr[i] <= rvfi_stage_rs2_addr[im1];
+ rvfi_stage_rs3_addr[i] <= rvfi_stage_rs3_addr[im1];
+ rvfi_stage_pc_rdata[i] <= rvfi_stage_pc_rdata[im1];
+ rvfi_stage_pc_wdata[i] <= rvfi_stage_pc_wdata[im1];
+ rvfi_stage_mem_rmask[i] <= rvfi_trap_wb ? 4'b0000 : rvfi_stage_mem_rmask[im1];
+ rvfi_stage_mem_wmask[i] <= rvfi_trap_wb ? 4'b0000 : rvfi_stage_mem_wmask[im1];
+ rvfi_stage_rs1_rdata[i] <= rvfi_stage_rs1_rdata[im1];
+ rvfi_stage_rs2_rdata[i] <= rvfi_stage_rs2_rdata[im1];
+ rvfi_stage_rs3_rdata[i] <= rvfi_stage_rs3_rdata[im1];
+ rvfi_stage_mem_wdata[i] <= rvfi_stage_mem_wdata[im1];
+ rvfi_stage_mem_is_cap[i] <= rvfi_stage_mem_is_cap[im1];
+ rvfi_stage_mem_wcap[i] <= rvfi_stage_mem_wcap[im1];
+ rvfi_stage_mem_addr[i] <= rvfi_stage_mem_addr[im1];
+ rvfi_stage_rs1_rcap[i] <= rvfi_stage_rs1_rcap[im1];
+ rvfi_stage_rs2_rcap[i] <= rvfi_stage_rs2_rcap[im1];
+
+            // For the 2 RVFI_STAGES / writeback stage configuration, ignore the first stage
+            // flops for rd_addr, rd_wdata and mem_rdata. For the RF write addr/data the actual
+            // write happens in writeback, so capture the address/data there. mem_rdata is only
+            // available from the writeback stage. The previous stage flops still exist in the
+            // RTL as they are used by the non-writeback config.
+ rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d;
+ rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d;
+ rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d;
+ rvfi_stage_mem_rcap[i] <= rvfi_mem_rcap_d;
+ rvfi_stage_rd_wcap[i] <= rvfi_rd_cap_d;
+
+ rvfi_ext_stage_mip[i+1] <= rvfi_ext_stage_mip[i];
+ rvfi_ext_stage_nmi[i+1] <= rvfi_ext_stage_nmi[i];
+ rvfi_ext_stage_debug_req[i+1] <= rvfi_ext_stage_debug_req[i];
+ rvfi_ext_stage_mcycle[i] <= rvfi_ext_stage_mcycle[im1];
+ end
+ end
+ end
+ end
+ end
+
+  // Memory address/write data is available in the first cycle of a ld/st instruction, from the register read
+ always_comb begin
+ if (~CheriTBRE & instr_first_cycle_id) begin
+ // rvfi_mem_addr_d = alu_adder_result_ex;
+ rvfi_mem_addr_d = lsu_addr;
+ rvfi_mem_wdata_d = lsu_wdata;
+ rvfi_mem_wcap_d = lsu_wcap;
+ rvfi_mem_is_cap_d = lsu_is_cap;
+ end else if (CheriTBRE & lsu_req & cpu_lsu_dec & ~lsu_addr_incr_req) begin
+ rvfi_mem_addr_d = lsu_addr;
+ rvfi_mem_wdata_d = lsu_wdata;
+ rvfi_mem_wcap_d = lsu_wcap;
+ rvfi_mem_is_cap_d = lsu_is_cap;
+ end else begin
+ rvfi_mem_addr_d = rvfi_mem_addr_q;
+ rvfi_mem_wdata_d = rvfi_mem_wdata_q;
+ rvfi_mem_wcap_d = rvfi_mem_wcap_q;
+ rvfi_mem_is_cap_d = rvfi_mem_is_cap_q;
+ end
+ end
+
+ // Capture read data from LSU when it becomes valid
+ always_comb begin
+ if (load_store_unit_i.resp_is_cap_q & lsu_resp_valid) begin
+ rvfi_mem_rdata_d = rf_wdata_lsu;
+ rvfi_mem_rcap_d = rf_wcap_lsu;
+ end else if (lsu_resp_valid) begin
+ rvfi_mem_rdata_d = rf_wdata_lsu;
+ rvfi_mem_rcap_d = rvfi_mem_rcap_q;
+ end else begin
+ rvfi_mem_rdata_d = rvfi_mem_rdata_q;
+ rvfi_mem_rcap_d = rvfi_mem_rcap_q;
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_mem_addr_q <= '0;
+ rvfi_mem_rdata_q <= '0;
+ rvfi_mem_wdata_q <= '0;
+ rvfi_mem_rcap_q <= NULL_REG_CAP;
+ rvfi_mem_wcap_q <= NULL_REG_CAP;
+ rvfi_mem_is_cap_q <= 1'b0;
+ end else begin
+ rvfi_mem_addr_q <= rvfi_mem_addr_d;
+ rvfi_mem_rdata_q <= rvfi_mem_rdata_d;
+ rvfi_mem_wdata_q <= rvfi_mem_wdata_d;
+ rvfi_mem_rcap_q <= rvfi_mem_rcap_d;
+ rvfi_mem_wcap_q <= rvfi_mem_wcap_d;
+      rvfi_mem_is_cap_q  <= rvfi_mem_is_cap_d;
+ end
+ end
+ // Byte enable based on data type
+ always_comb begin
+ unique case (lsu_type)
+ 2'b00: rvfi_mem_mask_int = 4'b1111;
+ 2'b01: rvfi_mem_mask_int = 4'b0011;
+ 2'b10: rvfi_mem_mask_int = 4'b0001;
+ 2'b11: rvfi_mem_mask_int = 4'b0001; // kliu
+ default: rvfi_mem_mask_int = 4'b0000;
+ endcase
+ end
+
+ always_comb begin
+ if (instr_is_compressed_id) begin
+ rvfi_insn_id = {16'b0, instr_rdata_c_id};
+ end else begin
+ rvfi_insn_id = instr_rdata_id;
+ end
+ end
+
+ // Source registers 1 and 2 are read in the first instruction cycle
+ // Source register 3 is read in the second instruction cycle.
+ if (CHERIoTEn) begin
+ always_comb begin
+ if (instr_first_cycle_id) begin
+ rvfi_rs1_cap_d = rf_ren_a ? g_cheri_ex.u_cheri_ex.rf_rcap_a : NULL_REG_CAP;
+ rvfi_rs2_cap_d = rf_ren_b ? g_cheri_ex.u_cheri_ex.rf_rcap_b : NULL_REG_CAP;
+ end else begin
+ rvfi_rs1_cap_d = rvfi_rs1_cap_q;
+ rvfi_rs2_cap_d = rvfi_rs2_cap_q;
+ end
+ end
+ end else begin
+ assign rvfi_rs1_cap_d = NULL_REG_CAP;
+ assign rvfi_rs2_cap_d = NULL_REG_CAP;
+ end
+
+ always_comb begin
+ if (instr_first_cycle_id) begin
+ rvfi_rs1_data_d = rf_ren_a ? multdiv_operand_a_ex : '0;
+ rvfi_rs1_addr_d = rf_ren_a ? rf_raddr_a : '0;
+ rvfi_rs2_data_d = rf_ren_b ? multdiv_operand_b_ex : '0;
+ rvfi_rs2_addr_d = rf_ren_b ? rf_raddr_b : '0;
+ rvfi_rs3_data_d = '0;
+ rvfi_rs3_addr_d = '0;
+ end else begin
+ rvfi_rs1_data_d = rvfi_rs1_data_q;
+ rvfi_rs1_addr_d = rvfi_rs1_addr_q;
+ rvfi_rs2_data_d = rvfi_rs2_data_q;
+ rvfi_rs2_addr_d = rvfi_rs2_addr_q;
+ rvfi_rs3_data_d = multdiv_operand_a_ex;
+ rvfi_rs3_addr_d = rf_raddr_a;
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_rs1_data_q <= '0;
+ rvfi_rs1_addr_q <= '0;
+ rvfi_rs2_data_q <= '0;
+ rvfi_rs2_addr_q <= '0;
+ rvfi_rs1_cap_q <= NULL_REG_CAP;
+ rvfi_rs2_cap_q <= NULL_REG_CAP;
+ end else begin
+ rvfi_rs1_data_q <= rvfi_rs1_data_d;
+ rvfi_rs1_addr_q <= rvfi_rs1_addr_d;
+ rvfi_rs2_data_q <= rvfi_rs2_data_d;
+ rvfi_rs2_addr_q <= rvfi_rs2_addr_d;
+ rvfi_rs1_cap_q <= rvfi_rs1_cap_d;
+ rvfi_rs2_cap_q <= rvfi_rs2_cap_d;
+ end
+ end
+
+ always_comb begin
+ if (rvfi_rd_we_wb) begin
+ // Capture address/data of write to register file
+ rvfi_rd_addr_d = rvfi_rd_addr_wb;
+ // If writing to x0 zero write data as required by RVFI specification
+ if (rvfi_rd_addr_wb == 5'b0) begin
+ rvfi_rd_wdata_d = '0;
+ rvfi_rd_cap_d = NULL_REG_CAP;
+ end else begin
+ rvfi_rd_wdata_d = rvfi_rd_wdata_wb;
+ rvfi_rd_cap_d = rf_wcap_wb;
+ end
+ end else if (rvfi_instr_new_wb) begin
+ // If no RF write but new instruction in Writeback (when present) or ID/EX (when no writeback
+ // stage present) then zero RF write address/data as required by RVFI specification
+ rvfi_rd_addr_d = '0;
+ rvfi_rd_wdata_d = '0;
+ rvfi_rd_cap_d = NULL_REG_CAP;
+ end else begin
+ // Otherwise maintain previous value
+ rvfi_rd_addr_d = rvfi_rd_addr_q;
+ rvfi_rd_wdata_d = rvfi_rd_wdata_q;
+ rvfi_rd_cap_d = rvfi_rd_cap_q;
+ end
+ end
+
+ // RD write register is refreshed only once per cycle and
+ // then it is kept stable for the cycle.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_rd_addr_q <= '0;
+ rvfi_rd_wdata_q <= '0;
+ rvfi_rd_cap_q <= NULL_REG_CAP;
+ end else begin
+ rvfi_rd_addr_q <= rvfi_rd_addr_d;
+ rvfi_rd_wdata_q <= rvfi_rd_wdata_d;
+ rvfi_rd_cap_q <= rvfi_rd_cap_d;
+ end
+ end
+
+ // rvfi_intr must be set for first instruction that is part of a trap handler.
+ // On the first cycle of a new instruction see if a trap PC was set by the previous instruction,
+ // otherwise maintain value.
+ assign rvfi_intr_d = instr_first_cycle_id ? rvfi_set_trap_pc_q : rvfi_intr_q;
+
+ always_comb begin
+ rvfi_set_trap_pc_d = rvfi_set_trap_pc_q;
+
+ //if (pc_set && pc_mux_id == PC_EXC && // kliu - interrupt only
+ // (exc_pc_mux_id == EXC_PC_EXC || exc_pc_mux_id == EXC_PC_IRQ)) begin
+ if (pc_set && pc_mux_id == PC_EXC && (exc_pc_mux_id == EXC_PC_IRQ)) begin
+ // PC is set to enter a trap handler
+ rvfi_set_trap_pc_d = 1'b1;
+ end else if (rvfi_set_trap_pc_q && rvfi_id_done) begin
+ // first instruction has been executed after PC is set to trap handler
+ rvfi_set_trap_pc_d = 1'b0;
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rvfi_set_trap_pc_q <= 1'b0;
+ rvfi_intr_q <= 1'b0;
+ end else begin
+ rvfi_set_trap_pc_q <= rvfi_set_trap_pc_d;
+ rvfi_intr_q <= rvfi_intr_d;
+ end
+ end
+
+`else
+ logic unused_instr_new_id, unused_instr_id_done, unused_instr_done_wb;
+ assign unused_instr_id_done = instr_id_done;
+ assign unused_instr_new_id = instr_new_id;
+ assign unused_instr_done_wb = instr_done_wb;
+`endif
+
+ // Certain parameter combinations are not supported
+ `ASSERT_INIT(IllegalParamSecure, !(SecureIbex && (RV32M == RV32MNone)))
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv b/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv
new file mode 100644
index 0000000..f574eff
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv
@@ -0,0 +1,99 @@
+module cheriot_counter #(
+ parameter int CounterWidth = 32,
+ // When set `counter_val_upd_o` provides an incremented version of the counter value, otherwise
+ // the output is hard-wired to 0. This is required to allow Xilinx DSP inference to work
+ // correctly. When `ProvideValUpd` is set no DSPs are inferred.
+ parameter bit ProvideValUpd = 0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic counter_inc_i,
+ input logic counterh_we_i,
+ input logic counter_we_i,
+ input logic [31:0] counter_val_i,
+ output logic [63:0] counter_val_o,
+ output logic [63:0] counter_val_upd_o
+);
+
+ logic [63:0] counter;
+ logic [CounterWidth-1:0] counter_upd;
+ logic [63:0] counter_load;
+ logic we;
+ logic [CounterWidth-1:0] counter_d;
+
+ // Increment
+ assign counter_upd = counter[CounterWidth-1:0] + {{CounterWidth - 1{1'b0}}, 1'b1};
+
+ // Update
+ always_comb begin
+ // Write
+ we = counter_we_i | counterh_we_i;
+ counter_load[63:32] = counter[63:32];
+ counter_load[31:0] = counter_val_i;
+ if (counterh_we_i) begin
+ counter_load[63:32] = counter_val_i;
+ counter_load[31:0] = counter[31:0];
+ end
+
+ // Next value logic
+ if (we) begin
+ counter_d = counter_load[CounterWidth-1:0];
+ end else if (counter_inc_i) begin
+ counter_d = counter_upd[CounterWidth-1:0];
+ end else begin
+ counter_d = counter[CounterWidth-1:0];
+ end
+ end
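+
+  // Example (hypothetical configuration with CounterWidth = 48): a high-half write
+  // (counterh_we_i) loads counter_val_i into counter_load[63:32], of which only bits [47:32]
+  // are captured into counter_d, while a low-half write (counter_we_i) replaces bits [31:0].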
+
+`ifdef FPGA_XILINX
+ // Set DSP pragma for supported xilinx FPGAs
+ localparam int DspPragma = CounterWidth < 49 ? "yes" : "no";
+ (* use_dsp = DspPragma *) logic [CounterWidth-1:0] counter_q;
+
+ // DSP output register requires synchronous reset.
+ `define COUNTER_FLOP_RST posedge clk_i
+`else
+ logic [CounterWidth-1:0] counter_q;
+
+ `define COUNTER_FLOP_RST posedge clk_i or negedge rst_ni
+`endif
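+
+  // The 48-bit cut-off above is assumed to match the width of the Xilinx DSP48 accumulator;
+  // counters wider than that fall back to fabric logic rather than a DSP slice.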
+
+ // Counter flop
+ always_ff @(`COUNTER_FLOP_RST) begin
+ if (!rst_ni) begin
+ counter_q <= '0;
+ end else begin
+ counter_q <= counter_d;
+ end
+ end
+
+ if (CounterWidth < 64) begin : g_counter_narrow
+ logic [63:CounterWidth] unused_counter_load;
+
+ assign counter[CounterWidth-1:0] = counter_q;
+ assign counter[63:CounterWidth] = '0;
+
+ if (ProvideValUpd) begin : g_counter_val_upd_o
+ assign counter_val_upd_o[CounterWidth-1:0] = counter_upd;
+ end else begin : g_no_counter_val_upd_o
+ assign counter_val_upd_o[CounterWidth-1:0] = '0;
+ end
+ assign counter_val_upd_o[63:CounterWidth] = '0;
+ assign unused_counter_load = counter_load[63:CounterWidth];
+ end else begin : g_counter_full
+ assign counter = counter_q;
+
+ if (ProvideValUpd) begin : g_counter_val_upd_o
+ assign counter_val_upd_o = counter_upd;
+ end else begin : g_no_counter_val_upd_o
+ assign counter_val_upd_o = '0;
+ end
+ end
+
+ assign counter_val_o = counter;
+
+endmodule
+
+// Keep helper defines file-local.
+`undef COUNTER_FLOP_RST
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv b/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv
new file mode 100644
index 0000000..c7e91dd
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv
@@ -0,0 +1,1998 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Control and Status Registers
+ *
+ * Control and Status Registers (CSRs) following the RISC-V Privileged
+ * Specification, draft version 1.11
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_cs_registers import cheri_pkg::*; #(
+ parameter bit DbgTriggerEn = 0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit DataIndTiming = 1'b0,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit ShadowCSR = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter int unsigned MHPMCounterNum = 10,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit PMPEnable = 0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter bit RV32E = 0,
+ parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast,
+ parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone,
+ parameter bit CHERIoTEn = 1'b1
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic cheri_pmode_i,
+ // Hart ID
+ input logic [31:0] hart_id_i,
+
+ // Privilege mode
+ output cheriot_pkg::priv_lvl_e priv_mode_id_o,
+ output cheriot_pkg::priv_lvl_e priv_mode_lsu_o,
+ output logic csr_mstatus_tw_o,
+
+ // mtvec
+ output logic [31:0] csr_mtvec_o,
+ input logic csr_mtvec_init_i,
+ input logic [31:0] boot_addr_i,
+
+ // Interface to registers (SRAM like)
+ input logic csr_access_i,
+ input cheriot_pkg::csr_num_e csr_addr_i,
+ input logic [31:0] csr_wdata_i,
+ input cheriot_pkg::csr_op_e csr_op_i,
+  input  logic                 csr_op_en_i,
+ output logic [31:0] csr_rdata_o,
+
+ input logic cheri_csr_access_i,
+ input logic [4:0] cheri_csr_addr_i,
+ input logic [31:0] cheri_csr_wdata_i,
+ input reg_cap_t cheri_csr_wcap_i,
+ input cheri_csr_op_e cheri_csr_op_i,
+ input logic cheri_csr_op_en_i,
+ input logic cheri_csr_set_mie_i,
+ input logic cheri_csr_clr_mie_i,
+
+ output logic [31:0] cheri_csr_rdata_o,
+ output reg_cap_t cheri_csr_rcap_o,
+
+ // stack highwatermark and fast-clearing function
+ output logic [31:0] csr_mshwm_o,
+ output logic [31:0] csr_mshwmb_o,
+ input logic csr_mshwm_set_i,
+ input logic [31:0] csr_mshwm_new_i,
+
+ // interrupts
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic nmi_mode_i,
+ output logic irq_pending_o, // interrupt request pending
+ output cheriot_pkg::irqs_t irqs_o, // interrupt requests qualified with mie
+ output logic csr_mstatus_mie_o,
+ output logic [31:0] csr_mepc_o,
+
+ // PMP
+ output cheriot_pkg::pmp_cfg_t csr_pmp_cfg_o [PMPNumRegions],
+ output logic [33:0] csr_pmp_addr_o [PMPNumRegions],
+ output cheriot_pkg::pmp_mseccfg_t csr_pmp_mseccfg_o,
+
+ // debug
+ input logic debug_mode_i,
+ input cheriot_pkg::dbg_cause_e debug_cause_i,
+ input logic debug_csr_save_i,
+ output logic [31:0] csr_depc_o,
+ output logic debug_single_step_o,
+ output logic debug_ebreakm_o,
+ output logic debug_ebreaku_o,
+ output logic trigger_match_o,
+
+ input logic [31:0] pc_if_i,
+ input logic [31:0] pc_id_i,
+ input logic [31:0] pc_wb_i,
+
+ // CPU control bits
+ output logic data_ind_timing_o,
+ output logic dummy_instr_en_o,
+ output logic [2:0] dummy_instr_mask_o,
+ output logic dummy_instr_seed_en_o,
+ output logic [31:0] dummy_instr_seed_o,
+ output logic icache_enable_o,
+ output logic csr_shadow_err_o,
+
+ // Exception save/restore
+ input logic csr_save_if_i,
+ input logic csr_save_id_i,
+ input logic csr_save_wb_i,
+ input logic csr_restore_mret_i,
+ input logic csr_restore_dret_i,
+ input logic csr_save_cause_i,
+ input logic csr_mepcc_clrtag_i,
+ input cheriot_pkg::exc_cause_e csr_mcause_i,
+ input logic [31:0] csr_mtval_i,
+  output logic                 illegal_csr_insn_o,    // access to non-existent CSR,
+                                                      // with wrong privilege level, or
+                                                      // missing write permissions
+ output logic double_fault_seen_o,
+ // Performance Counters
+ input logic instr_ret_i, // instr retired in ID/EX stage
+ input logic instr_ret_compressed_i, // compressed instr retired
+ input logic instr_ret_spec_i, // speculative instr_ret_i
+ input logic instr_ret_compressed_spec_i, // speculative instr_ret_compressed_i
+ input logic iside_wait_i, // core waiting for the iside
+ input logic jump_i, // jump instr seen (j, jr, jal, jalr)
+  input  logic                 branch_i,                    // branch instr seen (beq, bne, etc.)
+ input logic branch_taken_i, // branch was taken
+ input logic mem_load_i, // load from memory in this cycle
+ input logic mem_store_i, // store to memory in this cycle
+ input logic dside_wait_i, // core waiting for the dside
+ input logic mul_wait_i, // core waiting for multiply
+ input logic div_wait_i, // core waiting for divide
+
+ input logic cheri_branch_req_i,
+ input logic [31:0] cheri_branch_target_i,
+ input pcc_cap_t pcc_cap_i,
+ output pcc_cap_t pcc_cap_o,
+
+ output logic csr_dbg_tclr_fault_o,
+ output logic cheri_fatal_err_o
+ );
+
+ import cheriot_pkg::*;
+
+ localparam int unsigned RV32BEnabled = (RV32B == RV32BNone) ? 0 : 1;
+ localparam int unsigned RV32MEnabled = (RV32M == RV32MNone) ? 0 : 1;
+ localparam int unsigned PMPAddrWidth = (PMPGranularity > 0) ? 33 - PMPGranularity : 32;
+
+ // misa
+ localparam logic [31:0] MISA_VALUE =
+ (0 << 0) // A - Atomic Instructions extension
+ | (RV32BEnabled << 1) // B - Bit-Manipulation extension
+ | (1 << 2) // C - Compressed extension
+ | (0 << 3) // D - Double precision floating-point extension
+ | (32'(RV32E) << 4) // E - RV32E base ISA
+ | (0 << 5) // F - Single precision floating-point extension
+ | (32'(!RV32E) << 8) // I - RV32I/64I/128I base ISA
+ | (RV32MEnabled << 12) // M - Integer Multiply/Divide extension
+ | (0 << 13) // N - User level interrupts supported
+ | (0 << 18) // S - Supervisor mode implemented
+ | (1 << 20) // U - User mode implemented
+ | (32'(CHERIoTEn) << 23) // X - Non-standard extensions present
+ | (32'(CSR_MISA_MXL) << 30); // M-XLEN
+
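+  // Worked example (assuming RV32E = 0, RV32M enabled, RV32B = RV32BNone, CHERIoTEn = 1 and
+  // CSR_MISA_MXL = 1): MISA_VALUE evaluates to 32'h4090_1104, i.e. MXL = 1 (RV32) with the
+  // X, U, M, I and C bits set.
+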
+ typedef struct packed {
+ logic mie;
+ logic mpie;
+ priv_lvl_e mpp;
+ logic mprv;
+ logic tw;
+ } status_t;
+
+ typedef struct packed {
+ logic mpie;
+ priv_lvl_e mpp;
+ } status_stk_t;
+
+ typedef struct packed {
+ x_debug_ver_e xdebugver;
+ logic [11:0] zero2;
+ logic ebreakm;
+ logic zero1;
+ logic ebreaks;
+ logic ebreaku;
+ logic stepie;
+ logic stopcount;
+ logic stoptime;
+ dbg_cause_e cause;
+ logic zero0;
+ logic mprven;
+ logic nmip;
+ logic step;
+ priv_lvl_e prv;
+ } dcsr_t;
+
+ // CPU control register fields
+ typedef struct packed {
+ logic double_fault_seen;
+ logic sync_exc_seen;
+ logic [2:0] dummy_instr_mask;
+ logic dummy_instr_en;
+ logic data_ind_timing;
+ logic icache_enable;
+ } cpu_ctrl_t;
+
+ // Interrupt and exception control signals
+ logic [31:0] exception_pc;
+
+ // CSRs
+ priv_lvl_e priv_lvl_q, priv_lvl_d;
+ status_t mstatus_q, mstatus_d;
+ logic mstatus_err;
+ logic mstatus_en;
+ irqs_t mie_q, mie_d;
+ logic mie_en;
+ logic [31:0] mscratch_q;
+ logic mscratch_en;
+ logic [31:0] mepc_q, mepc_d;
+ logic mepc_en;
+ reg_cap_t mepc_cap;
+ logic [5:0] mcause_q, mcause_d;
+ logic mcause_en;
+ logic [31:0] mtval_q, mtval_d;
+ logic mtval_en;
+ logic [31:0] mtvec_q, mtvec_d;
+ reg_cap_t mtvec_cap;
+ logic mtvec_err;
+ logic mtvec_en;
+ irqs_t mip;
+ dcsr_t dcsr_q, dcsr_d;
+ logic dcsr_en;
+ logic [31:0] depc_q, depc_d;
+ logic depc_en;
+ reg_cap_t depc_cap;
+ logic [31:0] dscratch0_q;
+ logic [31:0] dscratch1_q;
+ logic dscratch0_en, dscratch1_en;
+ reg_cap_t dscratch0_cap, dscratch1_cap;
+ logic [31:0] mshwm_q, mshwm_d;
+ logic [31:0] mshwmb_q;
+ logic mshwm_en, mshwmb_en;
+ logic [31:0] cdbg_ctrl_q;
+ logic cdbg_ctrl_en;
+ pcc_cap_t pcc_cap_q, pcc_cap_d;
+
+ // CSRs for recoverable NMIs
+  // NOTE: these CSRs are nonstandard, see https://github.com/riscv/riscv-isa-manual/issues/261
+ status_stk_t mstack_q, mstack_d;
+ logic mstack_en;
+ logic [31:0] mstack_epc_q, mstack_epc_d;
+ logic [5:0] mstack_cause_q, mstack_cause_d;
+
+ // PMP Signals
+ logic [31:0] pmp_addr_rdata [PMP_MAX_REGIONS];
+ logic [PMP_CFG_W-1:0] pmp_cfg_rdata [PMP_MAX_REGIONS];
+ logic pmp_csr_err;
+ pmp_mseccfg_t pmp_mseccfg;
+
+ // Hardware performance monitor signals
+ logic [31:0] mcountinhibit;
+ // Only have mcountinhibit flops for counters that actually exist
+ logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
+ logic mcountinhibit_we;
+
+ // mhpmcounter flops are elaborated below providing only the precise number that is required based
+ // on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops
+ // where they exist and is otherwise 0.
+ logic [63:0] mhpmcounter [32];
+ logic [31:0] mhpmcounter_we;
+ logic [31:0] mhpmcounterh_we;
+ logic [31:0] mhpmcounter_incr;
+ logic [31:0] mhpmevent [32];
+ logic [4:0] mhpmcounter_idx;
+ logic unused_mhpmcounter_we_1;
+ logic unused_mhpmcounterh_we_1;
+ logic unused_mhpmcounter_incr_1;
+
+ logic [63:0] minstret_next, minstret_raw;
+
+ // Debug / trigger registers
+ logic [31:0] tselect_rdata;
+ logic [31:0] tmatch_control_rdata;
+ logic [31:0] tmatch_value_rdata;
+
+ // CPU control bits
+ cpu_ctrl_t cpuctrl_q, cpuctrl_d, cpuctrl_wdata_raw, cpuctrl_wdata;
+ logic cpuctrl_we;
+ logic cpuctrl_err;
+
+ // CSR update logic
+ logic [31:0] csr_wdata_int;
+ logic [31:0] csr_rdata_int;
+ logic csr_we_int;
+ logic csr_wr;
+
+ // Access violation signals
+ logic illegal_csr;
+ logic illegal_csr_priv;
+ logic illegal_csr_write;
+
+ logic [7:0] unused_boot_addr;
+ logic [2:0] unused_csr_addr;
+
+ logic mepc_en_combi, mepc_en_cheri;
+ logic [31:0] mepc_d_combi;
+
+ logic mtvec_en_combi, mtvec_en_cheri;
+ logic [31:0] mtvec_d_combi;
+
+ logic depc_en_combi, depc_en_cheri;
+ logic [31:0] depc_d_combi;
+
+ logic dscratch0_en_combi, dscratch0_en_cheri;
+ logic [31:0] dscratch0_d_combi;
+ logic dscratch1_en_combi, dscratch1_en_cheri;
+ logic [31:0] dscratch1_d_combi;
+
+ assign unused_boot_addr = boot_addr_i[7:0];
+
+ logic [31:0] misa_value_masked;
+
+ assign misa_value_masked = MISA_VALUE & ~{8'h0, ~cheri_pmode_i, 23'h0};
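+  // The mask only affects bit 23 (X, non-standard extensions): when cheri_pmode_i is clear the
+  // X bit of misa reads back as 0, otherwise misa reads back MISA_VALUE unchanged.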
+
+
+ /////////////
+ // CSR reg //
+ /////////////
+
+ logic [$bits(csr_num_e)-1:0] csr_addr;
+ assign csr_addr = {csr_addr_i};
+ assign unused_csr_addr = csr_addr[7:5];
+ assign mhpmcounter_idx = csr_addr[4:0];
+
+ // See RISC-V Privileged Specification, version 1.11, Section 2.1
+ assign illegal_csr_priv = (csr_addr[9:8] > {priv_lvl_q});
+ assign illegal_csr_write = (csr_addr[11:10] == 2'b11) && csr_wr;
+ assign illegal_csr_insn_o = csr_access_i & (illegal_csr | illegal_csr_write | illegal_csr_priv);
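+  // Worked example: a csrrw to mhartid (address 12'hF14) from M-mode has csr_addr[11:10] ==
+  // 2'b11 (the read-only range) and csr_wr = 1, so illegal_csr_write and hence
+  // illegal_csr_insn_o are raised even though the privilege check passes.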
+
+ // mip CSR is purely combinational - must be able to re-enable the clock upon WFI
+ assign mip.irq_software = irq_software_i;
+ assign mip.irq_timer = irq_timer_i;
+ assign mip.irq_external = irq_external_i;
+ assign mip.irq_fast = irq_fast_i;
+
+ // read logic
+ always_comb begin
+ csr_rdata_int = '0;
+ illegal_csr = 1'b0;
+
+ unique case (csr_addr_i)
+ // mvendorid: encoding of manufacturer/provider
+ CSR_MVENDORID: csr_rdata_int = (CHERIoTEn&cheri_pmode_i) ? CSR_MVENDORID_CHERI_VALUE : CSR_MVENDORID_VALUE;
+ // marchid: encoding of base microarchitecture
+ CSR_MARCHID: csr_rdata_int = (CHERIoTEn&cheri_pmode_i) ? CSR_MARCHID_CHERI_VALUE : CSR_MARCHID_VALUE;
+ // mimpid: encoding of processor implementation version
+ CSR_MIMPID: csr_rdata_int = CSR_MIMPID_VALUE;
+ // mhartid: unique hardware thread id
+ CSR_MHARTID: csr_rdata_int = hart_id_i;
+
+ // mstatus: always M-mode, contains IE bit
+ CSR_MSTATUS: begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSTATUS_MIE_BIT] = mstatus_q.mie;
+ csr_rdata_int[CSR_MSTATUS_MPIE_BIT] = mstatus_q.mpie;
+ csr_rdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_q.mpp;
+ csr_rdata_int[CSR_MSTATUS_MPRV_BIT] = mstatus_q.mprv;
+ csr_rdata_int[CSR_MSTATUS_TW_BIT] = mstatus_q.tw;
+ end
+
+ // misa
+ CSR_MISA: csr_rdata_int = misa_value_masked;
+
+ // interrupt enable
+ CSR_MIE: begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSIX_BIT] = mie_q.irq_software;
+ csr_rdata_int[CSR_MTIX_BIT] = mie_q.irq_timer;
+ csr_rdata_int[CSR_MEIX_BIT] = mie_q.irq_external;
+ csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mie_q.irq_fast;
+ end
+
+ // mcounteren: machine counter enable
+ CSR_MCOUNTEREN: begin
+ csr_rdata_int = '0;
+ end
+
+ CSR_MSCRATCH: csr_rdata_int = mscratch_q;
+
+ // mtvec: trap-vector base address
+ CSR_MTVEC: csr_rdata_int = mtvec_q;
+
+ // mepc: exception program counter
+ CSR_MEPC: csr_rdata_int = mepc_q;
+
+ // mcause: exception cause
+ CSR_MCAUSE: csr_rdata_int = {mcause_q[5], 26'b0, mcause_q[4:0]};
+
+ // mtval: trap value
+ CSR_MTVAL: csr_rdata_int = mtval_q;
+
+ // mip: interrupt pending
+ CSR_MIP: begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSIX_BIT] = mip.irq_software;
+ csr_rdata_int[CSR_MTIX_BIT] = mip.irq_timer;
+ csr_rdata_int[CSR_MEIX_BIT] = mip.irq_external;
+ csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mip.irq_fast;
+ end
+
+ CSR_MSECCFG: begin
+ if (PMPEnable) begin
+ csr_rdata_int = '0;
+ csr_rdata_int[CSR_MSECCFG_MML_BIT] = pmp_mseccfg.mml;
+ csr_rdata_int[CSR_MSECCFG_MMWP_BIT] = pmp_mseccfg.mmwp;
+ csr_rdata_int[CSR_MSECCFG_RLB_BIT] = pmp_mseccfg.rlb;
+ end else begin
+ illegal_csr = 1'b1;
+ end
+ end
+
+ CSR_MSECCFGH: begin
+ if (PMPEnable) begin
+ csr_rdata_int = '0;
+ end else begin
+ illegal_csr = 1'b1;
+ end
+ end
+
+ // PMP registers
+ CSR_PMPCFG0: csr_rdata_int = {pmp_cfg_rdata[3], pmp_cfg_rdata[2],
+ pmp_cfg_rdata[1], pmp_cfg_rdata[0]};
+ CSR_PMPCFG1: csr_rdata_int = {pmp_cfg_rdata[7], pmp_cfg_rdata[6],
+ pmp_cfg_rdata[5], pmp_cfg_rdata[4]};
+ CSR_PMPCFG2: csr_rdata_int = {pmp_cfg_rdata[11], pmp_cfg_rdata[10],
+ pmp_cfg_rdata[9], pmp_cfg_rdata[8]};
+ CSR_PMPCFG3: csr_rdata_int = {pmp_cfg_rdata[15], pmp_cfg_rdata[14],
+ pmp_cfg_rdata[13], pmp_cfg_rdata[12]};
+ CSR_PMPADDR0: csr_rdata_int = pmp_addr_rdata[0];
+ CSR_PMPADDR1: csr_rdata_int = pmp_addr_rdata[1];
+ CSR_PMPADDR2: csr_rdata_int = pmp_addr_rdata[2];
+ CSR_PMPADDR3: csr_rdata_int = pmp_addr_rdata[3];
+ CSR_PMPADDR4: csr_rdata_int = pmp_addr_rdata[4];
+ CSR_PMPADDR5: csr_rdata_int = pmp_addr_rdata[5];
+ CSR_PMPADDR6: csr_rdata_int = pmp_addr_rdata[6];
+ CSR_PMPADDR7: csr_rdata_int = pmp_addr_rdata[7];
+ CSR_PMPADDR8: csr_rdata_int = pmp_addr_rdata[8];
+ CSR_PMPADDR9: csr_rdata_int = pmp_addr_rdata[9];
+ CSR_PMPADDR10: csr_rdata_int = pmp_addr_rdata[10];
+ CSR_PMPADDR11: csr_rdata_int = pmp_addr_rdata[11];
+ CSR_PMPADDR12: csr_rdata_int = pmp_addr_rdata[12];
+ CSR_PMPADDR13: csr_rdata_int = pmp_addr_rdata[13];
+ CSR_PMPADDR14: csr_rdata_int = pmp_addr_rdata[14];
+ CSR_PMPADDR15: csr_rdata_int = pmp_addr_rdata[15];
+
+ CSR_DCSR: begin
+ csr_rdata_int = dcsr_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DPC: begin
+ csr_rdata_int = depc_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DSCRATCH0: begin
+ csr_rdata_int = dscratch0_q;
+ illegal_csr = ~debug_mode_i;
+ end
+ CSR_DSCRATCH1: begin
+ csr_rdata_int = dscratch1_q;
+ illegal_csr = ~debug_mode_i;
+ end
+
+ // machine counter/timers
+ CSR_MCOUNTINHIBIT: csr_rdata_int = mcountinhibit;
+ CSR_MHPMEVENT3,
+ CSR_MHPMEVENT4, CSR_MHPMEVENT5, CSR_MHPMEVENT6, CSR_MHPMEVENT7,
+ CSR_MHPMEVENT8, CSR_MHPMEVENT9, CSR_MHPMEVENT10, CSR_MHPMEVENT11,
+ CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15,
+ CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19,
+ CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23,
+ CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27,
+ CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31: begin
+ csr_rdata_int = mhpmevent[mhpmcounter_idx];
+ end
+
+ CSR_MCYCLE,
+ CSR_MINSTRET,
+ CSR_MHPMCOUNTER3,
+ CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7,
+ CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+ CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+ CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+ CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+ CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+ CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+ csr_rdata_int = mhpmcounter[mhpmcounter_idx][31:0];
+ end
+
+ CSR_MCYCLEH,
+ CSR_MINSTRETH,
+ CSR_MHPMCOUNTER3H,
+ CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H,
+ CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+ CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+ CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+ CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+ CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+ CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+ csr_rdata_int = mhpmcounter[mhpmcounter_idx][63:32];
+ end
+
+ // Debug triggers
+ CSR_TSELECT: begin
+ csr_rdata_int = tselect_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA1: begin
+ csr_rdata_int = tmatch_control_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA2: begin
+ csr_rdata_int = tmatch_value_rdata;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_TDATA3: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_MCONTEXT: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+ CSR_SCONTEXT: begin
+ csr_rdata_int = '0;
+ illegal_csr = ~DbgTriggerEn;
+ end
+
+ // Custom CSR for controlling CPU features
+ CSR_CPUCTRL: begin
+ csr_rdata_int = {{32 - $bits(cpu_ctrl_t) {1'b0}}, cpuctrl_q};
+ end
+
+ // Custom CSR for LFSR re-seeding (cannot be read)
+ CSR_SECURESEED: begin
+ csr_rdata_int = '0;
+ end
+
+ // MSHWM CSR (stack high watermark in cheriot)
+ CSR_MSHWM: begin
+ if (cheri_pmode_i) begin
+ csr_rdata_int = cheri_pmode_i ? mshwm_q : 32'h0;
+ end else begin
+ illegal_csr = 1'b1;
+ end
+ end
+
+ CSR_MSHWMB: begin
+ if (cheri_pmode_i) begin
+ csr_rdata_int = cheri_pmode_i ? mshwmb_q : 32'h0;
+ end else begin
+ illegal_csr = 1'b1;
+ end
+ end
+
+ CSR_CDBG_CTRL: begin
+ if (cheri_pmode_i) begin
+ csr_rdata_int = cheri_pmode_i ? cdbg_ctrl_q : 32'h0;
+ end else begin
+ illegal_csr = 1'b1;
+ end
+ end
+
+ default: begin
+ illegal_csr = 1'b1;
+ end
+ endcase
+ end
+
+ // write logic
+ always_comb begin
+ exception_pc = pc_id_i;
+
+ priv_lvl_d = priv_lvl_q;
+ mstatus_en = 1'b0;
+ mstatus_d = mstatus_q;
+ mie_en = 1'b0;
+ mscratch_en = 1'b0;
+ mepc_en = 1'b0;
+ mepc_d = {csr_wdata_int[31:1], 1'b0};
+ mcause_en = 1'b0;
+ mcause_d = {csr_wdata_int[31], csr_wdata_int[4:0]};
+ mtval_en = 1'b0;
+ mtval_d = csr_wdata_int;
+ mtvec_en = csr_mtvec_init_i;
+ // mtvec.MODE set to vectored
+ // mtvec.BASE must be 256-byte aligned
+ mtvec_d = csr_mtvec_init_i ? {boot_addr_i[31:8], 6'b0, 1'b0, ~(CHERIoTEn&cheri_pmode_i)} :
+ {csr_wdata_int[31:8], 6'b0, 1'b0, ~(CHERIoTEn&cheri_pmode_i)};
+ dcsr_en = 1'b0;
+ dcsr_d = dcsr_q;
+ depc_d = {csr_wdata_int[31:1], 1'b0};
+ depc_en = 1'b0;
+ dscratch0_en = 1'b0;
+ dscratch1_en = 1'b0;
+
+ mstack_en = 1'b0;
+ mstack_d.mpie = mstatus_q.mpie;
+ mstack_d.mpp = mstatus_q.mpp;
+ mstack_epc_d = mepc_q;
+ mstack_cause_d = mcause_q;
+
+ mcountinhibit_we = 1'b0;
+ mhpmcounter_we = '0;
+ mhpmcounterh_we = '0;
+
+ cpuctrl_we = 1'b0;
+ cpuctrl_d = cpuctrl_q;
+
+ mshwm_en = 1'b0;
+ mshwmb_en = 1'b0;
+ cdbg_ctrl_en = 1'b0;
+
+ double_fault_seen_o = 1'b0;
+
+ if (csr_we_int) begin
+ unique case (csr_addr_i)
+ // mstatus: IE bit
+ CSR_MSTATUS: begin
+ mstatus_en = 1'b1;
+ mstatus_d = '{
+ mie: csr_wdata_int[CSR_MSTATUS_MIE_BIT],
+ mpie: csr_wdata_int[CSR_MSTATUS_MPIE_BIT],
+ mpp: priv_lvl_e'(csr_wdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW]),
+ mprv: csr_wdata_int[CSR_MSTATUS_MPRV_BIT],
+ tw: csr_wdata_int[CSR_MSTATUS_TW_BIT]
+ };
+ // Convert illegal values to M-mode
+ if ((mstatus_d.mpp != PRIV_LVL_M) && (mstatus_d.mpp != PRIV_LVL_U)) begin
+ mstatus_d.mpp = PRIV_LVL_M;
+ end
+ end
+
+ // interrupt enable
+ CSR_MIE: mie_en = 1'b1;
+
+ CSR_MSCRATCH: mscratch_en = 1'b1;
+
+ // mepc: exception program counter
+ CSR_MEPC: mepc_en = ~CHERIoTEn | ~cheri_pmode_i; // disabled for pure cap mode (only allow cap writes)
+
+ // mcause
+ CSR_MCAUSE: mcause_en = 1'b1;
+
+ // mtval: trap value
+ CSR_MTVAL: mtval_en = 1'b1;
+
+ // mtvec
+ CSR_MTVEC: mtvec_en = ~CHERIoTEn | ~cheri_pmode_i; // disabled for pure cap mode (only allow cap writes)
+
+ CSR_DCSR: begin
+ dcsr_d = csr_wdata_int;
+ dcsr_d.xdebugver = XDEBUGVER_STD;
+ // Change to PRIV_LVL_M if software writes an unsupported value
+ if ((dcsr_d.prv != PRIV_LVL_M) && (dcsr_d.prv != PRIV_LVL_U)) begin
+ dcsr_d.prv = PRIV_LVL_M;
+ end
+
+ // Read-only for SW
+ dcsr_d.cause = dcsr_q.cause;
+
+ // Interrupts always disabled during single stepping
+ dcsr_d.stepie = 1'b0;
+
+ // currently not supported:
+ dcsr_d.nmip = 1'b0;
+ dcsr_d.mprven = 1'b0;
+ dcsr_d.stopcount = 1'b0;
+ dcsr_d.stoptime = 1'b0;
+
+ // forced to be zero
+ dcsr_d.zero0 = 1'b0;
+ dcsr_d.zero1 = 1'b0;
+ dcsr_d.zero2 = 12'h0;
+ dcsr_en = 1'b1;
+ end
+
+ // dpc: debug program counter
+ CSR_DPC: depc_en = 1'b1;
+
+ CSR_DSCRATCH0: dscratch0_en = 1'b1;
+ CSR_DSCRATCH1: dscratch1_en = 1'b1;
+
+ // machine counter/timers
+ CSR_MCOUNTINHIBIT: mcountinhibit_we = 1'b1;
+
+ CSR_MCYCLE,
+ CSR_MINSTRET,
+ CSR_MHPMCOUNTER3,
+ CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7,
+ CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11,
+ CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15,
+ CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19,
+ CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23,
+ CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27,
+ CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin
+ mhpmcounter_we[mhpmcounter_idx] = 1'b1;
+ end
+
+ CSR_MCYCLEH,
+ CSR_MINSTRETH,
+ CSR_MHPMCOUNTER3H,
+ CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H,
+ CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H,
+ CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H,
+ CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H,
+ CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H,
+ CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H,
+ CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin
+ mhpmcounterh_we[mhpmcounter_idx] = 1'b1;
+ end
+
+ CSR_CPUCTRL: begin
+ cpuctrl_d = cpuctrl_wdata;
+ cpuctrl_we = 1'b1;
+ end
+
+ CSR_MSHWM: mshwm_en = CHERIoTEn & cheri_pmode_i;
+ CSR_MSHWMB: mshwmb_en = CHERIoTEn & cheri_pmode_i;
+ CSR_CDBG_CTRL: cdbg_ctrl_en = CHERIoTEn & cheri_pmode_i;
+
+ default:;
+ endcase
+ end
+
+ // exception controller gets priority over other writes
+ unique case (1'b1)
+
+ csr_save_cause_i: begin
+ unique case (1'b1)
+ csr_save_if_i: begin
+ exception_pc = pc_if_i;
+ end
+ csr_save_id_i: begin
+ exception_pc = pc_id_i;
+ end
+ csr_save_wb_i: begin
+ exception_pc = pc_wb_i;
+ end
+ default:;
+ endcase
+
+ // Any exception, including debug mode, causes a switch to M-mode
+ priv_lvl_d = PRIV_LVL_M;
+
+ if (debug_csr_save_i) begin
+ // all interrupts are masked
+        // do not update cause, epc, tval and status; dcsr and dpc are updated instead
+ dcsr_d.prv = priv_lvl_q;
+ dcsr_d.cause = debug_cause_i;
+ dcsr_en = 1'b1;
+ depc_d = exception_pc;
+ depc_en = 1'b1;
+ end else if (!debug_mode_i) begin
+ // In debug mode, "exceptions do not update any registers. That
+ // includes cause, epc, tval, dpc and mstatus." [Debug Spec v0.13.2, p.39]
+ mtval_en = 1'b1;
+ mtval_d = csr_mtval_i;
+ mstatus_en = 1'b1;
+ mstatus_d.mie = 1'b0; // disable interrupts
+ // save current status
+ mstatus_d.mpie = mstatus_q.mie;
+ mstatus_d.mpp = priv_lvl_q;
+ mepc_en = 1'b1;
+ mepc_d = exception_pc;
+ mcause_en = 1'b1;
+ mcause_d = {csr_mcause_i};
+ // save previous status for recoverable NMI
+ mstack_en = 1'b1;
+
+ if (!mcause_d[5]) begin
+ cpuctrl_we = 1'b1;
+
+ cpuctrl_d.sync_exc_seen = 1'b1;
+ if (cpuctrl_q.sync_exc_seen) begin
+ double_fault_seen_o = 1'b1;
+ cpuctrl_d.double_fault_seen = 1'b1;
+ end
+ end
+ end
+ end // csr_save_cause_i
+
+ csr_restore_dret_i: begin // DRET
+ priv_lvl_d = dcsr_q.prv;
+ end // csr_restore_dret_i
+
+ csr_restore_mret_i: begin // MRET
+ priv_lvl_d = mstatus_q.mpp;
+ mstatus_en = 1'b1;
+ mstatus_d.mie = mstatus_q.mpie; // re-enable interrupts
+
+ // merge in upstream change 9/7/2022 // LEC_NOT_COMPATIBLE
+ if (mstatus_q.mpp != PRIV_LVL_M) begin
+ mstatus_d.mprv = 1'b0;
+ end
+
+ cpuctrl_we = 1'b1;
+ cpuctrl_d.sync_exc_seen = 1'b0;
+
+ if (nmi_mode_i) begin
+ // when returning from an NMI restore state from mstack CSR
+ mstatus_d.mpie = mstack_q.mpie;
+ mstatus_d.mpp = mstack_q.mpp;
+ mepc_en = 1'b1;
+ mepc_d = mstack_epc_q;
+ mcause_en = 1'b1;
+ mcause_d = mstack_cause_q;
+ end else begin
+ // otherwise just set mstatus.MPIE/MPP
+ // See RISC-V Privileged Specification, version 1.11, Section 3.1.6.1
+ mstatus_d.mpie = 1'b1;
+ mstatus_d.mpp = PRIV_LVL_U;
+ end
+ end // csr_restore_mret_i
+
+ default:;
+ endcase
+ end
+
+ // Update current priv level
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ priv_lvl_q <= PRIV_LVL_M;
+ end else begin
+ priv_lvl_q <= priv_lvl_d;
+ end
+ end
+
+ // Send current priv level to the decoder
+ assign priv_mode_id_o = priv_lvl_q;
+ // Load/store instructions must factor in MPRV for PMP checking
+ assign priv_mode_lsu_o = mstatus_q.mprv ? mstatus_q.mpp : priv_lvl_q;
+
+ // CSR operation logic
+ always_comb begin
+ unique case (csr_op_i)
+ CSR_OP_WRITE: csr_wdata_int = csr_wdata_i;
+ CSR_OP_SET: csr_wdata_int = csr_wdata_i | csr_rdata_o;
+ CSR_OP_CLEAR: csr_wdata_int = ~csr_wdata_i & csr_rdata_o;
+ CSR_OP_READ: csr_wdata_int = csr_wdata_i;
+ default: csr_wdata_int = csr_wdata_i;
+ endcase
+ end
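+  // For illustration: CSR_OP_SET with csr_wdata_i = 32'h8 OR-s bit 3 into the current read value,
+  // CSR_OP_CLEAR with the same operand clears only bit 3, and CSR_OP_READ simply passes the
+  // (unused) write data through.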
+
+ assign csr_wr = (csr_op_i inside {CSR_OP_WRITE, CSR_OP_SET, CSR_OP_CLEAR});
+
+ // only write CSRs during one clock cycle
+
+  // Enforce the CHERI CSR access policy:
+  // - exceptions for ASR violations are generated in the controller.
+  // - we never allow writes to any CSR if ASR=0
+  // - no need to gate csr_rdata on ASR violations since the instruction will be faulted anyway
+
+ // logic read_ok;
+ // assign read_ok = ~CHERIoTEn || ~cheri_pmode_i || debug_mode_i || pcc_cap_q.perms[PERM_SR] ||
+ // ((csr_addr_i>=CSR_MCYCLE) && (csr_addr_i<=CSR_CDBG_CTRL));
+ assign csr_we_int = csr_wr & csr_op_en_i & (~CHERIoTEn | ~cheri_pmode_i | debug_mode_i | pcc_cap_q.perms[PERM_SR]) & ~illegal_csr_insn_o;
+
+ // assign csr_rdata_o = read_ok ? csr_rdata_int : 0;
+ assign csr_rdata_o = csr_rdata_int;
+
+ // directly output some registers
+ assign csr_mepc_o = mepc_q;
+ assign csr_depc_o = depc_q;
+ assign csr_mtvec_o = mtvec_q;
+
+ assign csr_mshwm_o = mshwm_q;
+ assign csr_mshwmb_o = mshwmb_q;
+
+ assign csr_mstatus_mie_o = mstatus_q.mie;
+ assign csr_mstatus_tw_o = mstatus_q.tw;
+ assign debug_single_step_o = dcsr_q.step;
+ assign debug_ebreakm_o = dcsr_q.ebreakm;
+ assign debug_ebreaku_o = dcsr_q.ebreaku;
+
+ // Qualify incoming interrupt requests in mip CSR with mie CSR for controller and to re-enable
+ // clock upon WFI (must be purely combinational).
+ assign irqs_o = mip & mie_q;
+ assign irq_pending_o = |irqs_o;
+
+ ////////////////////////
+ // CSR instantiations //
+ ////////////////////////
+
+ // MSTATUS
+ localparam status_t MSTATUS_RST_VAL = '{mie: 1'b0,
+ mpie: 1'b1,
+ mpp: PRIV_LVL_U,
+ mprv: 1'b0,
+ tw: 1'b0};
+
+ // adding set/clr of mie based on sentry type for CHERIoT
+ logic mstatus_en_combi;
+ status_t mstatus_d_combi;
+
+ assign mstatus_en_combi = mstatus_en | cheri_csr_clr_mie_i | cheri_csr_set_mie_i;
+
+ always_comb begin
+ mstatus_d_combi = mstatus_d;
+ mstatus_d_combi.mie = (mstatus_d.mie & ~cheri_csr_clr_mie_i) | cheri_csr_set_mie_i;
+ end
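+  // Note: if cheri_csr_set_mie_i and cheri_csr_clr_mie_i are asserted in the same cycle, set wins
+  // (the OR term is applied after the AND mask).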
+
+ cheriot_csr #(
+ .Width ($bits(status_t)),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue({MSTATUS_RST_VAL})
+ ) u_mstatus_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({mstatus_d_combi}),
+ .wr_en_i (mstatus_en_combi),
+ .rd_data_o (mstatus_q),
+ .rd_error_o(mstatus_err)
+ );
+
+ assign mepc_en_combi = mepc_en | mepc_en_cheri;
+ assign mepc_d_combi = ({32{mepc_en}} & mepc_d) | ({32{mepc_en_cheri}} & cheri_csr_wdata_i);
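+  // AND-OR mux between the legacy CSR write path (mepc_d) and the CHERI SCR write path
+  // (cheri_csr_wdata_i); the surrounding control is expected to keep the two enables mutually
+  // exclusive, otherwise the write data would be OR-ed together.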
+
+ // MEPC
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mepc_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mepc_d_combi),
+ .wr_en_i (mepc_en_combi),
+ .rd_data_o (mepc_q),
+ .rd_error_o()
+ );
+
+ // MIE
+ assign mie_d.irq_software = csr_wdata_int[CSR_MSIX_BIT];
+ assign mie_d.irq_timer = csr_wdata_int[CSR_MTIX_BIT];
+ assign mie_d.irq_external = csr_wdata_int[CSR_MEIX_BIT];
+ assign mie_d.irq_fast = csr_wdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW];
+ cheriot_csr #(
+ .Width ($bits(irqs_t)),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mie_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({mie_d}),
+ .wr_en_i (mie_en),
+ .rd_data_o (mie_q),
+ .rd_error_o()
+ );
+
+ // MSCRATCH
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mscratch_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (csr_wdata_int),
+ .wr_en_i (mscratch_en),
+ .rd_data_o (mscratch_q),
+ .rd_error_o()
+ );
+
+ // MCAUSE
+ cheriot_csr #(
+ .Width (6),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mcause_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mcause_d),
+ .wr_en_i (mcause_en),
+ .rd_data_o (mcause_q),
+ .rd_error_o()
+ );
+
+ // MTVAL
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mtval_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mtval_d),
+ .wr_en_i (mtval_en),
+ .rd_data_o (mtval_q),
+ .rd_error_o()
+ );
+
+
+ assign mtvec_en_combi = mtvec_en | mtvec_en_cheri;
+
+ // use only 2'b00 (direct mode) for CHERIoT
+ assign mtvec_d_combi = ({32{mtvec_en}} & mtvec_d) | ({32{mtvec_en_cheri}} &
+ {cheri_csr_wdata_i[31:2],2'b00});
+
+ // MTVEC
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue({32'd1}) // retain this to make lec vs ibex pass
+ ) u_mtvec_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mtvec_d_combi),
+ .wr_en_i (mtvec_en_combi),
+ .rd_data_o (mtvec_q),
+ .rd_error_o(mtvec_err)
+ );
+
+ // DCSR
+ localparam dcsr_t DCSR_RESET_VAL = '{
+ xdebugver: XDEBUGVER_STD,
+ cause: DBG_CAUSE_NONE, // 3'h0
+ prv: PRIV_LVL_M,
+ default: '0
+ };
+ cheriot_csr #(
+ .Width ($bits(dcsr_t)),
+ .ShadowCopy(1'b0),
+ .ResetValue({DCSR_RESET_VAL})
+ ) u_dcsr_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({dcsr_d}),
+ .wr_en_i (dcsr_en),
+ .rd_data_o (dcsr_q),
+ .rd_error_o()
+ );
+
+ assign depc_en_combi = depc_en | depc_en_cheri;
+ assign depc_d_combi = ({32{depc_en}} & depc_d) | ({32{depc_en_cheri}} & cheri_csr_wdata_i);
+
+ // DEPC
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_depc_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (depc_d_combi),
+ .wr_en_i (depc_en_combi),
+ .rd_data_o (depc_q),
+ .rd_error_o()
+ );
+
+ assign dscratch0_en_combi = dscratch0_en | dscratch0_en_cheri;
+ assign dscratch0_d_combi = ({32{dscratch0_en}} & csr_wdata_int) | ({32{dscratch0_en_cheri}} & cheri_csr_wdata_i);
+
+ // DSCRATCH0
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_dscratch0_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (dscratch0_d_combi),
+ .wr_en_i (dscratch0_en_combi),
+ .rd_data_o (dscratch0_q),
+ .rd_error_o()
+ );
+
+ assign dscratch1_en_combi = dscratch1_en | dscratch1_en_cheri;
+ assign dscratch1_d_combi = ({32{dscratch1_en}} & csr_wdata_int) | ({32{dscratch1_en_cheri}} & cheri_csr_wdata_i);
+
+  // DSCRATCH1
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_dscratch1_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (dscratch1_d_combi),
+ .wr_en_i (dscratch1_en_combi),
+ .rd_data_o (dscratch1_q),
+ .rd_error_o()
+ );
+
+ // MSTACK
+ localparam status_stk_t MSTACK_RESET_VAL = '{mpie: 1'b1, mpp: PRIV_LVL_U};
+ cheriot_csr #(
+ .Width ($bits(status_stk_t)),
+ .ShadowCopy(1'b0),
+ .ResetValue({MSTACK_RESET_VAL})
+ ) u_mstack_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({mstack_d}),
+ .wr_en_i (mstack_en),
+ .rd_data_o (mstack_q),
+ .rd_error_o()
+ );
+
+ // MSTACK_EPC
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mstack_epc_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mstack_epc_d),
+ .wr_en_i (mstack_en),
+ .rd_data_o (mstack_epc_q),
+ .rd_error_o()
+ );
+
+ // MSTACK_CAUSE
+ cheriot_csr #(
+ .Width (6),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_mstack_cause_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mstack_cause_d),
+ .wr_en_i (mstack_en),
+ .rd_data_o (mstack_cause_q),
+ .rd_error_o()
+ );
+
+  // MSHWM and MSHWMB
+ logic mshwm_en_combi;
+ assign mshwm_en_combi = mshwm_en | csr_mshwm_set_i;
+ assign mshwm_d = csr_mshwm_set_i ? csr_mshwm_new_i : {csr_wdata_int[31:4], 4'h0};
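+  // Hardware updates via csr_mshwm_set_i take priority over software writes, and software writes
+  // are forced to 16-byte alignment by zeroing bits [3:0].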
+
+ if (CHERIoTEn) begin: g_mshwm
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(32'd0)
+ ) u_mshwm_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (mshwm_d),
+ .wr_en_i (mshwm_en_combi),
+ .rd_data_o (mshwm_q),
+ .rd_error_o()
+ );
+
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(32'd0)
+ ) u_mshwmb_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({csr_wdata_int[31:4], 4'h0}),
+ .wr_en_i (mshwmb_en),
+ .rd_data_o (mshwmb_q),
+ .rd_error_o()
+ );
+
+ // cheri debug feature control
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(32'd0)
+ ) u_cdbg_ctrl_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({31'h0, csr_wdata_int[0]}),
+ .wr_en_i (cdbg_ctrl_en),
+ .rd_data_o (cdbg_ctrl_q),
+ .rd_error_o()
+ );
+
+ assign csr_dbg_tclr_fault_o = cdbg_ctrl_q[0];
+
+ end else begin
+ assign mshwm_q = 32'h0;
+ assign mshwmb_q = 32'h0;
+
+ assign csr_dbg_tclr_fault_o = 1'b0;
+ end
+
+ // -----------------
+ // PMP registers
+ // -----------------
+
+ if (PMPEnable) begin : g_pmp_registers
+ // PMP reset values
+ `ifdef CHERIOT_CUSTOM_PMP_RESET_VALUES
+ `include "cheriot_pmp_reset.svh"
+ `else
+ `include "cheriot_pmp_reset_default.svh"
+ `endif
+
+ pmp_mseccfg_t pmp_mseccfg_q, pmp_mseccfg_d;
+ logic pmp_mseccfg_we;
+ logic pmp_mseccfg_err;
+ pmp_cfg_t pmp_cfg [PMPNumRegions];
+ logic [PMPNumRegions-1:0] pmp_cfg_locked;
+ pmp_cfg_t pmp_cfg_wdata [PMPNumRegions];
+ logic [PMPAddrWidth-1:0] pmp_addr [PMPNumRegions];
+ logic [PMPNumRegions-1:0] pmp_cfg_we;
+ logic [PMPNumRegions-1:0] pmp_cfg_err;
+ logic [PMPNumRegions-1:0] pmp_addr_we;
+ logic [PMPNumRegions-1:0] pmp_addr_err;
+ logic any_pmp_entry_locked;
+
+ // Expanded / qualified register read data
+ for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_exp_rd_data
+ if (i < PMPNumRegions) begin : g_implemented_regions
+ // Add in zero padding for reserved fields
+ assign pmp_cfg_rdata[i] = {pmp_cfg[i].lock, 2'b00, pmp_cfg[i].mode,
+ pmp_cfg[i].exec, pmp_cfg[i].write, pmp_cfg[i].read};
+
+ // Address field read data depends on the current programmed mode and the granularity
+ // See RISC-V Privileged Specification, version 1.11, Section 3.6.1
+ if (PMPGranularity == 0) begin : g_pmp_g0
+ // If G == 0, read data is unmodified
+ assign pmp_addr_rdata[i] = pmp_addr[i];
+
+ end else if (PMPGranularity == 1) begin : g_pmp_g1
+ // If G == 1, bit [G-1] reads as zero in TOR or OFF mode
+ always_comb begin
+ pmp_addr_rdata[i] = pmp_addr[i];
+ if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+ pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+ end
+ end
+
+ end else begin : g_pmp_g2
+ // For G >= 2, bits are masked to one or zero depending on the mode
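+          // e.g. with PMPGranularity = 2 (a 16-byte granule) a NAPOT entry reads back bit [0]
+          // as 1, while an OFF/TOR entry reads bits [1:0] as 0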
+ always_comb begin
+ // In NAPOT mode, bits [G-2:0] must read as one
+ pmp_addr_rdata[i] = {pmp_addr[i], {PMPGranularity - 1{1'b1}}};
+
+ if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin
+ // In TOR or OFF mode, bits [G-1:0] must read as zero
+ pmp_addr_rdata[i][PMPGranularity-1:0] = '0;
+ end
+ end
+ end
+
+ end else begin : g_other_regions
+ // Non-implemented regions read as zero
+ assign pmp_cfg_rdata[i] = '0;
+ assign pmp_addr_rdata[i] = '0;
+ end
+ end
+
+ // Write data calculation
+ for (genvar i = 0; i < PMPNumRegions; i++) begin : g_pmp_csrs
+ // -------------------------
+ // Instantiate cfg registers
+ // -------------------------
+ assign pmp_cfg_we[i] = csr_we_int & ~pmp_cfg_locked[i] &
+ (csr_addr == (CSR_OFF_PMP_CFG + (i[11:0] >> 2)));
+
+ // Select the correct WDATA (each CSR contains 4 CFG fields, each with 2 RES bits)
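+      // e.g. region 5 sits in byte lane 1 of pmpcfg1, so its fields come from csr_wdata_int[15:8]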
+ assign pmp_cfg_wdata[i].lock = csr_wdata_int[(i%4)*PMP_CFG_W+7];
+ // NA4 mode is not selectable when G > 0, mode is treated as OFF
+ always_comb begin
+ unique case (csr_wdata_int[(i%4)*PMP_CFG_W+3+:2])
+ 2'b00 : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+ 2'b01 : pmp_cfg_wdata[i].mode = PMP_MODE_TOR;
+ 2'b10 : pmp_cfg_wdata[i].mode = (PMPGranularity == 0) ? PMP_MODE_NA4:
+ PMP_MODE_OFF;
+ 2'b11 : pmp_cfg_wdata[i].mode = PMP_MODE_NAPOT;
+ default : pmp_cfg_wdata[i].mode = PMP_MODE_OFF;
+ endcase
+ end
+ assign pmp_cfg_wdata[i].exec = csr_wdata_int[(i%4)*PMP_CFG_W+2];
+ // When MSECCFG.MML is unset, W = 1, R = 0 is a reserved combination, so force W to 0 if R ==
+ // 0. Otherwise allow all possible values to be written.
+ assign pmp_cfg_wdata[i].write = pmp_mseccfg_q.mml ? csr_wdata_int[(i%4)*PMP_CFG_W+1] :
+ &csr_wdata_int[(i%4)*PMP_CFG_W+:2];
+ assign pmp_cfg_wdata[i].read = csr_wdata_int[(i%4)*PMP_CFG_W];
+
+ cheriot_csr #(
+ .Width ($bits(pmp_cfg_t)),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(pmp_cfg_rst[i])
+ ) u_pmp_cfg_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({pmp_cfg_wdata[i]}),
+ .wr_en_i (pmp_cfg_we[i]),
+ .rd_data_o (pmp_cfg[i]),
+ .rd_error_o(pmp_cfg_err[i])
+ );
+
+ // MSECCFG.RLB allows the lock bit to be bypassed (allowing cfg writes when MSECCFG.RLB is
+ // set).
+ assign pmp_cfg_locked[i] = pmp_cfg[i].lock & ~pmp_mseccfg_q.rlb;
+
+ // --------------------------
+ // Instantiate addr registers
+ // --------------------------
+ if (i < PMPNumRegions - 1) begin : g_lower
+ assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg_locked[i] &
+ (~pmp_cfg_locked[i+1] | (pmp_cfg[i+1].mode != PMP_MODE_TOR)) &
+ (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+ end else begin : g_upper
+ assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg_locked[i] &
+ (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0]));
+ end
+
+ cheriot_csr #(
+ .Width (PMPAddrWidth),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(pmp_addr_rst[i][33-:PMPAddrWidth])
+ ) u_pmp_addr_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (csr_wdata_int[31-:PMPAddrWidth]),
+ .wr_en_i (pmp_addr_we[i]),
+ .rd_data_o (pmp_addr[i]),
+ .rd_error_o(pmp_addr_err[i])
+ );
+
+ `ASSERT_INIT(PMPAddrRstLowBitsZero_A, pmp_addr_rst[i][33-PMPAddrWidth:0] == '0)
+
+ assign csr_pmp_cfg_o[i] = pmp_cfg[i];
+ assign csr_pmp_addr_o[i] = {pmp_addr_rdata[i], 2'b00};
+ end
+
+ assign pmp_mseccfg_we = csr_we_int & (csr_addr == CSR_MSECCFG);
+
+ // MSECCFG.MML/MSECCFG.MMWP cannot be unset once set
+ assign pmp_mseccfg_d.mml = pmp_mseccfg_q.mml ? 1'b1 : csr_wdata_int[CSR_MSECCFG_MML_BIT];
+ assign pmp_mseccfg_d.mmwp = pmp_mseccfg_q.mmwp ? 1'b1 : csr_wdata_int[CSR_MSECCFG_MMWP_BIT];
+
+ // pmp_cfg_locked factors in MSECCFG.RLB so any_pmp_entry_locked will only be set if MSECCFG.RLB
+ // is unset
+ assign any_pmp_entry_locked = |pmp_cfg_locked;
+
+ // When any PMP entry is locked (A PMP entry has the L bit set and MSECCFG.RLB is unset),
+ // MSECCFG.RLB cannot be set again
+ assign pmp_mseccfg_d.rlb = any_pmp_entry_locked ? 1'b0 : csr_wdata_int[CSR_MSECCFG_RLB_BIT];
+
+ cheriot_csr #(
+ .Width ($bits(pmp_mseccfg_t)),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue(pmp_mseccfg_rst)
+ ) u_pmp_mseccfg (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (pmp_mseccfg_d),
+ .wr_en_i (pmp_mseccfg_we),
+ .rd_data_o (pmp_mseccfg_q),
+ .rd_error_o(pmp_mseccfg_err)
+ );
+
+ assign pmp_csr_err = (|pmp_cfg_err) | (|pmp_addr_err) | pmp_mseccfg_err;
+ assign pmp_mseccfg = pmp_mseccfg_q;
+
+ end else begin : g_no_pmp_tieoffs
+ // Generate tieoffs when PMP is not configured
+ for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_rdata
+ assign pmp_addr_rdata[i] = '0;
+ assign pmp_cfg_rdata[i] = '0;
+ end
+ for (genvar i = 0; i < PMPNumRegions; i++) begin : g_outputs
+ assign csr_pmp_cfg_o[i] = pmp_cfg_t'(1'b0);
+ assign csr_pmp_addr_o[i] = '0;
+ end
+ assign pmp_csr_err = 1'b0;
+ assign pmp_mseccfg = '0;
+ end
+
+ assign csr_pmp_mseccfg_o = pmp_mseccfg;
+
+ //////////////////////////
+ // Performance monitor //
+ //////////////////////////
+
+ // update enable signals
+ always_comb begin : mcountinhibit_update
+ if (mcountinhibit_we == 1'b1) begin
+ // bit 1 must always be 0
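+      // (bit 0 inhibits mcycle, bit 2 inhibits minstret; bit 1 corresponds to the unimplemented
+      // time counter and always reads as zero)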
+ mcountinhibit_d = {csr_wdata_int[MHPMCounterNum+2:2], 1'b0, csr_wdata_int[0]};
+ end else begin
+ mcountinhibit_d = mcountinhibit_q;
+ end
+ end
+
+ // event selection (hardwired) & control
+ always_comb begin : gen_mhpmcounter_incr
+
+ // Assign inactive counters (first to prevent latch inference)
+ for (int unsigned i = 0; i < 32; i++) begin : gen_mhpmcounter_incr_inactive
+ mhpmcounter_incr[i] = 1'b0;
+ end
+
+ // When adding or altering performance counter meanings and default
+ // mappings please update dv/verilator/pcount/cpp/ibex_pcounts.cc
+ // appropriately.
+ //
+ // active counters
+ mhpmcounter_incr[0] = 1'b1; // mcycle
+ mhpmcounter_incr[1] = 1'b0; // reserved
+ mhpmcounter_incr[2] = instr_ret_i; // minstret
+ mhpmcounter_incr[3] = dside_wait_i; // cycles waiting for data memory
+ mhpmcounter_incr[4] = iside_wait_i; // cycles waiting for instr fetches
+ mhpmcounter_incr[5] = mem_load_i; // num of loads
+ mhpmcounter_incr[6] = mem_store_i; // num of stores
+ mhpmcounter_incr[7] = jump_i; // num of jumps (unconditional)
+ mhpmcounter_incr[8] = branch_i; // num of branches (conditional)
+ mhpmcounter_incr[9] = branch_taken_i; // num of taken branches (conditional)
+ mhpmcounter_incr[10] = instr_ret_compressed_i; // num of compressed instr
+ mhpmcounter_incr[11] = mul_wait_i; // cycles waiting for multiply
+ mhpmcounter_incr[12] = div_wait_i; // cycles waiting for divide
+ end
+
+ // event selector (hardwired, 0 means no event)
+ always_comb begin : gen_mhpmevent
+
+ // activate all
+ for (int i = 0; i < 32; i++) begin : gen_mhpmevent_active
+ mhpmevent[i] = '0;
+ mhpmevent[i][i] = 1'b1;
+ end
+
+ // deactivate
+    mhpmevent[1] = '0; // does not exist, reserved
+ for (int unsigned i = 3 + MHPMCounterNum; i < 32; i++) begin : gen_mhpmevent_inactive
+ mhpmevent[i] = '0;
+ end
+ end
+
+ // mcycle
+ cheriot_counter #(
+ .CounterWidth(64)
+ ) mcycle_counter_i (
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .counter_inc_i(mhpmcounter_incr[0] & ~mcountinhibit[0]),
+ .counterh_we_i(mhpmcounterh_we[0]),
+ .counter_we_i(mhpmcounter_we[0]),
+ .counter_val_i(csr_wdata_int),
+ .counter_val_o(mhpmcounter[0]),
+ .counter_val_upd_o()
+ );
+
+
+ // minstret
+ cheriot_counter #(
+ .CounterWidth(64),
+ .ProvideValUpd(1)
+ ) minstret_counter_i (
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .counter_inc_i(mhpmcounter_incr[2] & ~mcountinhibit[2]),
+ .counterh_we_i(mhpmcounterh_we[2]),
+ .counter_we_i(mhpmcounter_we[2]),
+ .counter_val_i(csr_wdata_int),
+ .counter_val_o(minstret_raw),
+ .counter_val_upd_o(minstret_next)
+ );
+
+  // Where the writeback stage is present, an instruction in ID observing the value of minstret
+  // must take into account any instruction in the writeback stage. If one is present, the
+  // incremented value of minstret is used. A speculative version of the signal is used to aid
+  // timing. When the writeback stage sees an exception (so the speculative signal is incorrect)
+  // the ID stage will be flushed, so the incorrect value doesn't matter. Similar behaviour is
+  // required for the compressed instruction retired counter below. When the writeback stage
+  // isn't present the speculative signals are always 0.
+ assign mhpmcounter[2] = instr_ret_spec_i & ~mcountinhibit[2] ? minstret_next : minstret_raw;
+
+ // reserved:
+ assign mhpmcounter[1] = '0;
+ assign unused_mhpmcounter_we_1 = mhpmcounter_we[1];
+ assign unused_mhpmcounterh_we_1 = mhpmcounterh_we[1];
+ assign unused_mhpmcounter_incr_1 = mhpmcounter_incr[1];
+
+ // Iterate through optionally included counters (MHPMCounterNum controls how many are included)
+ for (genvar i = 0; i < 29; i++) begin : gen_cntrs
+ localparam int Cnt = i + 3;
+
+ if (i < MHPMCounterNum) begin : gen_imp
+ logic [63:0] mhpmcounter_raw, mhpmcounter_next;
+
+ cheriot_counter #(
+ .CounterWidth(MHPMCounterWidth),
+ .ProvideValUpd(Cnt == 10)
+ ) mcounters_variable_i (
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .counter_inc_i(mhpmcounter_incr[Cnt] & ~mcountinhibit[Cnt]),
+ .counterh_we_i(mhpmcounterh_we[Cnt]),
+ .counter_we_i(mhpmcounter_we[Cnt]),
+ .counter_val_i(csr_wdata_int),
+ .counter_val_o(mhpmcounter_raw),
+ .counter_val_upd_o(mhpmcounter_next)
+ );
+
+ if (Cnt == 10) begin : gen_compressed_instr_cnt
+ // Special behaviour for reading compressed instruction retired counter, see comment on
+ // `mhpmcounter[2]` above for further information.
+ assign mhpmcounter[Cnt] =
+ instr_ret_compressed_spec_i & ~mcountinhibit[Cnt] ? mhpmcounter_next:
+ mhpmcounter_raw;
+ end else begin : gen_other_cnts
+ logic [63:0] unused_mhpmcounter_next;
+ // All other counters just see the raw counter value directly.
+ assign mhpmcounter[Cnt] = mhpmcounter_raw;
+ assign unused_mhpmcounter_next = mhpmcounter_next;
+ end
+ end else begin : gen_unimp
+ assign mhpmcounter[Cnt] = '0;
+
+ if (Cnt == 10) begin : gen_no_compressed_instr_cnt
+ logic unused_instr_ret_compressed_spec_i;
+ assign unused_instr_ret_compressed_spec_i = instr_ret_compressed_spec_i;
+ end
+ end
+ end
+
+ if (MHPMCounterNum < 29) begin : g_mcountinhibit_reduced
+ logic [29-MHPMCounterNum-1:0] unused_mhphcounter_we;
+ logic [29-MHPMCounterNum-1:0] unused_mhphcounterh_we;
+ logic [29-MHPMCounterNum-1:0] unused_mhphcounter_incr;
+
+ assign mcountinhibit = {{29 - MHPMCounterNum{1'b1}}, mcountinhibit_q};
+ // Lint tieoffs for unused bits
+ assign unused_mhphcounter_we = mhpmcounter_we[31:MHPMCounterNum+3];
+ assign unused_mhphcounterh_we = mhpmcounterh_we[31:MHPMCounterNum+3];
+ assign unused_mhphcounter_incr = mhpmcounter_incr[31:MHPMCounterNum+3];
+ end else begin : g_mcountinhibit_full
+ assign mcountinhibit = mcountinhibit_q;
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mcountinhibit_q <= '0;
+ end else begin
+ mcountinhibit_q <= mcountinhibit_d;
+ end
+ end
+
+ /////////////////////////////
+ // Debug trigger registers //
+ /////////////////////////////
+
+ if (DbgTriggerEn) begin : gen_trigger_regs
+ localparam int unsigned DbgHwNumLen = DbgHwBreakNum > 1 ? $clog2(DbgHwBreakNum) : 1;
+ localparam int unsigned MaxTselect = DbgHwBreakNum - 1;
+
+ // Register values
+ logic [DbgHwNumLen-1:0] tselect_d, tselect_q;
+ logic tmatch_control_d;
+ logic [DbgHwBreakNum-1:0] tmatch_control_q;
+ logic [31:0] tmatch_value_d;
+ logic [31:0] tmatch_value_q[DbgHwBreakNum];
+ logic selected_tmatch_control;
+ logic [31:0] selected_tmatch_value;
+
+ // Write enables
+ logic tselect_we;
+ logic [DbgHwBreakNum-1:0] tmatch_control_we;
+ logic [DbgHwBreakNum-1:0] tmatch_value_we;
+ // Trigger comparison result
+ logic [DbgHwBreakNum-1:0] trigger_match;
+
+ // Write select
+ assign tselect_we = csr_we_int & debug_mode_i & (csr_addr_i == CSR_TSELECT);
+ for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_we
+ assign tmatch_control_we[i] = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i &
+ (csr_addr_i == CSR_TDATA1);
+ assign tmatch_value_we[i] = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i &
+ (csr_addr_i == CSR_TDATA2);
+ end
+
+ // Debug interface tests the available number of triggers by writing and reading the trigger
+ // select register. Only allow changes to the register if it is within the supported region.
+ assign tselect_d = (csr_wdata_int < DbgHwBreakNum) ? csr_wdata_int[DbgHwNumLen-1:0] :
+ MaxTselect[DbgHwNumLen-1:0];
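+    // e.g. with DbgHwBreakNum = 2, writing 5 to tselect saturates to 1 (MaxTselect)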
+
+ // tmatch_control is enabled when the execute bit is set
+ assign tmatch_control_d = csr_wdata_int[2];
+ assign tmatch_value_d = csr_wdata_int[31:0];
+
+ // Registers
+ cheriot_csr #(
+ .Width (DbgHwNumLen),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_tselect_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (tselect_d),
+ .wr_en_i (tselect_we),
+ .rd_data_o (tselect_q),
+ .rd_error_o()
+ );
+
+ for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_reg
+ cheriot_csr #(
+ .Width (1),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_tmatch_control_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (tmatch_control_d),
+ .wr_en_i (tmatch_control_we[i]),
+ .rd_data_o (tmatch_control_q[i]),
+ .rd_error_o()
+ );
+
+ cheriot_csr #(
+ .Width (32),
+ .ShadowCopy(1'b0),
+ .ResetValue('0)
+ ) u_tmatch_value_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i (tmatch_value_d),
+ .wr_en_i (tmatch_value_we[i]),
+ .rd_data_o (tmatch_value_q[i]),
+ .rd_error_o()
+ );
+ end
+
+ // Assign read data
+ // TSELECT - number of supported triggers defined by parameter DbgHwBreakNum
+ localparam int unsigned TSelectRdataPadlen = DbgHwNumLen >= 32 ? 0 : (32 - DbgHwNumLen);
+ assign tselect_rdata = {{TSelectRdataPadlen{1'b0}}, tselect_q};
+
+ if (DbgHwBreakNum > 1) begin : g_dbg_tmatch_multiple_select
+ assign selected_tmatch_control = tmatch_control_q[tselect_q];
+ assign selected_tmatch_value = tmatch_value_q[tselect_q];
+ end else begin : g_dbg_tmatch_single_select
+ assign selected_tmatch_control = tmatch_control_q[0];
+ assign selected_tmatch_value = tmatch_value_q[0];
+ end
+
+    // TDATA1 (tmatch_control) - only simple address matching is supported
+ assign tmatch_control_rdata = {4'h2, // type : address/data match
+ 1'b1, // dmode : access from D mode only
+ 6'h00, // maskmax : exact match only
+ 1'b0, // hit : not supported
+ 1'b0, // select : address match only
+ 1'b0, // timing : match before execution
+ 2'b00, // sizelo : match any access
+ 4'h1, // action : enter debug mode
+ 1'b0, // chain : not supported
+ 4'h0, // match : simple match
+ 1'b1, // m : match in m-mode
+ 1'b0, // 0 : zero
+ 1'b0, // s : not supported
+ 1'b1, // u : match in u-mode
+ selected_tmatch_control, // execute : match instruction address
+ 1'b0, // store : not supported
+ 1'b0}; // load : not supported
+
+    // TDATA2 (tmatch_value) - address match value only
+ assign tmatch_value_rdata = selected_tmatch_value;
+
+ // Breakpoint matching
+ // We match against the next address, as the breakpoint must be taken before execution
+ for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_trigger_match
+ assign trigger_match[i] = tmatch_control_q[i] & (pc_if_i[31:0] == tmatch_value_q[i]);
+ end
+ assign trigger_match_o = |trigger_match;
+
+ end else begin : gen_no_trigger_regs
+ assign tselect_rdata = 'b0;
+ assign tmatch_control_rdata = 'b0;
+ assign tmatch_value_rdata = 'b0;
+ assign trigger_match_o = 'b0;
+ end
+
+ //////////////////////////
+ // CPU control register //
+ //////////////////////////
+
+ // Cast register write data
+ assign cpuctrl_wdata_raw = cpu_ctrl_t'(csr_wdata_int[$bits(cpu_ctrl_t)-1:0]);
+
+ // Generate fixed time execution bit
+ if (DataIndTiming) begin : gen_dit
+ assign cpuctrl_wdata.data_ind_timing = cpuctrl_wdata_raw.data_ind_timing;
+
+ end else begin : gen_no_dit
+ // tieoff for the unused bit
+ logic unused_dit;
+ assign unused_dit = cpuctrl_wdata_raw.data_ind_timing;
+
+ // field will always read as zero if not configured
+ assign cpuctrl_wdata.data_ind_timing = 1'b0;
+ end
+
+ assign data_ind_timing_o = cpuctrl_q.data_ind_timing;
+
+ // Generate dummy instruction signals
+ if (DummyInstructions) begin : gen_dummy
+ assign cpuctrl_wdata.dummy_instr_en = cpuctrl_wdata_raw.dummy_instr_en;
+ assign cpuctrl_wdata.dummy_instr_mask = cpuctrl_wdata_raw.dummy_instr_mask;
+
+ // Signal a write to the seed register
+ assign dummy_instr_seed_en_o = csr_we_int && (csr_addr == CSR_SECURESEED);
+ assign dummy_instr_seed_o = csr_wdata_int;
+
+ end else begin : gen_no_dummy
+ // tieoff for the unused bit
+ logic unused_dummy_en;
+ logic [2:0] unused_dummy_mask;
+ assign unused_dummy_en = cpuctrl_wdata_raw.dummy_instr_en;
+ assign unused_dummy_mask = cpuctrl_wdata_raw.dummy_instr_mask;
+
+ // field will always read as zero if not configured
+ assign cpuctrl_wdata.dummy_instr_en = 1'b0;
+ assign cpuctrl_wdata.dummy_instr_mask = 3'b000;
+ assign dummy_instr_seed_en_o = 1'b0;
+ assign dummy_instr_seed_o = '0;
+ end
+
+ assign dummy_instr_en_o = cpuctrl_q.dummy_instr_en;
+ assign dummy_instr_mask_o = cpuctrl_q.dummy_instr_mask;
+
+ // Generate icache enable bit
+ if (ICache) begin : gen_icache_enable
+ assign cpuctrl_wdata.icache_enable = cpuctrl_wdata_raw.icache_enable;
+ end else begin : gen_no_icache
+ // tieoff for the unused icen bit
+ logic unused_icen;
+ assign unused_icen = cpuctrl_wdata_raw.icache_enable;
+
+ // icen field will always read as zero if ICache not configured
+ assign cpuctrl_wdata.icache_enable = 1'b0;
+ end
+
+ assign cpuctrl_wdata.double_fault_seen = cpuctrl_wdata_raw.double_fault_seen;
+ assign cpuctrl_wdata.sync_exc_seen = cpuctrl_wdata_raw.sync_exc_seen;
+
+ assign icache_enable_o = cpuctrl_q.icache_enable;
+
+ cheriot_csr #(
+ .Width ($bits(cpu_ctrl_t)),
+ .ShadowCopy(ShadowCSR),
+ .ResetValue('0)
+ ) u_cpuctrl_csr (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .wr_data_i ({cpuctrl_d}),
+ .wr_en_i (cpuctrl_we),
+ .rd_data_o (cpuctrl_q),
+ .rd_error_o(cpuctrl_err)
+ );
+
+ assign csr_shadow_err_o = mstatus_err | mtvec_err | pmp_csr_err | cpuctrl_err;
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ `ASSERT(IbexCsrOpEnRequiresAccess, csr_op_en_i |-> csr_access_i)
+
+ //////////////////////
+  // CHERIoT SCRs
+ //////////////////////
+
+ if (CHERIoTEn) begin: gen_scr
+ reg_cap_t pcc_exc_cap;
+ reg_cap_t mtdc_cap;
+ logic [31:0] mtdc_data;
+ reg_cap_t mscratchc_cap;
+ logic [31:0] mscratchc_data; // note this is separate from legacy mscratch
+
+
+ logic mtdc_en_cheri, mscratchc_en_cheri;
+
+ always_comb begin
+ case (cheri_csr_addr_i)
+ CHERI_SCR_DEPCC:
+ begin
+ cheri_csr_rdata_o = debug_mode_i ? depc_q : 0;
+ cheri_csr_rcap_o = debug_mode_i ? depc_cap : NULL_REG_CAP;
+ end
+ CHERI_SCR_DSCRATCHC0:
+ begin
+ cheri_csr_rdata_o = debug_mode_i ? dscratch0_q : 0;
+ cheri_csr_rcap_o = debug_mode_i ? dscratch0_cap : NULL_REG_CAP;
+ end
+ CHERI_SCR_DSCRATCHC1:
+ begin
+ cheri_csr_rdata_o = debug_mode_i ? dscratch1_q : 0;
+ cheri_csr_rcap_o = debug_mode_i ? dscratch1_cap : NULL_REG_CAP;
+ end
+ CHERI_SCR_MTCC:
+ begin
+ cheri_csr_rdata_o = mtvec_q;
+ cheri_csr_rcap_o = mtvec_cap;
+ end
+ CHERI_SCR_MTDC:
+ begin
+ cheri_csr_rdata_o = mtdc_data;
+ cheri_csr_rcap_o = mtdc_cap;
+ end
+ CHERI_SCR_MSCRATCHC:
+ begin
+ cheri_csr_rdata_o = mscratchc_data;
+ cheri_csr_rcap_o = mscratchc_cap;
+ end
+ CHERI_SCR_MEPCC:
+ begin
+ cheri_csr_rdata_o = mepc_q;
+ cheri_csr_rcap_o = mepc_cap;
+ end
+ default:
+ begin
+ cheri_csr_rdata_o = 32'h0;
+ cheri_csr_rcap_o = NULL_REG_CAP;
+ end
+ endcase
+ end
+
+ assign pcc_cap_o = pcc_cap_q;
+
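+    // Capture the current PCC, rebased to exception_pc, as the capability saved to MEPCC (or
+    // DEPCC when entering debug mode); csr_mepcc_clrtag_i requests that the tag of the saved
+    // capability be cleared (behaviour as suggested by the pcc2mepcc helper's arguments).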
+ assign pcc_exc_cap = pcc2mepcc(pcc_cap_q, exception_pc, csr_mepcc_clrtag_i);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ pcc_cap_q <= PCC_RESET_CAP;
+ end else begin
+ pcc_cap_q <= pcc_cap_d;
+ end
+ end
+
+ // PCC updating
+    // -- PC address range checking is always against the pcc_cap, which is only updated by
+    //    CHERI CJALR or exceptions. Legacy RV32 jumps/branches can change the PC but not the PCC
+    //    bounds/perms, so they are still limited by the original bounds in the IF-stage checking.
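+    // -- Priority of the PCC update below: exception entry (mtvec_cap), then MRET/DRET restore,
+    //    then a CHERI branch/jump (cheri_branch_req_i); otherwise the PCC holds its value.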
+ always_comb begin
+ full_cap_t tf_cap;
+ reg_cap_t tr_cap;
+ logic [31:0] tr_addr;
+
+ if (csr_save_cause_i) begin // Exception cases
+ tr_cap = mtvec_cap;
+ tr_addr = mtvec_q;
+ end else if (csr_restore_mret_i) begin
+ tr_cap = mepc_cap;
+ tr_addr = mepc_q;
+ end else if (csr_restore_dret_i & debug_mode_i) begin
+ tr_cap = depc_cap;
+ tr_addr = depc_q;
+ end else begin
+ tr_cap = NULL_REG_CAP;
+ tr_addr = 32'h0;
+ end
+
+ tf_cap = reg2fullcap(tr_cap, tr_addr);
+
+ // Exception cases
+ if (csr_save_cause_i | csr_restore_mret_i | (csr_restore_dret_i & debug_mode_i)) begin
+ pcc_cap_d = full2pcap(tf_cap);
+ end else if (cheri_branch_req_i) begin
+ pcc_cap_d = pcc_cap_i;
+ end else begin
+ pcc_cap_d = pcc_cap_q;
+ end
+ end
+
+ // mtvec extended capability
+ assign mtvec_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MTCC) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ mtvec_cap <= MTVEC_RESET_CAP;
+ else if (mtvec_en_cheri)
+ mtvec_cap <= cheri_csr_wcap_i;
+ end
+
+ // mepc extended capability
+ assign mepc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MEPCC) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ mepc_cap <= MEPC_RESET_CAP;
+ else if (csr_save_cause_i & (~debug_csr_save_i) & (~debug_mode_i))
+ mepc_cap <= pcc_exc_cap;
+ else if (cheri_pmode_i & mepc_en) // legacy cssrw; NMI recover
+ mepc_cap <= NULL_REG_CAP;
+ else if (mepc_en_cheri)
+ mepc_cap <= cheri_csr_wcap_i;
+ end
+
+ // MTDC capability
+ assign mtdc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MTDC) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mtdc_cap <= MTDC_RESET_CAP;
+ mtdc_data <= 32'h0;
+ end else if (mtdc_en_cheri) begin
+ mtdc_cap <= cheri_csr_wcap_i;
+ mtdc_data <= cheri_csr_wdata_i;
+ end
+ end
+
+ // MSCRATCHC capability
+ assign mscratchc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MSCRATCHC) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mscratchc_cap <= MSCRATCHC_RESET_CAP;
+ mscratchc_data <= 32'h0;
+ end else if (mscratchc_en_cheri) begin
+ mscratchc_cap <= cheri_csr_wcap_i;
+ mscratchc_data <= cheri_csr_wdata_i;
+ end
+ end
+
+ // depc extended capability
+ assign depc_en_cheri = debug_mode_i & cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_DEPCC) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni)
+ depc_cap <= NULL_REG_CAP;
+ else if (csr_save_cause_i & debug_csr_save_i)
+ depc_cap <= pcc_exc_cap;
+ else if (depc_en_cheri)
+ depc_cap <= cheri_csr_wcap_i;
+ end
+
+ // dscratch0/1 extended capability
+ assign dscratch0_en_cheri = debug_mode_i & cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_DSCRATCHC0) && (cheri_csr_op_i == CHERI_CSR_RW);
+ assign dscratch1_en_cheri = debug_mode_i & cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_DSCRATCHC1) && (cheri_csr_op_i == CHERI_CSR_RW);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ dscratch0_cap <= NULL_REG_CAP;
+ dscratch1_cap <= NULL_REG_CAP;
+ end else if (dscratch0_en_cheri)
+ dscratch0_cap <= cheri_csr_wcap_i;
+ else if (dscratch1_en_cheri)
+ dscratch1_cap <= cheri_csr_wcap_i;
+
+ end
+
+    // fatal error condition (unrecoverable; an external reset is needed):
+    // an exception is taken while the MTCC (mtvec capability) is invalid
+ logic cheri_fatal_err_q;
+
+ assign cheri_fatal_err_o = cheri_fatal_err_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cheri_fatal_err_q <= 1'b0;
+ end else begin
+ if (cheri_pmode_i & csr_save_cause_i & ~mtvec_cap.valid)
+ cheri_fatal_err_q <= 1'b1;
+ end
+ end
+
+
+ end else begin: gen_no_scr
+
+ assign cheri_csr_rdata_o = 32'h0;
+ assign cheri_csr_rcap_o = NULL_REG_CAP;
+
+ assign pcc_cap_o = NULL_PCC_CAP;
+ assign pcc_cap_q = NULL_PCC_CAP;
+
+ assign mtvec_en_cheri = 1'b0;
+ assign mepc_en_cheri = 1'b0;
+ assign depc_en_cheri = 1'b0;
+ assign dscratch0_en_cheri = 1'b0;
+ assign dscratch1_en_cheri = 1'b0;
+
+ assign cheri_fatal_err_o = 1'b0;
+
+ end
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv b/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv
new file mode 100644
index 0000000..9dbe1b6
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv
@@ -0,0 +1,57 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Control / status register primitive
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_csr #(
+ parameter int unsigned Width = 32,
+ parameter bit ShadowCopy = 1'b0,
+ parameter bit [Width-1:0] ResetValue = '0
+ ) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic [Width-1:0] wr_data_i,
+ input logic wr_en_i,
+ output logic [Width-1:0] rd_data_o,
+
+ output logic rd_error_o
+);
+
+ logic [Width-1:0] rdata_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rdata_q <= ResetValue;
+ end else if (wr_en_i) begin
+ rdata_q <= wr_data_i;
+ end
+ end
+
+ assign rd_data_o = rdata_q;
+
+ if (ShadowCopy) begin : gen_shadow
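+    // The shadow register stores the bitwise complement of the primary value; a fault that flips
+    // a bit in either copy breaks the rdata_q == ~shadow_q invariant and raises rd_error_o.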
+ logic [Width-1:0] shadow_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ shadow_q <= ~ResetValue;
+ end else if (wr_en_i) begin
+ shadow_q <= ~wr_data_i;
+ end
+ end
+
+ assign rd_error_o = rdata_q != ~shadow_q;
+
+ end else begin : gen_no_shadow
+ assign rd_error_o = 1'b0;
+ end
+
+ `ASSERT_KNOWN(IbexCSREnValid, wr_en_i)
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv
new file mode 100644
index 0000000..8b0fcdb
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv
@@ -0,0 +1,1432 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+/**
+ * Instruction decoder
+ *
+ * This module is fully combinatorial; clock and reset are used for
+ * assertions only.
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_decoder import cheri_pkg::*; #(
+ parameter bit RV32E = 0,
+ parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast,
+ parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone,
+ parameter bit BranchTargetALU = 0,
+ parameter bit CHERIoTEn = 1'b1,
+ parameter bit CheriPPLBC = 1'b0,
+ parameter bit CheriSBND2 = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+
+ // to/from controller
+ output logic illegal_insn_o, // illegal instr encountered
+ output logic ebrk_insn_o, // trap instr encountered
+ output logic mret_insn_o, // return from exception instr
+ // encountered
+ output logic dret_insn_o, // return from debug instr encountered
+ output logic ecall_insn_o, // syscall instr encountered
+ output logic wfi_insn_o, // wait for interrupt instr encountered
+ output logic jump_set_o, // jump taken set signal
+ input logic branch_taken_i, // registered branch decision
+ output logic icache_inval_o,
+
+ // from IF-ID pipeline register
+ input logic instr_first_cycle_i, // instruction read is in its first cycle
+ input logic [31:0] instr_rdata_i, // instruction read from memory/cache
+ input logic [31:0] instr_rdata_alu_i, // instruction read from memory/cache
+                                                       // (replicated to ease fan-out)
+
+ input logic illegal_c_insn_i, // compressed instruction decode failed
+
+ // immediates
+ output cheriot_pkg::imm_a_sel_e imm_a_mux_sel_o, // immediate selection for operand a
+ output cheriot_pkg::imm_b_sel_e imm_b_mux_sel_o, // immediate selection for operand b
+ output cheriot_pkg::op_a_sel_e bt_a_mux_sel_o, // branch target selection operand a
+ output cheriot_pkg::imm_b_sel_e bt_b_mux_sel_o, // branch target selection operand b
+ output logic [31:0] imm_i_type_o,
+ output logic [31:0] imm_s_type_o,
+ output logic [31:0] imm_b_type_o,
+ output logic [31:0] imm_u_type_o,
+ output logic [31:0] imm_j_type_o,
+ output logic [31:0] zimm_rs1_type_o,
+
+ // register file
+ output cheriot_pkg::rf_wd_sel_e rf_wdata_sel_o, // RF write data selection
+ output logic rf_we_o, // write enable for regfile
+ output logic rf_we_or_load_o,
+ output logic [4:0] rf_raddr_a_o,
+ output logic [4:0] rf_raddr_b_o,
+ output logic [4:0] rf_waddr_o,
+ output logic rf_ren_a_o, // Instruction reads from RF addr A
+ output logic rf_ren_b_o, // Instruction reads from RF addr B
+
+ // ALU
+ output cheriot_pkg::alu_op_e alu_operator_o, // ALU operation selection
+ output cheriot_pkg::op_a_sel_e alu_op_a_mux_sel_o, // operand a selection: reg value, PC,
+ // immediate or zero
+ output cheriot_pkg::op_b_sel_e alu_op_b_mux_sel_o, // operand b selection: reg value or
+ // immediate
+ output logic alu_multicycle_o, // ternary bitmanip instruction
+
+ // MULT & DIV
+ output logic mult_en_o, // perform integer multiplication
+ output logic div_en_o, // perform integer division or remainder
+ output logic mult_sel_o, // as above but static, for data muxes
+ output logic div_sel_o, // as above but static, for data muxes
+
+ output cheriot_pkg::md_op_e multdiv_operator_o,
+ output logic [1:0] multdiv_signed_mode_o,
+
+ // CSRs
+ output logic csr_access_o, // access to CSR
+ output cheriot_pkg::csr_op_e csr_op_o, // operation to perform on CSR
+ output logic csr_cheri_always_ok_o, // CHERI safe-listed (no ASR needed) CSRs
+
+ // LSU
+ output logic data_req_o, // start transaction to data memory
+ output logic cheri_data_req_o, // cheri lsu transaction
+ output logic data_we_o, // write enable
+ output logic [1:0] data_type_o, // size of transaction: byte, half
+ // word or word
+ output logic data_sign_extension_o, // sign extension for data read from
+ // memory
+
+ // jump/branches
+ output logic jump_in_dec_o, // jump is being calculated in ALU
+ output logic branch_in_dec_o,
+
+ // output to cheri EX
+ output logic instr_is_cheri_o,
+ output logic instr_is_legal_cheri_o,
+ output logic [11:0] cheri_imm12_o,
+ output logic [19:0] cheri_imm20_o,
+ output logic [20:0] cheri_imm21_o,
+ output logic [OPDW-1:0] cheri_operator_o,
+ output logic [4:0] cheri_cs2_dec_o,
+ output logic cheri_multicycle_dec_o
+);
+
+ import cheriot_pkg::*;
+
+ localparam bit CheriLimit16Regs = CHERIoTEn;
+
+ logic illegal_insn;
+ logic illegal_reg_rv32e;
+ logic illegal_reg_cheri;
+ logic csr_illegal;
+ logic rf_we;
+
+ logic [31:0] instr;
+ logic [31:0] instr_alu;
+ logic [9:0] unused_instr_alu;
+ // Source/Destination register instruction index
+ logic [4:0] instr_rs1;
+ logic [4:0] instr_rs2;
+ logic [4:0] instr_rs3;
+ logic [4:0] instr_rd;
+
+ logic use_rs3_d;
+ logic use_rs3_q;
+
+ csr_op_e csr_op;
+
+ opcode_e opcode;
+ opcode_e opcode_alu;
+
+ logic cheri_opcode_en;
+ logic cheri_auipcc_en;
+ logic cheri_auicgp_en;
+ logic cheri_jalr_en;
+ logic cheri_jal_en;
+ logic cheri_cload_en;
+ logic cheri_cstore_en;
+ logic instr_is_legal_cheri;
+ logic cheri_rf_ren_a, cheri_rf_ren_b;
+ logic cheri_rf_we_dec;
+
+ // To help timing the flops containing the current instruction are replicated to reduce fan-out.
+ // instr_alu is used to determine the ALU control logic and associated operand/imm select signals
+ // as the ALU is often on the more critical timing paths. instr is used for everything else.
+ assign instr = instr_rdata_i;
+ assign instr_alu = instr_rdata_alu_i;
+
+ //////////////////////////////////////
+ // Register and immediate selection //
+ //////////////////////////////////////
+
+ // immediate extraction and sign extension
+ assign imm_i_type_o = { {20{instr[31]}}, instr[31:20] };
+ assign imm_s_type_o = { {20{instr[31]}}, instr[31:25], instr[11:7] };
+ assign imm_b_type_o = { {19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 };
+ assign imm_u_type_o = { instr[31:12], 12'b0 };
+ assign imm_j_type_o = { {12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 };
+
+ // immediate for CSR manipulation (zero extended)
+ assign zimm_rs1_type_o = { 27'b0, instr_rs1 }; // rs1
+
+ if (RV32B != RV32BNone) begin : gen_rs3_flop
+ // the use of rs3 is known one cycle ahead.
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ use_rs3_q <= 1'b0;
+ end else begin
+ use_rs3_q <= use_rs3_d;
+ end
+ end
+ end else begin : gen_no_rs3_flop
+ logic unused_clk;
+ logic unused_rst_n;
+
+ // Clock and reset unused when there's no rs3 flop
+ assign unused_clk = clk_i;
+ assign unused_rst_n = rst_ni;
+
+ // always zero
+ assign use_rs3_q = use_rs3_d;
+ end
+
+ // source registers
+ assign instr_rs1 = instr[19:15];
+ assign instr_rs2 = instr[24:20];
+ assign instr_rs3 = instr[31:27];
+
+ // read cx3 if AUICGP
+  // note for GDC (c3) we want to use the regular scheme to resolve data hazards, instead of using
+  // sideband signals to export CX3 from the register file directly
+ logic [4:0] raddr_a, raddr_b;
+ assign raddr_a = cheri_auicgp_en ? 5'h3 : ((use_rs3_q & ~instr_first_cycle_i) ? instr_rs3 : instr_rs1); // rs3 / rs1
+ assign raddr_b = instr_rs2; // rs2
+
+ // cheriot only uses 16 registers and repurposes the MSB addr bits
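+  // e.g. in pure-cap mode an encoding naming x17 is truncated to register 1 here, but the
+  // register checks below flag such encodings as illegal when the register is actually used.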
+ if (CheriLimit16Regs) begin
+    assign rf_raddr_a_o = cheri_pmode_i ? {1'b0, raddr_a[3:0]} : raddr_a;
+    assign rf_raddr_b_o = cheri_pmode_i ? {1'b0, raddr_b[3:0]} : raddr_b;
+ end else begin
+ assign rf_raddr_a_o = raddr_a;
+ assign rf_raddr_b_o = raddr_b;
+ end
+
+ // destination register
+ assign instr_rd = instr[11:7];
+ if (CheriLimit16Regs) begin
+ assign rf_waddr_o = cheri_pmode_i ? {1'b0, instr_rd[3:0]} : instr_rd; // rd
+ end else begin
+ assign rf_waddr_o = instr_rd; // rd
+ end
+
+ ////////////////////
+ // Register check //
+ ////////////////////
+
+ // rf_we from decoder doesn't cover memory load case (where regfile write signal comes from LSU response)
+ logic rf_we_or_load;
+ assign rf_we_or_load = rf_we | (opcode == OPCODE_LOAD);
+
+ assign rf_we_or_load_o = rf_we_or_load;
+
+ if (RV32E) begin : gen_rv32e_reg_check_active
+ //assign illegal_reg_rv32e = ((rf_raddr_a_o[4] & (alu_op_a_mux_sel_o == OP_A_REG_A)) |
+ // (rf_raddr_b_o[4] & (alu_op_b_mux_sel_o == OP_B_REG_B)) |
+ assign illegal_reg_rv32e = ((rf_raddr_a_o[4] & rf_ren_a_o) |
+ (rf_raddr_b_o[4] & rf_ren_b_o) |
+ (instr_rs3[4] & use_rs3_d & rf_ren_a_o) |
+ (rf_waddr_o[4] & rf_we_or_load));
+ end else begin : gen_rv32e_reg_check_inactive
+ assign illegal_reg_rv32e = 1'b0;
+ end
+
+ if (CheriLimit16Regs) begin : gen_cheri_reg_check_active
+ assign illegal_reg_cheri = cheri_pmode_i &
+ ((raddr_a[4] & rf_ren_a_o) |
+ (raddr_b[4] & rf_ren_b_o) |
+ (instr_rs3[4] & use_rs3_d & rf_ren_a_o) |
+ (instr_rd[4] & rf_we_or_load ));
+ end else begin : gen_cheri_reg_check_inactive
+ assign illegal_reg_cheri = 1'b0;
+ end
+
+ ///////////////////////
+ // CSR operand check //
+ ///////////////////////
+ always_comb begin : csr_operand_check
+ csr_op_o = csr_op;
+
+ // CSRRSI/CSRRCI must not write 0 to CSRs (uimm[4:0]=='0)
+ // CSRRS/CSRRC must not write from x0 to CSRs (rs1=='0)
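+    // e.g. "csrrs x5, mstatus, x0" is demoted to a pure read here, so it never performs a write
+    // to mstatus.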
+ if ((csr_op == CSR_OP_SET || csr_op == CSR_OP_CLEAR) &&
+ instr_rs1 == '0) begin
+ csr_op_o = CSR_OP_READ;
+ end
+ end
+
+ /////////////
+ // Decoder //
+ /////////////
+
+ always_comb begin
+ jump_in_dec_o = 1'b0;
+ jump_set_o = 1'b0;
+ branch_in_dec_o = 1'b0;
+ icache_inval_o = 1'b0;
+
+ multdiv_operator_o = MD_OP_MULL;
+ multdiv_signed_mode_o = 2'b00;
+
+ rf_wdata_sel_o = RF_WD_EX;
+ rf_we = 1'b0;
+ rf_ren_a_o = 1'b0;
+ rf_ren_b_o = 1'b0;
+
+ csr_access_o = 1'b0;
+ csr_illegal = 1'b0;
+ csr_op = CSR_OP_READ;
+ csr_cheri_always_ok_o = 1'b0;
+
+ data_we_o = 1'b0;
+ data_type_o = 2'b00;
+ data_sign_extension_o = 1'b0;
+ data_req_o = 1'b0;
+ cheri_data_req_o = 1'b0;
+
+ illegal_insn = 1'b0;
+ ebrk_insn_o = 1'b0;
+ mret_insn_o = 1'b0;
+ dret_insn_o = 1'b0;
+ ecall_insn_o = 1'b0;
+ wfi_insn_o = 1'b0;
+
+ cheri_opcode_en = 1'b0;
+ cheri_cload_en = 1'b0;
+ cheri_cstore_en = 1'b0;
+ cheri_auipcc_en = 1'b0;
+ cheri_auicgp_en = 1'b0;
+ cheri_jalr_en = 1'b0;
+ cheri_jal_en = 1'b0;
+
+ opcode = opcode_e'(instr[6:0]);
+
+ unique case (opcode)
+
+ ///////////
+ // Jumps //
+ ///////////
+
+ OPCODE_JAL: begin // Jump and Link
+ if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin
+ // cheri_ex takes over JAL now as a single-cycle jump
+ cheri_jal_en = 1'b1;
+ illegal_insn = 1'b0;
+ rf_we = 1'b1;
+ end else begin
+ jump_in_dec_o = 1'b1;
+
+ if (instr_first_cycle_i) begin
+ // Calculate jump target (and store PC + 4 if BranchTargetALU is configured)
+ rf_we = BranchTargetALU;
+ jump_set_o = 1'b1;
+ end else begin
+ // Calculate and store PC+4
+ rf_we = 1'b1;
+ end
+ end
+ end
+
+ OPCODE_JALR: begin // Jump and Link Register
+ if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin
+ // cheri_ex takes over JALR now as a single-cycle jump
+ cheri_jalr_en = (instr[14:12] == 3'b0);
+ rf_ren_a_o = 1'b1;
+ rf_we = 1'b1;
+
+ if (instr[14:12] != 3'b0) begin
+ illegal_insn = 1'b1;
+ end
+ end else begin
+ jump_in_dec_o = 1'b1;
+
+ if (instr_first_cycle_i) begin
+ // Calculate jump target (and store PC + 4 if BranchTargetALU is configured)
+ rf_we = BranchTargetALU;
+ jump_set_o = 1'b1;
+ end else begin
+ // Calculate and store PC+4
+ rf_we = 1'b1;
+ end
+ if (instr[14:12] != 3'b0) begin
+ illegal_insn = 1'b1;
+ end
+
+ rf_ren_a_o = 1'b1;
+ end
+ end
+
+ OPCODE_BRANCH: begin // Branch
+ branch_in_dec_o = 1'b1;
+ // Check branch condition selection
+ unique case (instr[14:12])
+ 3'b000,
+ 3'b001,
+ 3'b100,
+ 3'b101,
+ 3'b110,
+ 3'b111: illegal_insn = 1'b0;
+ default: illegal_insn = 1'b1;
+ endcase
+
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b1;
+ end
+
+ ////////////////
+ // Load/store //
+ ////////////////
+
+ OPCODE_STORE: begin
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b1;
+ data_req_o = 1'b1; // keep this to pass LEC w/ ibex
+ data_we_o = 1'b1;
+
+ if (instr[14]) begin
+ illegal_insn = 1'b1;
+ end else if (instr[13:12] == 2'b11) begin
+ if (CHERIoTEn & cheri_pmode_i) begin
+ cheri_cstore_en = ~illegal_c_insn_i; // csc
+ cheri_data_req_o = ~illegal_c_insn_i;
+ data_req_o = 1'b0;
+ illegal_insn = 1'b0;
+ end else begin
+ cheri_cstore_en = 1'b0; // csc
+ cheri_data_req_o = 1'b0;
+ illegal_insn = 1'b1;
+ end
+ end
+
+ // store size
+ unique case (instr[13:12])
+ 2'b00: data_type_o = 2'b10; // sb
+ 2'b01: data_type_o = 2'b01; // sh
+ 2'b10: data_type_o = 2'b00; // sw
+ default: data_type_o = 2'b00;
+ endcase
+
+ end
+
+ OPCODE_LOAD: begin
+ rf_ren_a_o = 1'b1;
+ data_req_o = 1'b1;
+ data_type_o = 2'b00;
+
+ // sign/zero extension
+ data_sign_extension_o = ~instr[14];
+
+ // load size
+ unique case (instr[13:12])
+ 2'b00: data_type_o = 2'b10; // lb(u)
+ 2'b01: data_type_o = 2'b01; // lh(u)
+ 2'b10: begin
+ data_type_o = 2'b00; // lw
+ if (instr[14]) begin
+ illegal_insn = 1'b1; // lwu does not exist
+ end
+ end
+ 2'b11: begin
+          // illegal_c_insn_i is added to fix the c.clcsp case
+          // (the compressed decoder translates it to a CHERI instruction but could still assert
+          // illegal_c_insn if rd == 0)
+ if (CHERIoTEn & cheri_pmode_i && ~instr[14] && ~illegal_c_insn_i) begin
+ cheri_cload_en = 1'b1;
+ cheri_data_req_o = ~cheri_tsafe_en_i | CheriPPLBC;
+ data_req_o = 1'b0; // req generated by cheri_ex
+ illegal_insn = 1'b0;
+          end else begin // CHERIoT considers instr[14]=1 illegal
+ cheri_cload_en = 1'b0;
+ cheri_data_req_o = 1'b0;
+ illegal_insn = 1'b1;
+ end
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ end
+
+ /////////
+ // ALU //
+ /////////
+
+ OPCODE_LUI: begin // Load Upper Immediate
+ rf_we = 1'b1;
+ end
+
+ OPCODE_AUIPC: begin
+ if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin
+ cheri_auipcc_en = 1'b1;
+ illegal_insn = 1'b0;
+ rf_we = 1'b1;
+ end else begin
+ // OPCODE_AUIPC: begin // Add Upper Immediate to PC
+ rf_we = 1'b1;
+ end
+ end
+
+ OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+ rf_ren_a_o = 1'b1;
+ rf_we = 1'b1;
+
+ unique case (instr[14:12])
+ 3'b000,
+ 3'b010,
+ 3'b011,
+ 3'b100,
+ 3'b110,
+ 3'b111: illegal_insn = 1'b0;
+
+ 3'b001: begin
+ unique case (instr[31:27])
+ 5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // slli
+ 5'b0_0100: begin // sloi
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end
+ 5'b0_1001, // bclri
+ 5'b0_0101, // bseti
+ // 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // binvi
+ 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? (instr[26:25] != 2'b00) : 1'b1; // binvi
+ 5'b0_0001: begin
+ if (instr[26] == 1'b0) begin // shfl
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ end
+ 5'b0_1100: begin
+ unique case(instr[26:20])
+ 7'b000_0000, // clz
+ 7'b000_0001, // ctz
+ 7'b000_0010, // cpop
+ 7'b000_0100, // sext.b
+ 7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sext.h
+ 7'b001_0000, // crc32.b
+ 7'b001_0001, // crc32.h
+ 7'b001_0010, // crc32.w
+ 7'b001_1000, // crc32c.b
+ 7'b001_1001, // crc32c.h
+ 7'b001_1010: begin // crc32c.w
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ default : illegal_insn = 1'b1;
+ endcase
+ end
+
+ 3'b101: begin
+ if (instr[26]) begin
+ illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // fsri
+ end else begin
+ unique case (instr[31:27])
+ 5'b0_0000, // srli
+ 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // srai
+
+ 5'b0_0100: begin // sroi
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end
+ 5'b0_1100, // rori
+ // 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bexti
+ 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? (instr[26:25] != 2'b00) : 1'b1; // bexti
+
+ 5'b0_1101: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ illegal_insn = 1'b0; // grevi
+ end else if (RV32B == RV32BBalanced) begin
+ illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1; // rev8
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ end
+ 5'b0_0101: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ illegal_insn = 1'b0; // gorci
+ end else if (instr[24:20] == 5'b00111) begin
+ illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // orc.b
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ end
+ 5'b0_0001: begin
+ if (instr[26] == 1'b0) begin // unshfl
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end else begin
+ illegal_insn = 1'b1;
+ end
+ end
+
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+ end
+
+ default: illegal_insn = 1'b1;
+ endcase
+ end
+
+ OPCODE_OP: begin // Register-Register ALU operation
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b1;
+ rf_we = 1'b1;
+ if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
+ illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
+ end else begin
+ unique case ({instr[31:25], instr[14:12]})
+ // RV32I ALU operations
+ {7'b000_0000, 3'b000},
+ {7'b010_0000, 3'b000},
+ {7'b000_0000, 3'b010},
+ {7'b000_0000, 3'b011},
+ {7'b000_0000, 3'b100},
+ {7'b000_0000, 3'b110},
+ {7'b000_0000, 3'b111},
+ {7'b000_0000, 3'b001},
+ {7'b000_0000, 3'b101},
+ {7'b010_0000, 3'b101}: illegal_insn = 1'b0;
+
+ // RV32B zba
+ {7'b001_0000, 3'b010}, // sh1add
+ {7'b001_0000, 3'b100}, // sh2add
+ {7'b001_0000, 3'b110}, // sh3add
+ // RV32B zbb
+ {7'b010_0000, 3'b111}, // andn
+ {7'b010_0000, 3'b110}, // orn
+ {7'b010_0000, 3'b100}, // xnor
+ {7'b011_0000, 3'b001}, // rol
+ {7'b011_0000, 3'b101}, // ror
+ {7'b000_0101, 3'b100}, // min
+ {7'b000_0101, 3'b110}, // max
+ {7'b000_0101, 3'b101}, // minu
+ {7'b000_0101, 3'b111}, // maxu
+ {7'b000_0100, 3'b100}, // pack
+ {7'b010_0100, 3'b100}, // packu
+ {7'b000_0100, 3'b111}, // packh
+ // RV32B zbs
+ {7'b010_0100, 3'b001}, // bclr
+ {7'b001_0100, 3'b001}, // bset
+ {7'b011_0100, 3'b001}, // binv
+ {7'b010_0100, 3'b101}, // bext
+ // RV32B zbf
+ {7'b010_0100, 3'b111}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bfp
+ // RV32B zbp
+ {7'b011_0100, 3'b101}, // grev
+ {7'b001_0100, 3'b101}, // gorc
+ {7'b000_0100, 3'b001}, // shfl
+ {7'b000_0100, 3'b101}, // unshfl
+ {7'b001_0100, 3'b010}, // xperm.n
+ {7'b001_0100, 3'b100}, // xperm.b
+ {7'b001_0100, 3'b110}, // xperm.h
+ {7'b001_0000, 3'b001}, // slo
+ {7'b001_0000, 3'b101}, // sro
+ // RV32B zbc
+ {7'b000_0101, 3'b001}, // clmul
+ {7'b000_0101, 3'b010}, // clmulr
+ {7'b000_0101, 3'b011}: begin // clmulh
+ illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
+ end
+ // RV32B zbe
+ {7'b010_0100, 3'b110}, // bdecompress
+ {7'b000_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // bcompress
+
+ // RV32M instructions
+ {7'b000_0001, 3'b000}: begin // mul
+ multdiv_operator_o = MD_OP_MULL;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b001}: begin // mulh
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b010}: begin // mulhsu
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b01;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b011}: begin // mulhu
+ multdiv_operator_o = MD_OP_MULH;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b100}: begin // div
+ multdiv_operator_o = MD_OP_DIV;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b101}: begin // divu
+ multdiv_operator_o = MD_OP_DIV;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b110}: begin // rem
+ multdiv_operator_o = MD_OP_REM;
+ multdiv_signed_mode_o = 2'b11;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ {7'b000_0001, 3'b111}: begin // remu
+ multdiv_operator_o = MD_OP_REM;
+ multdiv_signed_mode_o = 2'b00;
+ illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0;
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ end
+ end
+
+ /////////////
+ // Special //
+ /////////////
+
+ OPCODE_MISC_MEM: begin
+ unique case (instr[14:12])
+ 3'b000: begin
+ // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+ rf_we = 1'b0;
+ end
+ 3'b001: begin
+ // FENCE.I is implemented as a jump to the next PC; this gives the required flushing
+ // behaviour (the I-side prefetch buffer is flushed and responses to any outstanding
+ // I-side requests are ignored).
+ // If present, the ICache will also be flushed.
+ jump_in_dec_o = 1'b1;
+
+ rf_we = 1'b0;
+
+ if (instr_first_cycle_i) begin
+ jump_set_o = 1'b1;
+ icache_inval_o = 1'b1;
+ end
+ end
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+ end
+
+ OPCODE_SYSTEM: begin
+ if (instr[14:12] == 3'b000) begin
+ // non CSR related SYSTEM instructions
+ unique case (instr[31:20])
+ 12'h000: // ECALL
+ // environment (system) call
+ ecall_insn_o = 1'b1;
+
+ 12'h001: // ebreak
+ // debugger trap
+ ebrk_insn_o = 1'b1;
+
+ 12'h302: // mret
+ mret_insn_o = 1'b1;
+
+ 12'h7b2: // dret
+ dret_insn_o = 1'b1;
+
+ 12'h105: // wfi
+ wfi_insn_o = 1'b1;
+
+ default:
+ illegal_insn = 1'b1;
+ endcase
+
+ // rs1 and rd must be 0
+ if (instr_rs1 != 5'b0 || instr_rd != 5'b0) begin
+ illegal_insn = 1'b1;
+ end
+ end else begin
+ // instruction to read/modify CSR
+ csr_access_o = 1'b1;
+ rf_wdata_sel_o = RF_WD_CSR;
+ rf_we = 1'b1;
+
+ if (~instr[14]) begin
+ rf_ren_a_o = 1'b1;
+ end
+
+ unique case (instr[13:12])
+ 2'b01: csr_op = CSR_OP_WRITE;
+ 2'b10: csr_op = CSR_OP_SET;
+ 2'b11: csr_op = CSR_OP_CLEAR;
+ default: csr_illegal = 1'b1;
+ endcase
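+ // Informative example (derived from the case above): instr[13:12] == 2'b01 selects
+ // CSR_OP_WRITE (csrrw/csrrwi), 2'b10 selects CSR_OP_SET (csrrs/csrrsi) and 2'b11 selects
+ // CSR_OP_CLEAR (csrrc/csrrci); instr[14] only distinguishes the register form (reads rs1)
+ // from the immediate form (zero-extends the rs1 field).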
+
+ // always allow access to the following CSRs even without ASR permission
+ // -- 0xC01-0xC9F (unprivileged counters)
+ // -- 0xB01-0xB9F (M-mode counters).
+ // Note: 0xB01 is not defined by the RISC-V privileged spec; the CSR register logic handles it.
+ csr_cheri_always_ok_o = CHERIoTEn & cheri_pmode_i &
+ (((instr[31:28] == 4'hb) || (instr[31:28] == 4'hc)) &&
+ ((instr[27] == 1'b0) || (instr[26:25] == 2'b00)));
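+ // Informative example: CSR address 0xC03 (instr[31:28] == 4'hc, instr[27] == 1'b0) passes the
+ // decode above, while 0xCC0 (instr[27] == 1'b1, instr[26:25] == 2'b10) does not. As written,
+ // the decode also admits 0xB00/0xC00.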
+
+ illegal_insn = csr_illegal;
+ end
+ end
+
+ OPCODE_CHERI: begin
+ if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin
+ cheri_opcode_en = 1'b1;
+ rf_ren_a_o = cheri_rf_ren_a;
+ rf_ren_b_o = cheri_rf_ren_b;
+ rf_we = cheri_rf_we_dec;
+ illegal_insn = ~instr_is_legal_cheri;
+ end else begin
+ cheri_opcode_en = 1'b0;
+ rf_ren_a_o = 1'b0;
+ rf_ren_b_o = 1'b0;
+ rf_we = 1'b0;
+ illegal_insn = 1'b1;
+ end
+ end
+
+ OPCODE_AUICGP: begin
+ if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin
+ cheri_auicgp_en = 1'b1;
+ rf_ren_a_o = 1'b1;
+ rf_ren_b_o = 1'b0;
+ rf_we = 1'b1;
+ illegal_insn = 1'b0;
+ end else begin
+ cheri_opcode_en = 1'b0;
+ rf_ren_a_o = 1'b0;
+ rf_ren_b_o = 1'b0;
+ illegal_insn = 1'b1;
+ end
+ end
+
+ default: begin
+ illegal_insn = 1'b1;
+ end
+ endcase
+
+ // make sure illegal compressed instructions cause illegal instruction exceptions
+ if (illegal_c_insn_i) begin
+ illegal_insn = 1'b1;
+ end
+
+ // make sure illegal instructions detected in the decoder do not propagate from decoder
+ // into register file, LSU, EX, WB, CSRs, PC
+ // NOTE: instructions can also be detected to be illegal inside the CSRs (upon accesses with
+ // insufficient privileges), or when accessing non-available registers in RV32E,
+ // these cases are not handled here
+ if (illegal_insn) begin
+ rf_we = 1'b0;
+ data_req_o = 1'b0;
+ data_we_o = 1'b0;
+ jump_in_dec_o = 1'b0;
+ jump_set_o = 1'b0;
+ branch_in_dec_o = 1'b0;
+ csr_access_o = 1'b0;
+ end
+ end
+
+ /////////////////////////////
+ // Decoder for ALU control //
+ /////////////////////////////
+
+ always_comb begin
+ alu_operator_o = ALU_SLTU;
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+
+ imm_a_mux_sel_o = IMM_A_ZERO;
+ imm_b_mux_sel_o = IMM_B_I;
+
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ bt_b_mux_sel_o = IMM_B_I;
+
+
+ opcode_alu = opcode_e'(instr_alu[6:0]);
+
+ use_rs3_d = 1'b0;
+ alu_multicycle_o = 1'b0;
+ mult_sel_o = 1'b0;
+ div_sel_o = 1'b0;
+
+ unique case (opcode_alu)
+
+ ///////////
+ // Jumps //
+ ///////////
+
+ OPCODE_JAL: begin // Jump and Link
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ bt_b_mux_sel_o = IMM_B_J;
+ end
+
+ // Jumps take two cycles without the BTALU
+ if (instr_first_cycle_i && !BranchTargetALU) begin
+ // Calculate jump target
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_J;
+ alu_operator_o = ALU_ADD;
+ end else begin
+ // Calculate and store PC+4
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+
+ OPCODE_JALR: begin // Jump and Link Register
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_REG_A;
+ bt_b_mux_sel_o = IMM_B_I;
+ end
+
+ // Jumps take two cycles without the BTALU
+ if (instr_first_cycle_i && !BranchTargetALU) begin
+ // Calculate jump target
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ alu_operator_o = ALU_ADD;
+ end else begin
+ // Calculate and store PC+4
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+
+ OPCODE_BRANCH: begin // Branch
+ // Check branch condition selection
+ unique case (instr_alu[14:12])
+ 3'b000: alu_operator_o = ALU_EQ;
+ 3'b001: alu_operator_o = ALU_NE;
+ 3'b100: alu_operator_o = ALU_LT;
+ 3'b101: alu_operator_o = ALU_GE;
+ 3'b110: alu_operator_o = ALU_LTU;
+ 3'b111: alu_operator_o = ALU_GEU;
+ default: ;
+ endcase
+
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ // Not-taken branch will jump to next instruction (used in secure mode)
+ bt_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+ end
+
+ // Without branch target ALU, a branch is a two-stage operation using the Main ALU in both
+ // stages
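+ // Informative example: with BranchTargetALU == 0 the first cycle compares rs1 and rs2 using
+ // the operator selected above, and the second cycle computes PC + B-immediate (or the
+ // fall-through PC for a not-taken branch in secure mode).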
+ if (instr_first_cycle_i) begin
+ // First evaluate the branch condition
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+ end else if (!BranchTargetALU) begin
+ // Then calculate jump target
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ // Not-taken branch will jump to next instruction (used in secure mode)
+ imm_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+
+ ////////////////
+ // Load/store //
+ ////////////////
+
+ OPCODE_STORE: begin
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+ alu_operator_o = ALU_ADD;
+
+ if (!instr_alu[14]) begin
+ // offset from immediate
+ imm_b_mux_sel_o = IMM_B_S;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end
+ end
+
+ OPCODE_LOAD: begin
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+
+ // offset from immediate
+ alu_operator_o = ALU_ADD;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+ end
+
+ /////////
+ // ALU //
+ /////////
+
+ OPCODE_LUI: begin // Load Upper Immediate
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_a_mux_sel_o = IMM_A_ZERO;
+ imm_b_mux_sel_o = IMM_B_U;
+ alu_operator_o = ALU_ADD;
+ end
+
+ // use CHERI version of AUIPCC when pmode == 1
+ OPCODE_AUIPC: begin // Add Upper Immediate to PC
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_U;
+ alu_operator_o = ALU_ADD;
+ end
+
+ OPCODE_OP_IMM: begin // Register-Immediate ALU Operations
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_I;
+
+ unique case (instr_alu[14:12])
+ 3'b000: alu_operator_o = ALU_ADD; // Add Immediate
+ 3'b010: alu_operator_o = ALU_SLT; // Set to one if Lower Than Immediate
+ 3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned
+ 3'b100: alu_operator_o = ALU_XOR; // Exclusive Or with Immediate
+ 3'b110: alu_operator_o = ALU_OR; // Or with Immediate
+ 3'b111: alu_operator_o = ALU_AND; // And with Immediate
+
+ 3'b001: begin
+ if (RV32B != RV32BNone) begin
+ unique case (instr_alu[31:27])
+ 5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+ // Shift Left Ones by Immediate
+ 5'b0_0100: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO;
+ end
+ 5'b0_1001: alu_operator_o = ALU_BCLR; // Clear bit specified by immediate
+ 5'b0_0101: alu_operator_o = ALU_BSET; // Set bit specified by immediate
+ 5'b0_1101: alu_operator_o = ALU_BINV; // Invert bit specified by immediate.
+ // Shuffle with Immediate Control Value
+ 5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
+ 5'b0_1100: begin
+ unique case (instr_alu[26:20])
+ 7'b000_0000: alu_operator_o = ALU_CLZ; // clz
+ 7'b000_0001: alu_operator_o = ALU_CTZ; // ctz
+ 7'b000_0010: alu_operator_o = ALU_CPOP; // cpop
+ 7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
+ 7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
+ 7'b001_0000: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_B; // crc32.b
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_0001: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_H; // crc32.h
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_0010: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32_W; // crc32.w
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1000: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_B; // crc32c.b
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1001: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_H; // crc32c.h
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ 7'b001_1010: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ alu_operator_o = ALU_CRC32C_W; // crc32c.w
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ default: ;
+ endcase
+ end
+
+ default: ;
+ endcase
+ end else begin
+ alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+ end
+ end
+
+ 3'b101: begin
+ if (RV32B != RV32BNone) begin
+ if (instr_alu[26] == 1'b1) begin
+ alu_operator_o = ALU_FSR;
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end else begin
+ unique case (instr_alu[31:27])
+ 5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
+ 5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
+ // Shift Right Ones by Immediate
+ 5'b0_0100: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO;
+ end
+ 5'b0_1001: alu_operator_o = ALU_BEXT; // Extract bit specified by immediate.
+ 5'b0_1100: begin
+ alu_operator_o = ALU_ROR; // Rotate Right by Immediate
+ alu_multicycle_o = 1'b1;
+ end
+ 5'b0_1101: alu_operator_o = ALU_GREV; // General Reverse with Imm Control Val
+ 5'b0_0101: alu_operator_o = ALU_GORC; // General Or-combine with Imm Control Val
+ // Unshuffle with Immediate Control Value
+ 5'b0_0001: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin
+ if (instr_alu[26] == 1'b0) alu_operator_o = ALU_UNSHFL;
+ end
+ end
+ default: ;
+ endcase
+ end
+
+ end else begin
+ if (instr_alu[31:27] == 5'b0_0000) begin
+ alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
+ end else if (instr_alu[31:27] == 5'b0_1000) begin
+ alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
+ end
+ end
+ end
+
+ default: ;
+ endcase
+ end
+
+ OPCODE_OP: begin // Register-Register ALU operation
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_REG_B;
+
+ if (instr_alu[26]) begin
+ if (RV32B != RV32BNone) begin
+ unique case ({instr_alu[26:25], instr_alu[14:12]})
+ {2'b11, 3'b001}: begin
+ alu_operator_o = ALU_CMIX; // cmix
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b11, 3'b101}: begin
+ alu_operator_o = ALU_CMOV; // cmov
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b10, 3'b001}: begin
+ alu_operator_o = ALU_FSL; // fsl
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ {2'b10, 3'b101}: begin
+ alu_operator_o = ALU_FSR; // fsr
+ alu_multicycle_o = 1'b1;
+ if (instr_first_cycle_i) begin
+ use_rs3_d = 1'b1;
+ end else begin
+ use_rs3_d = 1'b0;
+ end
+ end
+ default: ;
+ endcase
+ end
+ end else begin
+ unique case ({instr_alu[31:25], instr_alu[14:12]})
+ // RV32I ALU operations
+ {7'b000_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add
+ {7'b010_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub
+ {7'b000_0000, 3'b010}: alu_operator_o = ALU_SLT; // Set Lower Than
+ {7'b000_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned
+ {7'b000_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor
+ {7'b000_0000, 3'b110}: alu_operator_o = ALU_OR; // Or
+ {7'b000_0000, 3'b111}: alu_operator_o = ALU_AND; // And
+ {7'b000_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical
+ {7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical
+ {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic
+
+ // RV32B ALU Operations
+ {7'b011_0000, 3'b001}: begin
+ if (RV32B != RV32BNone) begin
+ alu_operator_o = ALU_ROL;
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ {7'b011_0000, 3'b101}: begin
+ if (RV32B != RV32BNone) begin
+ alu_operator_o = ALU_ROR;
+ alu_multicycle_o = 1'b1;
+ end
+ end
+
+ {7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN;
+ {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX;
+ {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU;
+ {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU;
+
+ {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK;
+ {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU;
+ {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH;
+
+ {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_XNOR;
+ {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN;
+ {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN;
+
+ // RV32B zba
+ {7'b001_0000, 3'b010}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH1ADD;
+ {7'b001_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH2ADD;
+ {7'b001_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH3ADD;
+
+ // RV32B zbs
+ {7'b010_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BCLR;
+ {7'b001_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BSET;
+ {7'b011_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BINV;
+ {7'b010_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_BEXT;
+
+ // RV32B zbf
+ {7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP;
+
+ // RV32B zbp
+ {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV;
+ {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC;
+ {7'b000_0100, 3'b001}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SHFL;
+ end
+ {7'b000_0100, 3'b101}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL;
+ end
+ {7'b001_0100, 3'b010}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N;
+ end
+ {7'b001_0100, 3'b100}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B;
+ end
+ {7'b001_0100, 3'b110}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H;
+ end
+ {7'b001_0000, 3'b001}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO;
+ end
+ {7'b001_0000, 3'b101}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO;
+ end
+
+ // RV32B zbc
+ {7'b000_0101, 3'b001}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMUL;
+ end
+ {7'b000_0101, 3'b010}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULR;
+ end
+ {7'b000_0101, 3'b011}: begin
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULH;
+ end
+
+ // RV32B zbe
+ {7'b010_0100, 3'b110}: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_BDECOMPRESS;
+ alu_multicycle_o = 1'b1;
+ end
+ end
+ {7'b000_0100, 3'b110}: begin
+ if (RV32B == RV32BFull) begin
+ alu_operator_o = ALU_BCOMPRESS;
+ alu_multicycle_o = 1'b1;
+ end
+ end
+
+ // RV32M instructions, all use the same ALU operation
+ {7'b000_0001, 3'b000}: begin // mul
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b001}: begin // mulh
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b010}: begin // mulhsu
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b011}: begin // mulhu
+ alu_operator_o = ALU_ADD;
+ mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b100}: begin // div
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b101}: begin // divu
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b110}: begin // rem
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+ {7'b000_0001, 3'b111}: begin // remu
+ alu_operator_o = ALU_ADD;
+ div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1;
+ end
+
+ default: ;
+ endcase
+ end
+ end
+
+ /////////////
+ // Special //
+ /////////////
+
+ OPCODE_MISC_MEM: begin
+ unique case (instr_alu[14:12])
+ 3'b000: begin
+ // FENCE is treated as a NOP since all memory operations are already strictly ordered.
+ alu_operator_o = ALU_ADD; // nop
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end
+ 3'b001: begin
+ // FENCE.I will flush the IF stage, prefetch buffer and ICache if present.
+ if (BranchTargetALU) begin
+ bt_a_mux_sel_o = OP_A_CURRPC;
+ bt_b_mux_sel_o = IMM_B_INCR_PC;
+ end else begin
+ alu_op_a_mux_sel_o = OP_A_CURRPC;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_b_mux_sel_o = IMM_B_INCR_PC;
+ alu_operator_o = ALU_ADD;
+ end
+ end
+ default: ;
+ endcase
+ end
+
+ OPCODE_SYSTEM: begin
+ if (instr_alu[14:12] == 3'b000) begin
+ // non CSR related SYSTEM instructions
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ end else begin
+ // instruction to read/modify CSR
+ alu_op_b_mux_sel_o = OP_B_IMM;
+ imm_a_mux_sel_o = IMM_A_Z;
+ imm_b_mux_sel_o = IMM_B_I; // CSR address is encoded in I imm
+
+ if (instr_alu[14]) begin
+ // rs1 field is used as immediate
+ alu_op_a_mux_sel_o = OP_A_IMM;
+ end else begin
+ alu_op_a_mux_sel_o = OP_A_REG_A;
+ end
+ end
+
+ end
+ default: ;
+ endcase
+ end
+
+ // do not enable multdiv in case of illegal instruction exceptions
+ assign mult_en_o = illegal_insn_o ? 1'b0 : mult_sel_o;
+ assign div_en_o = illegal_insn_o ? 1'b0 : div_sel_o;
+
+ // make sure instructions accessing non-available registers in RV32E cause illegal
+ // instruction exceptions
+ assign illegal_insn_o = illegal_insn | illegal_reg_rv32e | illegal_reg_cheri;
+
+ // do not propagate regfile write enable if non-available registers are accessed in RV32E
+ assign rf_we_o = rf_we & ~illegal_reg_rv32e & ~illegal_reg_cheri;
+
+ // Not all bits are used
+ assign unused_instr_alu = {instr_alu[19:15],instr_alu[11:7]};
+
+ assign instr_is_legal_cheri_o = instr_is_legal_cheri & ~illegal_reg_cheri;
+
+ // cheri decoder
+ if (CHERIoTEn) begin : gen_cheri_decoder
+ cheri_decoder # (
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2)
+ ) u_cheri_decoder (
+ .cheri_opcode_en_i (cheri_opcode_en),
+ .cheri_tsafe_en_i (cheri_tsafe_en_i),
+ .cheri_auipcc_en_i (cheri_auipcc_en),
+ .cheri_auicgp_en_i (cheri_auicgp_en),
+ .cheri_jalr_en_i (cheri_jalr_en),
+ .cheri_jal_en_i (cheri_jal_en),
+ .cheri_cload_en_i (cheri_cload_en),
+ .cheri_cstore_en_i (cheri_cstore_en),
+ .instr_rdata_i (instr_rdata_i),
+ .instr_is_cheri_o (instr_is_cheri_o),
+ .instr_is_legal_cheri_o (instr_is_legal_cheri),
+ .cheri_imm12_o (cheri_imm12_o),
+ .cheri_imm20_o (cheri_imm20_o),
+ .cheri_imm21_o (cheri_imm21_o),
+ .cheri_operator_o (cheri_operator_o),
+ .cheri_cs2_dec_o (cheri_cs2_dec_o),
+ .cheri_rf_ren_a_o (cheri_rf_ren_a),
+ .cheri_rf_ren_b_o (cheri_rf_ren_b),
+ .cheri_rf_we_dec_o (cheri_rf_we_dec),
+ .cheri_multicycle_dec_o (cheri_multicycle_dec_o)
+ );
+ end else begin
+ assign instr_is_cheri_o = 1'b0;
+ assign instr_is_legal_cheri = 1'b0;
+ assign cheri_imm12_o = 12'h0;
+ assign cheri_imm20_o = 20'h0;
+ assign cheri_imm21_o = 21'h0;
+ assign cheri_operator_o = 'h0;
+ assign cheri_cs2_dec_o = 1'b0;
+ assign cheri_rf_ren_a = 1'b0;
+ assign cheri_rf_ren_b = 1'b0;
+ assign cheri_rf_we_dec = 1'b0;
+ assign cheri_multicycle_dec_o = 1'b0;
+
+ end
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // Selectors must be known/valid.
+ `ASSERT(IbexRegImmAluOpKnown, (opcode == OPCODE_OP_IMM) |->
+ !$isunknown(instr[14:12]))
+endmodule // decoder
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv b/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv
new file mode 100644
index 0000000..897172d
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv
@@ -0,0 +1,149 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Dummy instruction module
+ *
+ * Provides pseudo-randomly inserted fake instructions for secure code obfuscation
+ */
+
+module cheriot_dummy_instr import cheriot_pkg::*; #(
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault
+) (
+ // Clock and reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // Interface to CSRs
+ input logic dummy_instr_en_i,
+ input logic [2:0] dummy_instr_mask_i,
+ input logic dummy_instr_seed_en_i,
+ input logic [31:0] dummy_instr_seed_i,
+
+ // Interface to IF stage
+ input logic fetch_valid_i,
+ input logic id_in_ready_i,
+ output logic insert_dummy_instr_o,
+ output logic [31:0] dummy_instr_data_o
+);
+
+ localparam int unsigned TIMEOUT_CNT_W = 5;
+ localparam int unsigned OP_W = 5;
+
+ typedef enum logic [1:0] {
+ DUMMY_ADD = 2'b00,
+ DUMMY_MUL = 2'b01,
+ DUMMY_DIV = 2'b10,
+ DUMMY_AND = 2'b11
+ } dummy_instr_e;
+
+ typedef struct packed {
+ dummy_instr_e instr_type;
+ logic [OP_W-1:0] op_b;
+ logic [OP_W-1:0] op_a;
+ logic [TIMEOUT_CNT_W-1:0] cnt;
+ } lfsr_data_t;
+ localparam int unsigned LFSR_OUT_W = $bits(lfsr_data_t);
+
+ lfsr_data_t lfsr_data;
+ logic [TIMEOUT_CNT_W-1:0] dummy_cnt_incr, dummy_cnt_threshold;
+ logic [TIMEOUT_CNT_W-1:0] dummy_cnt_d, dummy_cnt_q;
+ logic dummy_cnt_en;
+ logic lfsr_en;
+ logic [LFSR_OUT_W-1:0] lfsr_state;
+ logic insert_dummy_instr;
+ logic [6:0] dummy_set;
+ logic [2:0] dummy_opcode;
+ logic [31:0] dummy_instr;
+ logic [31:0] dummy_instr_seed_q, dummy_instr_seed_d;
+
+ // Shift the LFSR every time we insert an instruction
+ assign lfsr_en = insert_dummy_instr & id_in_ready_i;
+
+ assign dummy_instr_seed_d = dummy_instr_seed_q ^ dummy_instr_seed_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ dummy_instr_seed_q <= '0;
+ end else if (dummy_instr_seed_en_i) begin
+ dummy_instr_seed_q <= dummy_instr_seed_d;
+ end
+ end
+
+ prim_lfsr #(
+ .LfsrDw ( LfsrWidth ),
+ .StateOutDw ( LFSR_OUT_W ),
+ .DefaultSeed ( RndCnstLfsrSeed ),
+ .StatePermEn ( 1'b1 ),
+ .StatePerm ( RndCnstLfsrPerm )
+ ) lfsr_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .seed_en_i ( dummy_instr_seed_en_i ),
+ .seed_i ( dummy_instr_seed_d ),
+ .lfsr_en_i ( lfsr_en ),
+ .entropy_i ( '0 ),
+ .state_o ( lfsr_state )
+ );
+
+ // Extract fields from LFSR
+ assign lfsr_data = lfsr_data_t'(lfsr_state);
+
+ // Set count threshold for inserting a new instruction. This is the pseudo-random value from the
+ // LFSR with a mask applied (based on CSR config data) to shorten the period if required.
+ assign dummy_cnt_threshold = lfsr_data.cnt & {dummy_instr_mask_i,{TIMEOUT_CNT_W-3{1'b1}}};
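+ // Informative example (with TIMEOUT_CNT_W == 5 as above): dummy_instr_mask_i == 3'b000 gives
+ // a mask of 5'b00011, so the threshold stays in the range 0-3; 3'b111 leaves the full 5-bit
+ // LFSR value, allowing thresholds up to 31.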
+ assign dummy_cnt_incr = dummy_cnt_q + {{TIMEOUT_CNT_W-1{1'b0}},1'b1};
+ // Clear the counter every time a new instruction is inserted
+ assign dummy_cnt_d = insert_dummy_instr ? '0 : dummy_cnt_incr;
+ // Increment the counter for each executed instruction while dummy instructions are
+ // enabled.
+ assign dummy_cnt_en = dummy_instr_en_i & id_in_ready_i &
+ (fetch_valid_i | insert_dummy_instr);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ dummy_cnt_q <= '0;
+ end else if (dummy_cnt_en) begin
+ dummy_cnt_q <= dummy_cnt_d;
+ end
+ end
+
+ // Insert a dummy instruction each time the counter hits the threshold
+ assign insert_dummy_instr = dummy_instr_en_i & (dummy_cnt_q == dummy_cnt_threshold);
+
+ // Encode instruction
+ always_comb begin
+ unique case (lfsr_data.instr_type)
+ DUMMY_ADD: begin
+ dummy_set = 7'b0000000;
+ dummy_opcode = 3'b000;
+ end
+ DUMMY_MUL: begin
+ dummy_set = 7'b0000001;
+ dummy_opcode = 3'b000;
+ end
+ DUMMY_DIV: begin
+ dummy_set = 7'b0000001;
+ dummy_opcode = 3'b100;
+ end
+ DUMMY_AND: begin
+ dummy_set = 7'b0000000;
+ dummy_opcode = 3'b111;
+ end
+ default: begin
+ dummy_set = 7'b0000000;
+ dummy_opcode = 3'b000;
+ end
+ endcase
+ end
+
+ // SET RS2 RS1 OP RD
+ assign dummy_instr = {dummy_set, lfsr_data.op_b, lfsr_data.op_a, dummy_opcode, 5'h00, 7'h33};
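+ // Informative example: DUMMY_AND with lfsr_data.op_a == 5'd5 and op_b == 5'd6 encodes
+ // "and x0, x5, x6" (funct7 = dummy_set, funct3 = dummy_opcode, rd = x0, opcode = OP/7'h33),
+ // i.e. an architecturally harmless instruction that only writes x0.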
+
+ // Assign outputs
+ assign insert_dummy_instr_o = insert_dummy_instr;
+ assign dummy_instr_data_o = dummy_instr;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv b/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv
new file mode 100644
index 0000000..8eb30a5
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv
@@ -0,0 +1,199 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Execution stage
+ *
+ * Execution block: Hosts ALU and MUL/DIV unit
+ */
+module cheriot_ex_block #(
+ parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast,
+ parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone,
+ parameter bit BranchTargetALU = 0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ // ALU
+ input cheriot_pkg::alu_op_e alu_operator_i,
+ input logic [31:0] alu_operand_a_i,
+ input logic [31:0] alu_operand_b_i,
+ input logic alu_instr_first_cycle_i,
+
+ // Branch Target ALU
+ // All of these signals are unused when BranchTargetALU == 0
+ input logic [31:0] bt_a_operand_i,
+ input logic [31:0] bt_b_operand_i,
+
+ // Multiplier/Divider
+ input cheriot_pkg::md_op_e multdiv_operator_i,
+ input logic mult_en_i, // dynamic enable signal, for FSM control
+ input logic div_en_i, // dynamic enable signal, for FSM control
+ input logic mult_sel_i, // static decoder output, for data muxes
+ input logic div_sel_i, // static decoder output, for data muxes
+ input logic [1:0] multdiv_signed_mode_i,
+ input logic [31:0] multdiv_operand_a_i,
+ input logic [31:0] multdiv_operand_b_i,
+ input logic multdiv_ready_id_i,
+ input logic data_ind_timing_i,
+
+ // intermediate val reg
+ output logic [1:0] imd_val_we_o,
+ output logic [33:0] imd_val_d_o[2],
+ input logic [33:0] imd_val_q_i[2],
+
+ // Outputs
+ output logic [31:0] alu_adder_result_ex_o, // to LSU
+ output logic [31:0] result_ex_o,
+ output logic [31:0] branch_target_o, // to IF
+ output logic branch_decision_o, // to ID
+
+ output logic ex_valid_o // EX has valid output
+);
+
+ import cheriot_pkg::*;
+
+ logic [31:0] alu_result, multdiv_result;
+
+ logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a;
+ logic [33:0] alu_adder_result_ext;
+ logic alu_cmp_result, alu_is_equal_result;
+ logic multdiv_valid;
+ logic multdiv_sel;
+ logic [31:0] alu_imd_val_q[2];
+ logic [31:0] alu_imd_val_d[2];
+ logic [ 1:0] alu_imd_val_we;
+ logic [33:0] multdiv_imd_val_d[2];
+ logic [ 1:0] multdiv_imd_val_we;
+
+ /*
+ The multdiv_i output is never selected if RV32M == RV32MNone.
+ At synthesis time, all the combinational and sequential logic
+ from the multdiv_i module is eliminated.
+ */
+ if (RV32M != RV32MNone) begin : gen_multdiv_m
+ assign multdiv_sel = mult_sel_i | div_sel_i;
+ end else begin : gen_multdiv_no_m
+ assign multdiv_sel = 1'b0;
+ end
+
+ // Intermediate Value Register Mux
+ assign imd_val_d_o[0] = multdiv_sel ? multdiv_imd_val_d[0] : {2'b0, alu_imd_val_d[0]};
+ assign imd_val_d_o[1] = multdiv_sel ? multdiv_imd_val_d[1] : {2'b0, alu_imd_val_d[1]};
+ assign imd_val_we_o = multdiv_sel ? multdiv_imd_val_we : alu_imd_val_we;
+
+ assign alu_imd_val_q = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]};
+
+ assign result_ex_o = multdiv_sel ? multdiv_result : alu_result;
+
+ // branch handling
+ assign branch_decision_o = alu_cmp_result;
+
+ if (BranchTargetALU) begin : g_branch_target_alu
+ logic [32:0] bt_alu_result;
+ logic unused_bt_carry;
+
+ assign bt_alu_result = bt_a_operand_i + bt_b_operand_i;
+
+ assign unused_bt_carry = bt_alu_result[32];
+ assign branch_target_o = bt_alu_result[31:0];
+ end else begin : g_no_branch_target_alu
+ // Unused bt_operand signals cause lint errors, this avoids them
+ logic [31:0] unused_bt_a_operand, unused_bt_b_operand;
+
+ assign unused_bt_a_operand = bt_a_operand_i;
+ assign unused_bt_b_operand = bt_b_operand_i;
+
+ assign branch_target_o = alu_adder_result_ex_o;
+ end
+
+ /////////
+ // ALU //
+ /////////
+
+ cheriot_alu #(
+ .RV32B(RV32B)
+ ) alu_i (
+ .operator_i (alu_operator_i),
+ .operand_a_i (alu_operand_a_i),
+ .operand_b_i (alu_operand_b_i),
+ .instr_first_cycle_i(alu_instr_first_cycle_i),
+ .imd_val_q_i (alu_imd_val_q),
+ .imd_val_we_o (alu_imd_val_we),
+ .imd_val_d_o (alu_imd_val_d),
+ .multdiv_operand_a_i(multdiv_alu_operand_a),
+ .multdiv_operand_b_i(multdiv_alu_operand_b),
+ .multdiv_sel_i (multdiv_sel),
+ .adder_result_o (alu_adder_result_ex_o),
+ .adder_result_ext_o (alu_adder_result_ext),
+ .result_o (alu_result),
+ .comparison_result_o(alu_cmp_result),
+ .is_equal_result_o (alu_is_equal_result)
+ );
+
+ ////////////////
+ // Multiplier //
+ ////////////////
+
+ if (RV32M == RV32MSlow) begin : gen_multdiv_slow
+ cheriot_multdiv_slow multdiv_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .mult_en_i (mult_en_i),
+ .div_en_i (div_en_i),
+ .mult_sel_i (mult_sel_i),
+ .div_sel_i (div_sel_i),
+ .operator_i (multdiv_operator_i),
+ .signed_mode_i (multdiv_signed_mode_i),
+ .op_a_i (multdiv_operand_a_i),
+ .op_b_i (multdiv_operand_b_i),
+ .alu_adder_ext_i (alu_adder_result_ext),
+ .alu_adder_i (alu_adder_result_ex_o),
+ .equal_to_zero_i (alu_is_equal_result),
+ .data_ind_timing_i (data_ind_timing_i),
+ .valid_o (multdiv_valid),
+ .alu_operand_a_o (multdiv_alu_operand_a),
+ .alu_operand_b_o (multdiv_alu_operand_b),
+ .imd_val_q_i (imd_val_q_i),
+ .imd_val_d_o (multdiv_imd_val_d),
+ .imd_val_we_o (multdiv_imd_val_we),
+ .multdiv_ready_id_i(multdiv_ready_id_i),
+ .multdiv_result_o (multdiv_result)
+ );
+ end else if (RV32M == RV32MFast || RV32M == RV32MSingleCycle) begin : gen_multdiv_fast
+ cheriot_multdiv_fast #(
+ .RV32M(RV32M)
+ ) multdiv_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .mult_en_i (mult_en_i),
+ .div_en_i (div_en_i),
+ .mult_sel_i (mult_sel_i),
+ .div_sel_i (div_sel_i),
+ .operator_i (multdiv_operator_i),
+ .signed_mode_i (multdiv_signed_mode_i),
+ .op_a_i (multdiv_operand_a_i),
+ .op_b_i (multdiv_operand_b_i),
+ .alu_operand_a_o (multdiv_alu_operand_a),
+ .alu_operand_b_o (multdiv_alu_operand_b),
+ .alu_adder_ext_i (alu_adder_result_ext),
+ .alu_adder_i (alu_adder_result_ex_o),
+ .equal_to_zero_i (alu_is_equal_result),
+ .data_ind_timing_i (data_ind_timing_i),
+ .imd_val_q_i (imd_val_q_i),
+ .imd_val_d_o (multdiv_imd_val_d),
+ .imd_val_we_o (multdiv_imd_val_we),
+ .multdiv_ready_id_i(multdiv_ready_id_i),
+ .valid_o (multdiv_valid),
+ .multdiv_result_o (multdiv_result)
+ );
+ end
+
+ // Multiplier/divider may require multiple cycles. The ALU output is valid in the same cycle
+ // unless the intermediate result register is being written (which indicates this isn't the
+ // final cycle of ALU operation).
+ assign ex_valid_o = multdiv_sel ? multdiv_valid : ~(|alu_imd_val_we);
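+ // Informative example: a single-cycle ALU operation never asserts alu_imd_val_we, so
+ // ex_valid_o is high in the same cycle; a multicycle operation (e.g. a rotate) writes the
+ // intermediate registers first, keeping ex_valid_o low until its final cycle.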
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv b/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv
new file mode 100644
index 0000000..463a9ec
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv
@@ -0,0 +1,298 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Fetch FIFO for 32-bit memory interface
+ *
+ * input port: send address and data to the FIFO
+ * clear_i clears the FIFO for the following cycle, including any new request
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_fetch_fifo #(
+ parameter int unsigned NUM_REQS = 2,
+ parameter bit ResetAll = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ // control signals
+ input logic clear_i, // clears the contents of the FIFO
+ output logic [NUM_REQS-1:0] busy_o,
+
+ // input port
+ input logic in_valid_i,
+ input logic [31:0] in_addr_i,
+ input logic [31:0] in_rdata_i,
+ input logic in_err_i,
+
+ input logic cheri_force_uc_i, // force unaligned compressed based on CHERI bounds check
+
+ // output port
+ output logic out_valid_o,
+ input logic out_ready_i,
+ output logic [31:0] out_addr_o,
+ output logic [31:0] out_rdata_o,
+ output logic out_err_o,
+ output logic out_err_plus2_o
+);
+
+ localparam int unsigned DEPTH = NUM_REQS+1;
+
+ // index 0 is used for output
+ logic [DEPTH-1:0] [31:0] rdata_d, rdata_q;
+ logic [DEPTH-1:0] err_d, err_q;
+ logic [DEPTH-1:0] valid_d, valid_q;
+ logic [DEPTH-1:0] lowest_free_entry;
+ logic [DEPTH-1:0] valid_pushed, valid_popped;
+ logic [DEPTH-1:0] entry_en;
+
+ logic pop_fifo;
+ logic [31:0] rdata, rdata_unaligned;
+ logic err, err_unaligned, err_plus2;
+ logic valid, valid_unaligned;
+
+ logic aligned_is_compressed, unaligned_is_compressed;
+
+ logic addr_incr_two;
+ logic [31:1] instr_addr_next;
+ logic [31:1] instr_addr_d, instr_addr_q;
+ logic instr_addr_en;
+ logic unused_addr_in;
+
+ /////////////////
+ // Output port //
+ /////////////////
+
+ assign rdata = valid_q[0] ? rdata_q[0] : in_rdata_i;
+ assign err = valid_q[0] ? err_q[0] : in_err_i;
+ assign valid = valid_q[0] | in_valid_i;
+
+ // The FIFO contains word aligned memory fetches, but the instructions contained in each entry
+ // might be half-word aligned (due to compressed instructions)
+ // e.g.
+ // | 31 16 | 15 0 |
+ // FIFO entry 0 | Instr 1 [15:0] | Instr 0 [15:0] |
+ // FIFO entry 1 | Instr 2 [15:0] | Instr 1 [31:16] |
+ //
+ // The FIFO also has a direct bypass path, so a complete instruction might be made up of data
+ // from the FIFO and new incoming data.
+ //
+
+ // Construct the output data for an unaligned instruction
+ assign rdata_unaligned = valid_q[1] ? {rdata_q[1][15:0], rdata[31:16]} :
+ {in_rdata_i[15:0], rdata[31:16]};
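+ // Informative example: for an instruction starting at a halfword boundary, its lower 16 bits
+ // come from rdata[31:16] (entry 0 or the bypass) and its upper 16 bits from entry 1, or from
+ // the incoming data when entry 1 is empty.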
+
+ // If entry[1] is valid, an error can come from entry[0] or entry[1], unless the
+ // instruction in entry[0] is compressed (entry[1] is a new instruction)
+ // If entry[1] is not valid, and entry[0] is, an error can come from entry[0] or the incoming
+ // data, unless the instruction in entry[0] is compressed
+ // If entry[0] is not valid, the error must come from the incoming data
+ assign err_unaligned = valid_q[1] ? ((err_q[1] & ~unaligned_is_compressed) | err_q[0]) :
+ ((valid_q[0] & err_q[0]) |
+ (in_err_i & (~valid_q[0] | ~unaligned_is_compressed)));
+
+ // Record when an error is caused by the second half of an unaligned 32-bit instruction.
+ // Only needs to be correct when unaligned and if err_unaligned is set
+ assign err_plus2 = valid_q[1] ? (err_q[1] & ~err_q[0]) :
+ (in_err_i & valid_q[0] & ~err_q[0]);
+
+ // An uncompressed unaligned instruction is only valid if both parts are available
+ assign valid_unaligned = valid_q[1] ? 1'b1 :
+ (valid_q[0] & in_valid_i);
+
+ // If there is an error, rdata is unknown
+`ifdef DII_SIM
+ logic [31:0] instr_rdata_dii;
+ logic [31:0] instr_pc;
+ logic instr_ack;
+
+ // for DII we directly force out_rdata_o (re-aligned instruction)
+ // to keep the unaligned/aligned_is_compressed signals in sync
+ // 32-bit instruction: instr_rdata_dii[31:0] = instr
+ // 16-bit instruction: instr_rdata_dii[15:0] = compressed instruction
+ // instr_rdata_dii[31:16] = don't care
+
+ assign unaligned_is_compressed = out_addr_o[1] & cheri_force_uc_i | ((instr_rdata_dii[1:0] != 2'b11) & ~err);
+ assign aligned_is_compressed = ~out_addr_o[1] & (instr_rdata_dii[1:0] != 2'b11) & ~err;
+
+ assign instr_ack = out_ready_i & out_valid_o;
+ assign instr_pc = out_addr_o;
+`else
+ assign unaligned_is_compressed = cheri_force_uc_i | ((rdata[17:16] != 2'b11) & ~err);
+ assign aligned_is_compressed = (rdata[ 1: 0] != 2'b11) & ~err;
+`endif
+
+ ////////////////////////////////////////
+ // Instruction aligner (if unaligned) //
+ ////////////////////////////////////////
+
+ always_comb begin
+ if (out_addr_o[1]) begin
+ // unaligned case
+
+`ifdef DII_SIM
+ out_rdata_o = instr_rdata_dii;
+`else
+ out_rdata_o = rdata_unaligned;
+`endif
+ out_err_o = err_unaligned;
+ out_err_plus2_o = err_plus2;
+
+ if (unaligned_is_compressed) begin
+ out_valid_o = valid;
+ end else begin
+ out_valid_o = valid_unaligned;
+ end
+ end else begin
+ // aligned case
+`ifdef DII_SIM
+ out_rdata_o = instr_rdata_dii;
+`else
+ out_rdata_o = rdata;
+`endif
+ out_err_o = err;
+ out_err_plus2_o = 1'b0;
+ out_valid_o = valid;
+ end
+ end
+
+ /////////////////////////
+ // Instruction address //
+ /////////////////////////
+
+ // Update the address on branches and every time an instruction is driven
+ assign instr_addr_en = clear_i | (out_ready_i & out_valid_o);
+
+ // Increment the address by two every time a compressed instruction is popped
+ assign addr_incr_two = instr_addr_q[1] ? unaligned_is_compressed :
+ aligned_is_compressed;
+
+ assign instr_addr_next = (instr_addr_q[31:1] +
+ // Increment address by 4 or 2
+ {29'd0,~addr_incr_two,addr_incr_two});
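+ // Informative example: since the adder works on address bits [31:1], the increment above is
+ // 2'b01 (+2 bytes) after a compressed instruction and 2'b10 (+4 bytes) otherwise, e.g.
+ // 0x100 -> 0x102 for an aligned compressed instruction and 0x100 -> 0x104 for a 32-bit one.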
+
+ assign instr_addr_d = clear_i ? in_addr_i[31:1] :
+ instr_addr_next;
+
+ if (ResetAll) begin : g_instr_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_addr_q <= '0;
+ end else if (instr_addr_en) begin
+ instr_addr_q <= instr_addr_d;
+ end
+ end
+ end else begin : g_instr_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (instr_addr_en) begin
+ instr_addr_q <= instr_addr_d;
+ end
+ end
+ end
+
+ // Output PC of current instruction
+ assign out_addr_o = {instr_addr_q, 1'b0};
+
+ // The LSB of the address is unused, since all addresses are halfword aligned
+ assign unused_addr_in = in_addr_i[0];
+
+ /////////////////
+ // FIFO status //
+ /////////////////
+
+ // Indicate the fill level of FIFO entries. This is used to determine when a new request can be
+ // made on the bus. The prefetch buffer only needs to know about the upper entries which overlap
+ // with NUM_REQS.
+ assign busy_o = valid_q[DEPTH-1:DEPTH-NUM_REQS];
+
+ /////////////////////
+ // FIFO management //
+ /////////////////////
+
+ // Since an entry can contain unaligned instructions, popping an entry can leave the entry valid
+ assign pop_fifo = out_ready_i & out_valid_o & (~aligned_is_compressed | out_addr_o[1]);
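+ // Informative example: issuing an aligned compressed instruction (out_addr_o[1] == 1'b0)
+ // keeps the entry, since its upper halfword still holds the next instruction; any issue from
+ // the upper halfword (out_addr_o[1] == 1'b1) pops the entry.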
+
+ for (genvar i = 0; i < (DEPTH - 1); i++) begin : g_fifo_next
+ // Calculate lowest free entry (write pointer)
+ if (i == 0) begin : g_ent0
+ assign lowest_free_entry[i] = ~valid_q[i];
+ end else begin : g_ent_others
+ assign lowest_free_entry[i] = ~valid_q[i] & valid_q[i-1];
+ end
+
+ // An entry is set when an incoming request chooses the lowest available entry
+ assign valid_pushed[i] = (in_valid_i & lowest_free_entry[i]) |
+ valid_q[i];
+ // Popping the FIFO shifts all entries down
+ assign valid_popped[i] = pop_fifo ? valid_pushed[i+1] : valid_pushed[i];
+ // All entries are wiped out on a clear
+ assign valid_d[i] = valid_popped[i] & ~clear_i;
+
+ // data flops are enabled if there is new data to shift into it, or
+ assign entry_en[i] = (valid_pushed[i+1] & pop_fifo) |
+ // a new request is incoming and this is the lowest free entry
+ (in_valid_i & lowest_free_entry[i] & ~pop_fifo);
+
+ // take the next entry or the incoming data
+ assign rdata_d[i] = valid_q[i+1] ? rdata_q[i+1] : in_rdata_i;
+ assign err_d [i] = valid_q[i+1] ? err_q [i+1] : in_err_i;
+ end
+ // The top entry is similar but with simpler muxing
+ assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & valid_q[DEPTH-2];
+ assign valid_pushed [DEPTH-1] = valid_q[DEPTH-1] | (in_valid_i & lowest_free_entry[DEPTH-1]);
+ assign valid_popped [DEPTH-1] = pop_fifo ? 1'b0 : valid_pushed[DEPTH-1];
+ assign valid_d [DEPTH-1] = valid_popped[DEPTH-1] & ~clear_i;
+ assign entry_en[DEPTH-1] = in_valid_i & lowest_free_entry[DEPTH-1];
+ assign rdata_d [DEPTH-1] = in_rdata_i;
+ assign err_d [DEPTH-1] = in_err_i;
+
+ ////////////////////
+ // FIFO registers //
+ ////////////////////
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ valid_q <= '0;
+ end else begin
+ valid_q <= valid_d;
+ end
+ end
+
+ for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs
+ if (ResetAll) begin : g_rdata_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rdata_q[i] <= '0;
+ err_q[i] <= '0;
+ end else if (entry_en[i]) begin
+ rdata_q[i] <= rdata_d[i];
+ err_q[i] <= err_d[i];
+ end
+ end
+ end else begin : g_rdata_nr
+ always_ff @(posedge clk_i) begin
+ if (entry_en[i]) begin
+ rdata_q[i] <= rdata_d[i];
+ err_q[i] <= err_d[i];
+ end
+ end
+ end
+ end
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // Must not push and pop simultaneously when FIFO full.
+ `ASSERT(IbexFetchFifoPushPopFull,
+ (in_valid_i && pop_fifo) |-> (!valid_q[DEPTH-1] || clear_i))
+
+ // Must not push to FIFO when full.
+ `ASSERT(IbexFetchFifoPushFull,
+ (in_valid_i) |-> (!valid_q[DEPTH-1] || clear_i))
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv b/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv
new file mode 100644
index 0000000..91ab025
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv
@@ -0,0 +1,1155 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Instruction cache
+ *
+ * Provides an instruction cache along with cache management, instruction buffering and prefetching
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_icache import cheriot_pkg::*; #(
+ parameter bit ICacheECC = 1'b0,
+ parameter bit ResetAll = 1'b0,
+ parameter int unsigned BusSizeECC = BUS_SIZE,
+ parameter int unsigned TagSizeECC = IC_TAG_SIZE,
+ parameter int unsigned LineSizeECC = IC_LINE_SIZE,
+ // Only cache branch targets
+ parameter bit BranchCache = 1'b0
+) (
+ // Clock and reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // Signal that the core would like instructions
+ input logic req_i,
+
+ // Set the cache's address counter
+ input logic branch_i,
+ input logic branch_mispredict_i,
+ input logic [31:0] mispredict_addr_i,
+ input logic [31:0] addr_i,
+
+ // IF stage interface: Pass fetched instructions to the core
+ input logic ready_i,
+ output logic valid_o,
+ output logic [31:0] rdata_o,
+ output logic [31:0] addr_o,
+ output logic err_o,
+ output logic err_plus2_o,
+
+ // Instruction memory / interconnect interface: Fetch instruction data from memory
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ output logic [31:0] instr_addr_o,
+ input logic [BUS_SIZE-1:0] instr_rdata_i,
+ input logic instr_err_i,
+ input logic instr_rvalid_i,
+
+ // RAM IO
+ output logic [IC_NUM_WAYS-1:0] ic_tag_req_o,
+ output logic ic_tag_write_o,
+ output logic [IC_INDEX_W-1:0] ic_tag_addr_o,
+ output logic [TagSizeECC-1:0] ic_tag_wdata_o,
+ input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS],
+ output logic [IC_NUM_WAYS-1:0] ic_data_req_o,
+ output logic ic_data_write_o,
+ output logic [IC_INDEX_W-1:0] ic_data_addr_o,
+ output logic [LineSizeECC-1:0] ic_data_wdata_o,
+ input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS],
+ input logic ic_scr_key_valid_i,
+
+ // Cache status
+ input logic icache_enable_i,
+ input logic icache_inval_i,
+ output logic busy_o
+);
+
+ // Number of fill buffers (must be >= 2)
+ localparam int unsigned NUM_FB = 4;
+ // Request throttling threshold
+ localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
+
+ // Prefetch signals
+ logic [ADDR_W-1:0] lookup_addr_aligned;
+ logic [ADDR_W-1:0] prefetch_addr_d, prefetch_addr_q;
+ logic prefetch_addr_en;
+ logic branch_or_mispredict;
+ // Cache pipeline IC0 signals
+ logic lookup_throttle;
+ logic lookup_req_ic0;
+ logic [ADDR_W-1:0] lookup_addr_ic0;
+ logic [IC_INDEX_W-1:0] lookup_index_ic0;
+ logic fill_req_ic0;
+ logic [IC_INDEX_W-1:0] fill_index_ic0;
+ logic [IC_TAG_SIZE-1:0] fill_tag_ic0;
+ logic [IC_LINE_SIZE-1:0] fill_wdata_ic0;
+ logic lookup_grant_ic0;
+ logic lookup_actual_ic0;
+ logic fill_grant_ic0;
+ logic tag_req_ic0;
+ logic [IC_INDEX_W-1:0] tag_index_ic0;
+ logic [IC_NUM_WAYS-1:0] tag_banks_ic0;
+ logic tag_write_ic0;
+ logic [TagSizeECC-1:0] tag_wdata_ic0;
+ logic data_req_ic0;
+ logic [IC_INDEX_W-1:0] data_index_ic0;
+ logic [IC_NUM_WAYS-1:0] data_banks_ic0;
+ logic data_write_ic0;
+ logic [LineSizeECC-1:0] data_wdata_ic0;
+ // Cache pipeline IC1 signals
+ logic [TagSizeECC-1:0] tag_rdata_ic1 [IC_NUM_WAYS];
+ logic [LineSizeECC-1:0] data_rdata_ic1 [IC_NUM_WAYS];
+ logic [LineSizeECC-1:0] hit_data_ecc_ic1;
+ logic [IC_LINE_SIZE-1:0] hit_data_ic1;
+ logic lookup_valid_ic1;
+ logic [ADDR_W-1:IC_INDEX_HI+1] lookup_addr_ic1;
+ logic [IC_NUM_WAYS-1:0] tag_match_ic1;
+ logic tag_hit_ic1;
+ logic [IC_NUM_WAYS-1:0] tag_invalid_ic1;
+ logic [IC_NUM_WAYS-1:0] lowest_invalid_way_ic1;
+ logic [IC_NUM_WAYS-1:0] round_robin_way_ic1, round_robin_way_q;
+ logic [IC_NUM_WAYS-1:0] sel_way_ic1;
+ logic ecc_err_ic1;
+ logic ecc_write_req;
+ logic [IC_NUM_WAYS-1:0] ecc_write_ways;
+ logic [IC_INDEX_W-1:0] ecc_write_index;
+ // Fill buffer signals
+ logic [$clog2(NUM_FB)-1:0] fb_fill_level;
+ logic fill_cache_new;
+ logic fill_new_alloc;
+ logic fill_spec_req, fill_spec_done, fill_spec_hold;
+ logic [NUM_FB-1:0][NUM_FB-1:0] fill_older_d, fill_older_q;
+ logic [NUM_FB-1:0] fill_alloc_sel, fill_alloc;
+ logic [NUM_FB-1:0] fill_busy_d, fill_busy_q;
+ logic [NUM_FB-1:0] fill_done;
+ logic [NUM_FB-1:0] fill_in_ic1;
+ logic [NUM_FB-1:0] fill_stale_d, fill_stale_q;
+ logic [NUM_FB-1:0] fill_cache_d, fill_cache_q;
+ logic [NUM_FB-1:0] fill_hit_ic1, fill_hit_d, fill_hit_q;
+ logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_cnt_d, fill_ext_cnt_q;
+ logic [NUM_FB-1:0] fill_ext_hold_d, fill_ext_hold_q;
+ logic [NUM_FB-1:0] fill_ext_done_d, fill_ext_done_q;
+ logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_rvd_cnt_d, fill_rvd_cnt_q;
+ logic [NUM_FB-1:0] fill_rvd_done;
+ logic [NUM_FB-1:0] fill_ram_done_d, fill_ram_done_q;
+ logic [NUM_FB-1:0] fill_out_grant;
+ logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_out_cnt_d, fill_out_cnt_q;
+ logic [NUM_FB-1:0] fill_out_done;
+ logic [NUM_FB-1:0] fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
+ logic [NUM_FB-1:0] fill_data_sel, fill_data_reg;
+ logic [NUM_FB-1:0] fill_data_hit, fill_data_rvd;
+ logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
+ logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_beat, fill_rvd_beat;
+ logic [NUM_FB-1:0] fill_ext_arb, fill_ram_arb, fill_out_arb;
+ logic [NUM_FB-1:0] fill_rvd_arb;
+ logic [NUM_FB-1:0] fill_entry_en;
+ logic [NUM_FB-1:0] fill_addr_en;
+ logic [NUM_FB-1:0] fill_way_en;
+ logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_data_en;
+ logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_err_d, fill_err_q;
+ logic [ADDR_W-1:0] fill_addr_q [NUM_FB];
+ logic [IC_NUM_WAYS-1:0] fill_way_q [NUM_FB];
+ logic [IC_LINE_SIZE-1:0] fill_data_d [NUM_FB];
+ logic [IC_LINE_SIZE-1:0] fill_data_q [NUM_FB];
+ logic [ADDR_W-1:BUS_W] fill_ext_req_addr;
+ logic [ADDR_W-1:0] fill_ram_req_addr;
+ logic [IC_NUM_WAYS-1:0] fill_ram_req_way;
+ logic [IC_LINE_SIZE-1:0] fill_ram_req_data;
+ logic [IC_LINE_SIZE-1:0] fill_out_data;
+ logic [IC_LINE_BEATS-1:0] fill_out_err;
+ // External req signals
+ logic instr_req;
+ logic [ADDR_W-1:BUS_W] instr_addr;
+ // Data output signals
+ logic skid_complete_instr;
+ logic skid_ready;
+ logic output_compressed;
+ logic skid_valid_d, skid_valid_q, skid_en;
+ logic [15:0] skid_data_d, skid_data_q;
+ logic skid_err_q;
+ logic output_valid;
+ logic addr_incr_two;
+ logic output_addr_en;
+ logic [ADDR_W-1:1] output_addr_incr;
+ logic [ADDR_W-1:1] output_addr_d, output_addr_q;
+ logic [15:0] output_data_lo, output_data_hi;
+ logic data_valid, output_ready;
+ logic [IC_LINE_SIZE-1:0] line_data;
+ logic [IC_LINE_BEATS-1:0] line_err;
+ logic [31:0] line_data_muxed;
+ logic line_err_muxed;
+ logic [31:0] output_data;
+ logic output_err;
+ // Invalidations
+ logic start_inval, inval_done;
+ logic inval_lock, inval_req_d, inval_req_q;
+ logic reset_inval_q;
+ logic inval_prog_d, inval_prog_q;
+ logic [IC_INDEX_W-1:0] inval_index_d, inval_index_q;
+
+ //////////////////////////
+ // Instruction prefetch //
+ //////////////////////////
+
+ assign branch_or_mispredict = branch_i | branch_mispredict_i;
+
+ assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};
+
+ // The prefetch address increments by one cache line for each granted request.
+ // This address is also updated if there is a branch that is not granted, since the target
+ // address (addr_i) is only valid for one cycle while branch_i is high.
+
+ // The captured branch target address is not forced to be aligned since the offset in the cache
+ // line must also be recorded for later use by the fill buffers.
+ assign prefetch_addr_d =
+ lookup_grant_ic0 ? (lookup_addr_aligned +
+ {{ADDR_W-IC_LINE_W-1{1'b0}}, 1'b1, {IC_LINE_W{1'b0}}}) :
+ branch_i ? addr_i :
+ mispredict_addr_i;
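+ // Informative example (assuming 8-byte cache lines, i.e. IC_LINE_W == 3): a granted lookup at
+ // 0x0000_0104 updates prefetch_addr_q to 0x0000_0108 (the next line), while an ungranted
+ // branch simply captures addr_i (or mispredict_addr_i).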
+
+ assign prefetch_addr_en = branch_or_mispredict | lookup_grant_ic0;
+
+ if (ResetAll) begin : g_prefetch_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ prefetch_addr_q <= '0;
+ end else if (prefetch_addr_en) begin
+ prefetch_addr_q <= prefetch_addr_d;
+ end
+ end
+ end else begin : g_prefetch_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (prefetch_addr_en) begin
+ prefetch_addr_q <= prefetch_addr_d;
+ end
+ end
+ end
+
+ ////////////////////////
+ // Pipeline stage IC0 //
+ ////////////////////////
+
+ // Cache lookup
+ assign lookup_throttle = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);
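+ // Informative example: with NUM_FB == 4 the threshold is 2, so prefetch lookups are throttled
+ // once more than two fill buffers are busy; branch / mispredict lookups bypass the throttle
+ // (but not a completely full fill-buffer set, see ~&fill_busy_q below).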
+
+ assign lookup_req_ic0 = req_i & ~&fill_busy_q & (branch_or_mispredict | ~lookup_throttle) &
+ ~ecc_write_req;
+ assign lookup_addr_ic0 = branch_i ? addr_i :
+ branch_mispredict_i ? mispredict_addr_i :
+ prefetch_addr_q;
+ assign lookup_index_ic0 = lookup_addr_ic0[IC_INDEX_HI:IC_LINE_W];
+
+ // Cache write
+ assign fill_req_ic0 = (|fill_ram_req);
+ assign fill_index_ic0 = fill_ram_req_addr[IC_INDEX_HI:IC_LINE_W];
+ assign fill_tag_ic0 = {(~inval_prog_q & ~ecc_write_req),
+ fill_ram_req_addr[ADDR_W-1:IC_INDEX_HI+1]};
+ assign fill_wdata_ic0 = fill_ram_req_data;
+
+ // Arbitrated signals - lookups have highest priority
+ assign lookup_grant_ic0 = lookup_req_ic0;
+ assign fill_grant_ic0 = fill_req_ic0 & ~lookup_req_ic0 & ~inval_prog_q &
+ ~ecc_write_req;
+ // Qualified lookup grant to mask ram signals in IC1 if access was not made
+ assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q &
+ ~icache_inval_i & ~inval_lock & ~start_inval;
+
+ // Tagram
+ assign tag_req_ic0 = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req;
+ assign tag_index_ic0 = inval_prog_q ? inval_index_q :
+ ecc_write_req ? ecc_write_index :
+ fill_grant_ic0 ? fill_index_ic0 :
+ lookup_index_ic0;
+ assign tag_banks_ic0 = ecc_write_req ? ecc_write_ways :
+ fill_grant_ic0 ? fill_ram_req_way :
+ {IC_NUM_WAYS{1'b1}};
+ assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req;
+
+ // Dataram
+ assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0;
+ assign data_index_ic0 = tag_index_ic0;
+ assign data_banks_ic0 = tag_banks_ic0;
+ assign data_write_ic0 = tag_write_ic0;
+
+ // Append ECC checkbits to write data if required
+ if (ICacheECC) begin : gen_ecc_wdata
+
+ // Tagram ECC
+ // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
+ logic [21:0] tag_ecc_input_padded;
+ logic [27:0] tag_ecc_output_padded;
+ logic [22-IC_TAG_SIZE:0] unused_tag_ecc_output;
+
+ assign tag_ecc_input_padded = {{22-IC_TAG_SIZE{1'b0}},fill_tag_ic0};
+ assign unused_tag_ecc_output = tag_ecc_output_padded[21:IC_TAG_SIZE-1];
+
+ prim_secded_inv_28_22_enc tag_ecc_enc (
+ .data_i (tag_ecc_input_padded),
+ .data_o (tag_ecc_output_padded)
+ );
+
+ assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[IC_TAG_SIZE-1:0]};
+
+ // Dataram ECC
+ for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
+ prim_secded_inv_39_32_enc data_ecc_enc (
+ .data_i (fill_wdata_ic0[bank*BUS_SIZE+:BUS_SIZE]),
+ .data_o (data_wdata_ic0[bank*BusSizeECC+:BusSizeECC])
+ );
+ end
+
+ end else begin : gen_noecc_wdata
+ assign tag_wdata_ic0 = fill_tag_ic0;
+ assign data_wdata_ic0 = fill_wdata_ic0;
+ end
+
+ ////////////////
+ // IC0 -> IC1 //
+ ////////////////
+
+ // Tag RAMs outputs
+ assign ic_tag_req_o = {IC_NUM_WAYS{tag_req_ic0}} & tag_banks_ic0;
+ assign ic_tag_write_o = tag_write_ic0;
+ assign ic_tag_addr_o = tag_index_ic0;
+ assign ic_tag_wdata_o = tag_wdata_ic0;
+
+ // Tag RAMs inputs
+ assign tag_rdata_ic1 = ic_tag_rdata_i;
+
+ // Data RAMs outputs
+ assign ic_data_req_o = {IC_NUM_WAYS{data_req_ic0}} & data_banks_ic0;
+ assign ic_data_write_o = data_write_ic0;
+ assign ic_data_addr_o = data_index_ic0;
+ assign ic_data_wdata_o = data_wdata_ic0;
+
+ // Data RAMs inputs
+ assign data_rdata_ic1 = ic_data_rdata_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ lookup_valid_ic1 <= 1'b0;
+ end else begin
+ lookup_valid_ic1 <= lookup_actual_ic0;
+ end
+ end
+
+ if (ResetAll) begin : g_lookup_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ lookup_addr_ic1 <= '0;
+ fill_in_ic1 <= '0;
+ end else if (lookup_grant_ic0) begin
+ lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
+ fill_in_ic1 <= fill_alloc_sel;
+ end
+ end
+ end else begin : g_lookup_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (lookup_grant_ic0) begin
+ lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
+ fill_in_ic1 <= fill_alloc_sel;
+ end
+ end
+ end
+
+ ////////////////////////
+ // Pipeline stage IC1 //
+ ////////////////////////
+
+ // Tag matching
+ for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_match
+ assign tag_match_ic1[way] = (tag_rdata_ic1[way][IC_TAG_SIZE-1:0] ==
+ {1'b1,lookup_addr_ic1[ADDR_W-1:IC_INDEX_HI+1]});
+ assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][IC_TAG_SIZE-1];
+ end
+
+ assign tag_hit_ic1 = |tag_match_ic1;
+
+ // Hit data mux
+ always_comb begin
+ hit_data_ecc_ic1 = 'b0;
+ for (int way = 0; way < IC_NUM_WAYS; way++) begin
+ if (tag_match_ic1[way]) begin
+ hit_data_ecc_ic1 |= data_rdata_ic1[way];
+ end
+ end
+ end
+
+ // Way selection for allocations to the cache (onehot signals)
+ // 1 first invalid way
+ // 2 global round-robin (pseudorandom) way
+ assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
+ assign round_robin_way_ic1[0] = round_robin_way_q[IC_NUM_WAYS-1];
+ for (genvar way = 1; way < IC_NUM_WAYS; way++) begin : gen_lowest_way
+ assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
+ assign round_robin_way_ic1[way] = round_robin_way_q[way-1];
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ round_robin_way_q <= {{IC_NUM_WAYS-1{1'b0}}, 1'b1};
+ end else if (lookup_valid_ic1) begin
+ round_robin_way_q <= round_robin_way_ic1;
+ end
+ end
+
+ assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
+ round_robin_way_q;
+
+ // ECC checking logic
+ if (ICacheECC) begin : gen_data_ecc_checking
+ logic [IC_NUM_WAYS-1:0] tag_err_ic1;
+ logic [IC_LINE_BEATS*2-1:0] data_err_ic1;
+ logic ecc_correction_write_d, ecc_correction_write_q;
+ logic [IC_NUM_WAYS-1:0] ecc_correction_ways_d, ecc_correction_ways_q;
+ logic [IC_INDEX_W-1:0] lookup_index_ic1, ecc_correction_index_q;
+
+ // Tag ECC checking
+ for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_ecc
+ logic [1:0] tag_err_bank_ic1;
+ logic [27:0] tag_rdata_padded_ic1;
+
+ // Expand the tag rdata with extra padding if the tag size is less than the maximum
+ assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TagSizeECC-1-:6],
+ {22-IC_TAG_SIZE{1'b0}},
+ tag_rdata_ic1[way][IC_TAG_SIZE-1:0]};
+
+ prim_secded_inv_28_22_dec data_ecc_dec (
+ .data_i (tag_rdata_padded_ic1),
+ .data_o (),
+ .syndrome_o (),
+ .err_o (tag_err_bank_ic1)
+ );
+ assign tag_err_ic1[way] = |tag_err_bank_ic1;
+ end
+
+ // Data ECC checking
+ // Note - could generate for all ways and mux after
+ for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
+ prim_secded_inv_39_32_dec data_ecc_dec (
+ .data_i (hit_data_ecc_ic1[bank*BusSizeECC+:BusSizeECC]),
+ .data_o (),
+ .syndrome_o (),
+ .err_o (data_err_ic1[bank*2+:2])
+ );
+
+ assign hit_data_ic1[bank*BUS_SIZE+:BUS_SIZE] =
+ hit_data_ecc_ic1[bank*BusSizeECC+:BUS_SIZE];
+
+ end
+
+ assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));
+
+ // Error correction
+ // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
+ // spurious hits. Also prevents the same line being allocated twice when there was a true
+ // hit and a spurious hit.
+ assign ecc_correction_ways_d = {IC_NUM_WAYS{|tag_err_ic1}} |
+ (tag_match_ic1 & {IC_NUM_WAYS{|data_err_ic1}});
+ assign ecc_correction_write_d = ecc_err_ic1;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ ecc_correction_write_q <= 1'b0;
+ end else begin
+ ecc_correction_write_q <= ecc_correction_write_d;
+ end
+ end
+
+ // The index is required in IC1 only when ECC is configured so is registered here
+ if (ResetAll) begin : g_lookup_ind_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ lookup_index_ic1 <= '0;
+ end else if (lookup_grant_ic0) begin
+ lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
+ end
+ end
+ end else begin : g_lookup_ind_nr
+ always_ff @(posedge clk_i) begin
+ if (lookup_grant_ic0) begin
+ lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
+ end
+ end
+ end
+
+ // Store the ways with errors to be invalidated
+ if (ResetAll) begin : g_ecc_correction_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ ecc_correction_ways_q <= '0;
+ ecc_correction_index_q <= '0;
+ end else if (ecc_err_ic1) begin
+ ecc_correction_ways_q <= ecc_correction_ways_d;
+ ecc_correction_index_q <= lookup_index_ic1;
+ end
+ end
+ end else begin : g_ecc_correction_nr
+ always_ff @(posedge clk_i) begin
+ if (ecc_err_ic1) begin
+ ecc_correction_ways_q <= ecc_correction_ways_d;
+ ecc_correction_index_q <= lookup_index_ic1;
+ end
+ end
+ end
+
+ assign ecc_write_req = ecc_correction_write_q;
+ assign ecc_write_ways = ecc_correction_ways_q;
+ assign ecc_write_index = ecc_correction_index_q;
+
+ end else begin : gen_no_data_ecc
+ assign ecc_err_ic1 = 1'b0;
+ assign ecc_write_req = 1'b0;
+ assign ecc_write_ways = '0;
+ assign ecc_write_index = '0;
+ assign hit_data_ic1 = hit_data_ecc_ic1;
+ end
+
+ ///////////////////////////////
+ // Cache allocation decision //
+ ///////////////////////////////
+
+ if (BranchCache) begin : gen_caching_logic
+
+ // Cache branch target + a number of subsequent lines
+ localparam int unsigned CACHE_AHEAD = 2;
+ localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
+ logic cache_cnt_dec;
+ logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;
+
+ assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
+ assign cache_cnt_d = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
+ (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cache_cnt_q <= '0;
+ end else begin
+ cache_cnt_q <= cache_cnt_d;
+ end
+ end
+
+ assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
+ ~icache_inval_i & ~inval_lock & ~inval_prog_q;
+
+ end else begin : gen_cache_all
+
+ // Cache all missing fetches
+ assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
+ end
+
+ //////////////////////////
+ // Fill buffer tracking //
+ //////////////////////////
+
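+  // Count the busy, non-stale fill buffers to give the current fill level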
+ always_comb begin
+ fb_fill_level = '0;
+ for (int i = 0; i < NUM_FB; i++) begin
+ if (fill_busy_q[i] & ~fill_stale_q[i]) begin
+ fb_fill_level += {{$clog2(NUM_FB) - 1{1'b0}}, 1'b1};
+ end
+ end
+ end
+
+ // Allocate a new buffer for every granted lookup
+ assign fill_new_alloc = lookup_grant_ic0;
+ // Track whether a speculative external request was made from IC0, and whether it was granted
+ // Speculative requests are only made for branches, or if the cache is disabled
+ assign fill_spec_req = (~icache_enable_i | branch_or_mispredict) & ~|fill_ext_req;
+ assign fill_spec_done = fill_spec_req & instr_gnt_i;
+ assign fill_spec_hold = fill_spec_req & ~instr_gnt_i;
+
+ for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs
+
+ /////////////////////////////
+ // Fill buffer allocations //
+ /////////////////////////////
+
+ // Allocate the lowest available buffer
+ if (fb == 0) begin : gen_fb_zero
+ assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
+ end else begin : gen_fb_rest
+ assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
+ end
+
+ assign fill_alloc[fb] = fill_alloc_sel[fb] & fill_new_alloc;
+ assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);
+
+ // Track which other fill buffers are older than this one (for age-based arbitration)
+ // TODO sparsify
+ assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;
+
+ // A fill buffer can release once all its actions are completed
+ // all data written to the cache (unless hit or error)
+ assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
+ (|fill_err_q[fb])) &
+ // all data output unless stale due to intervening branch
+ (fill_out_done[fb] | fill_stale_q[fb] | branch_or_mispredict) &
+ // all external requests completed
+ fill_rvd_done[fb];
+
+ /////////////////////////////////
+ // Fill buffer status tracking //
+ /////////////////////////////////
+
+ // Track staleness (requests become stale when a branch intervenes)
+ assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_or_mispredict | fill_stale_q[fb]);
+ // Track whether or not this request should allocate to the cache
+ // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
+ assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
+ (fill_cache_q[fb] & fill_busy_q[fb] &
+ icache_enable_i & ~icache_inval_i & ~inval_lock);
+ // Record whether the request hit in the cache
+ assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
+ assign fill_hit_d[fb] = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);
+
+ ///////////////////////////////////////////
+ // Fill buffer external request tracking //
+ ///////////////////////////////////////////
+
+ // Make an external request
+ assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done_d[fb];
+
+ // Count the number of completed external requests (each line requires IC_LINE_BEATS requests)
+ assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
+ {{IC_LINE_BEATS_W{1'b0}},fill_spec_done} :
+ (fill_ext_cnt_q[fb] + {{IC_LINE_BEATS_W{1'b0}},
+ fill_ext_arb[fb] & instr_gnt_i});
+ // External request must be held until granted
+ assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
+ (fill_ext_arb[fb] & ~instr_gnt_i);
+ // External requests are completed when the counter is filled or when the request is cancelled
+ assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][IC_LINE_BEATS_W] |
+ // external requests are considered complete if the request hit
+ fill_hit_ic1[fb] | fill_hit_q[fb] |
+                               // cancel if the line won't be cached and it is stale,
+ (~fill_cache_q[fb] & (branch_or_mispredict | fill_stale_q[fb] |
+ // or we're already at the end of the line
+ fill_ext_beat[fb][IC_LINE_BEATS_W]))) &
+ // can't cancel while we are waiting for a grant on the bus
+ ~fill_ext_hold_q[fb] & fill_busy_q[fb];
+ // Track whether this fill buffer expects to receive beats of data
+ assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
+ // Count the number of rvalid beats received
+ assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
+ (fill_rvd_cnt_q[fb] +
+ {{IC_LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
+ // External data is complete when all issued external requests have received their data
+ assign fill_rvd_done[fb] = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
+ (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);
+
+ //////////////////////////////////////
+ // Fill buffer data output tracking //
+ //////////////////////////////////////
+
+ // Send data to the IF stage for requests that are not stale, have not completed their
+ // data output, and have data available to send.
+ // Data is available if:
+ // - The request hit in the cache
+ // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
+ // - Data is available from the bus this cycle (fill_rvd_arb)
+ assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+ (fill_hit_ic1[fb] | fill_hit_q[fb] |
+ (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);
+
+ // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
+ assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready;
+
+ // Count the beats of data output to the IF stage
+ assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[IC_LINE_W-1:BUS_W]} :
+ (fill_out_cnt_q[fb] +
+ {{IC_LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
+ // Data output complete when the counter fills
+ assign fill_out_done[fb] = fill_out_cnt_q[fb][IC_LINE_BEATS_W];
+
+ //////////////////////////////////////
+ // Fill buffer ram request tracking //
+ //////////////////////////////////////
+
+ // make a fill request once all data beats received
+ assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][IC_LINE_BEATS_W] &
+ // unless the request hit, was non-allocating or got an error
+ ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
+ // or the request was already completed
+ ~fill_ram_done_q[fb];
+
+ // Record when a cache allocation request has been completed
+ assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);
+
+ //////////////////////////////
+ // Fill buffer line offsets //
+ //////////////////////////////
+
+ // When we branch into the middle of a line, the output count will not start from zero. This
+ // beat count is used to know which incoming rdata beats are relevant.
+ assign fill_ext_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
+ fill_ext_cnt_q[fb][IC_LINE_BEATS_W:0];
+ assign fill_ext_off[fb] = fill_ext_beat[fb][IC_LINE_BEATS_W-1:0];
+ assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
+ fill_rvd_cnt_q[fb][IC_LINE_BEATS_W:0];
+ assign fill_rvd_off[fb] = fill_rvd_beat[fb][IC_LINE_BEATS_W-1:0];
+
+ /////////////////////////////
+ // Fill buffer arbitration //
+ /////////////////////////////
+
+ // Age based arbitration - all these signals are one-hot
+ assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
+ assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 &
+ ~|(fill_ram_req & fill_older_q[fb]);
+ // Calculate which fill buffer is the oldest one which still needs to output data to IF
+ assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
+ fill_older_q[fb]);
+ // Arbitrate the request which has data available to send, and is the oldest outstanding
+ assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
+ // Assign incoming rvalid data to the oldest fill buffer expecting it
+ assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] &
+ ~|(fill_rvd_exp & fill_older_q[fb]);
+
+ /////////////////////////////
+ // Fill buffer data muxing //
+ /////////////////////////////
+
+ // Output data muxing controls
+ // 1. Select data from the fill buffer data register
+ assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
+ ~fill_out_done[fb] & fill_data_sel[fb] &
+ // The incoming data is already ahead of the output count
+ ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
+ (|fill_err_q[fb]));
+ // 2. Select IC1 hit data
+ assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
+ // 3. Select incoming instr_rdata_i
+ assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+ ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
+ // The incoming data lines up with the output count
+ (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];
+
+
+ ///////////////////////////
+ // Fill buffer registers //
+ ///////////////////////////
+
+ // Fill buffer general enable
+ assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_busy_q[fb] <= 1'b0;
+ fill_older_q[fb] <= '0;
+ fill_stale_q[fb] <= 1'b0;
+ fill_cache_q[fb] <= 1'b0;
+ fill_hit_q[fb] <= 1'b0;
+ fill_ext_cnt_q[fb] <= '0;
+ fill_ext_hold_q[fb] <= 1'b0;
+ fill_ext_done_q[fb] <= 1'b0;
+ fill_rvd_cnt_q[fb] <= '0;
+ fill_ram_done_q[fb] <= 1'b0;
+ fill_out_cnt_q[fb] <= '0;
+ end else if (fill_entry_en[fb]) begin
+ fill_busy_q[fb] <= fill_busy_d[fb];
+ fill_older_q[fb] <= fill_older_d[fb];
+ fill_stale_q[fb] <= fill_stale_d[fb];
+ fill_cache_q[fb] <= fill_cache_d[fb];
+ fill_hit_q[fb] <= fill_hit_d[fb];
+ fill_ext_cnt_q[fb] <= fill_ext_cnt_d[fb];
+ fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
+ fill_ext_done_q[fb] <= fill_ext_done_d[fb];
+ fill_rvd_cnt_q[fb] <= fill_rvd_cnt_d[fb];
+ fill_ram_done_q[fb] <= fill_ram_done_d[fb];
+ fill_out_cnt_q[fb] <= fill_out_cnt_d[fb];
+ end
+ end
+
+ ////////////////////////////////////////
+ // Fill buffer address / data storage //
+ ////////////////////////////////////////
+
+ assign fill_addr_en[fb] = fill_alloc[fb];
+ assign fill_way_en[fb] = (lookup_valid_ic1 & fill_in_ic1[fb]);
+
+ if (ResetAll) begin : g_fill_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_addr_q[fb] <= '0;
+ end else if (fill_addr_en[fb]) begin
+ fill_addr_q[fb] <= lookup_addr_ic0;
+ end
+ end
+ end else begin : g_fill_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (fill_addr_en[fb]) begin
+ fill_addr_q[fb] <= lookup_addr_ic0;
+ end
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_way_q[fb] <= '0;
+ end else if (fill_way_en[fb]) begin
+ fill_way_q[fb] <= sel_way_ic1;
+ end
+ end
+
+ // Data either comes from the cache or the bus. If there was an ECC error, we must take
+ // the incoming bus data since the cache hit data is corrupted.
+ assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1 :
+ {IC_LINE_BEATS{instr_rdata_i}};
+
+ for (genvar b = 0; b < IC_LINE_BEATS; b++) begin : gen_data_buf
+ // Error tracking (per beat)
+ assign fill_err_d[fb][b] = (fill_rvd_arb[fb] & instr_err_i &
+ (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0])) |
+ // Hold the error once recorded
+ (fill_busy_q[fb] & fill_err_q[fb][b]);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_err_q[fb][b] <= '0;
+ end else if (fill_entry_en[fb]) begin
+ fill_err_q[fb][b] <= fill_err_d[fb][b];
+ end
+ end
+
+ // Enable the relevant part of the data register (or all for cache hits)
+ // Ignore incoming rvalid data when we already have cache hit data
+ assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
+ (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
+ (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0]));
+
+ if (ResetAll) begin : g_fill_data_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= '0;
+ end else if (fill_data_en[fb][b]) begin
+ fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
+ end
+ end
+ end else begin : g_fill_data_nr
+ always_ff @(posedge clk_i) begin
+ if (fill_data_en[fb][b]) begin
+ fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
+ end
+ end
+ end
+
+ end
+ end
+
+ ////////////////////////////////
+ // Fill buffer one-hot muxing //
+ ////////////////////////////////
+
+ // External req info
+ always_comb begin
+ fill_ext_req_addr = '0;
+ for (int i = 0; i < NUM_FB; i++) begin
+ if (fill_ext_arb[i]) begin
+ fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:IC_LINE_W], fill_ext_off[i]};
+ end
+ end
+ end
+
+ // Cache req info
+ always_comb begin
+ fill_ram_req_addr = '0;
+ fill_ram_req_way = '0;
+ fill_ram_req_data = '0;
+ for (int i = 0; i < NUM_FB; i++) begin
+ if (fill_ram_arb[i]) begin
+ fill_ram_req_addr |= fill_addr_q[i];
+ fill_ram_req_way |= fill_way_q[i];
+ fill_ram_req_data |= fill_data_q[i];
+ end
+ end
+ end
+
+ // IF stage output data
+ always_comb begin
+ fill_out_data = '0;
+ fill_out_err = '0;
+ for (int i = 0; i < NUM_FB; i++) begin
+ if (fill_data_reg[i]) begin
+ fill_out_data |= fill_data_q[i];
+ // Ignore any speculative errors accumulated on cache hits
+ fill_out_err |= (fill_err_q[i] & ~{IC_LINE_BEATS{fill_hit_q[i]}});
+ end
+ end
+ end
+
+ ///////////////////////
+ // External requests //
+ ///////////////////////
+
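+  // Issue an external request either speculatively for a granted lookup (cache disabled or a
+  // branch in progress), or on behalf of any fill buffer with outstanding external requests.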
+ assign instr_req = ((~icache_enable_i | branch_or_mispredict) & lookup_grant_ic0) |
+ (|fill_ext_req);
+
+ assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
+ lookup_addr_ic0[ADDR_W-1:BUS_W];
+
+ assign instr_req_o = instr_req;
+ assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
+
+ ////////////////////////
+ // Output data muxing //
+ ////////////////////////
+
+ // Mux between line-width data sources
+ assign line_data = |fill_data_hit ? hit_data_ic1 : fill_out_data;
+ assign line_err = |fill_data_hit ? {IC_LINE_BEATS{1'b0}} : fill_out_err;
+
+ // Mux the relevant beat of line data, based on the output address
+ always_comb begin
+ line_data_muxed = '0;
+ line_err_muxed = 1'b0;
+ for (int unsigned i = 0; i < IC_LINE_BEATS; i++) begin
+ // When data has been skidded, the output address is behind by one
+ if ((output_addr_q[IC_LINE_W-1:BUS_W] + {{IC_LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
+ i[IC_LINE_BEATS_W-1:0]) begin
+ line_data_muxed |= line_data[i*32+:32];
+ line_err_muxed |= line_err[i];
+ end
+ end
+ end
+
+ // Mux between incoming rdata and the muxed line data
+ assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
+ assign output_err = |fill_data_rvd ? instr_err_i : line_err_muxed;
+
+ // Output data is valid (from any of the three possible sources). Note that fill_out_arb
+ // must be used here rather than fill_out_req because data can become valid out of order
+ // (e.g. cache hit data can become available ahead of an older outstanding miss).
+ assign data_valid = |fill_out_arb;
+
+ // Skid buffer data
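+  // The skid buffer stores the upper halfword of the current output word, either because an
+  // unaligned uncompressed instruction straddles a word boundary or because a compressed
+  // instruction has left the output stream misaligned.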
+ assign skid_data_d = output_data[31:16];
+
+ assign skid_en = data_valid & (ready_i | skid_ready);
+
+ if (ResetAll) begin : g_skid_data_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ skid_data_q <= '0;
+ skid_err_q <= '0;
+ end else if (skid_en) begin
+ skid_data_q <= skid_data_d;
+ skid_err_q <= output_err;
+ end
+ end
+ end else begin : g_skid_data_nr
+ always_ff @(posedge clk_i) begin
+ if (skid_en) begin
+ skid_data_q <= skid_data_d;
+ skid_err_q <= output_err;
+ end
+ end
+ end
+
+ // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
+ // an error (no need to wait for the second half)
+ assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);
+
+ // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
+ assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);
+
+ assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;
+
+ assign output_compressed = (rdata_o[1:0] != 2'b11);
+
+ assign skid_valid_d =
+ // Branches invalidate the skid buffer
+ branch_or_mispredict ? 1'b0 :
+ // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
+ (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
+ // The skid buffer becomes valid when:
+ // - we branch to an unaligned uncompressed instruction
+ (data_valid &
+ (((output_addr_q[1] & (~output_compressed | output_err)) |
+ // - a compressed instruction misaligns the stream
+ (~output_addr_q[1] & output_compressed & ~output_err & ready_i)))));
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ skid_valid_q <= 1'b0;
+ end else begin
+ skid_valid_q <= skid_valid_d;
+ end
+ end
+
+ // Signal that valid data is available to the IF stage
+ // Note that if the first half of an unaligned instruction reports an error, we do not need
+ // to wait for the second half
+ // Compressed instruction completely satisfied by skid buffer
+ assign output_valid = skid_complete_instr |
+                        // Output data is available and the output stream is aligned, or skid data is available,
+ (data_valid & (~output_addr_q[1] | skid_valid_q |
+ // or this is an error or an unaligned compressed instruction
+ output_err | (output_data[17:16] != 2'b11)));
+
+ // Update the address on branches and every time an instruction is driven
+ assign output_addr_en = branch_or_mispredict | (ready_i & valid_o);
+
+ // Increment the address by two every time a compressed instruction is popped
+ assign addr_incr_two = output_compressed & ~err_o;
+
+ // Next IF stage PC
+ assign output_addr_incr = (output_addr_q[31:1] +
+ // Increment address by 4 or 2
+ {29'd0, ~addr_incr_two, addr_incr_two});
+
+ // Redirect the address on branches or mispredicts
+ assign output_addr_d = branch_i ? addr_i[31:1] :
+ branch_mispredict_i ? mispredict_addr_i[31:1] :
+ output_addr_incr;
+
+ if (ResetAll) begin : g_output_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ output_addr_q <= '0;
+ end else if (output_addr_en) begin
+ output_addr_q <= output_addr_d;
+ end
+ end
+ end else begin : g_output_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (output_addr_en) begin
+ output_addr_q <= output_addr_d;
+ end
+ end
+ end
+
+ // Mux the data from BUS_SIZE to halfword
+ // This muxing realigns data when instruction words are split across BUS_W e.g.
+ // word 1 |----|*h1*|
+ // word 0 |*h0*|----| --> |*h1*|*h0*|
+ // 31 15 0 31 15 0
+ always_comb begin
+ output_data_lo = '0;
+ for (int unsigned i = 0; i < IC_OUTPUT_BEATS; i++) begin
+ if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
+ output_data_lo |= output_data[i*16+:16];
+ end
+ end
+ end
+
+ always_comb begin
+ output_data_hi = '0;
+ for (int unsigned i = 0; i < IC_OUTPUT_BEATS - 1; i++) begin
+ if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
+ output_data_hi |= output_data[(i+1)*16+:16];
+ end
+ end
+ if (&output_addr_q[BUS_W-1:1]) begin
+ output_data_hi |= output_data[15:0];
+ end
+ end
+
+ assign valid_o = output_valid & ~branch_mispredict_i;
+ assign rdata_o = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
+ assign addr_o = {output_addr_q, 1'b0};
+ assign err_o = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
+ // Error caused by the second half of a misaligned uncompressed instruction
+ // (only relevant when err_o is set)
+ assign err_plus2_o = skid_valid_q & ~skid_err_q;
+
+ ///////////////////
+ // Invalidations //
+ ///////////////////
+
+
+  // The invalidation request must be saved in a register so that we can wait until a valid
+  // scrambling key is available before acting on it. Since the key itself is needed for
+  // filling the RAMs and reading from them, the ICache also needs to stop operating.
+ assign inval_req_d = (inval_req_q | icache_inval_i) & ~(inval_done & inval_prog_q);
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ inval_req_q <= 1'b0;
+ end else begin
+ inval_req_q <= inval_req_d;
+ end
+ end
+
+  // This acts as a lock mechanism: the invalidation request is held back until a valid
+  // scrambling key has been received.
+ assign inval_lock = inval_req_d & ~ic_scr_key_valid_i;
+
+  // Invalidate on reset, or when instructed. If an invalidation request is received while a
+  // previous invalidation is ongoing, it does not need to be restarted. Do not start this
+  // process until the inval lock is released, i.e. the scrambling key is valid.
+  assign start_inval = ~inval_lock & (~reset_inval_q | inval_req_q) & ~inval_prog_q;
+ assign inval_prog_d = ~inval_lock & (start_inval | (inval_prog_q & ~inval_done));
+ assign inval_done = &inval_index_q;
+ assign inval_index_d = start_inval ? '0 : (inval_index_q + {{IC_INDEX_W-1{1'b0}},1'b1});
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ inval_prog_q <= 1'b0;
+ reset_inval_q <= 1'b0;
+ end else begin
+ inval_prog_q <= inval_prog_d;
+ reset_inval_q <= 1'b1;
+ end
+ end
+
+ if (ResetAll) begin : g_inval_index_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ inval_index_q <= '0;
+ end else if (inval_prog_d) begin
+ inval_index_q <= inval_index_d;
+ end
+ end
+ end else begin : g_inval_index_nr
+ always_ff @(posedge clk_i) begin
+ if (inval_prog_d) begin
+ inval_index_q <= inval_index_d;
+ end
+ end
+ end
+
+ /////////////////
+ // Busy status //
+ /////////////////
+
+  // Only busy (for WFI purposes) while an invalidation is requested or in progress, or while
+  // external requests are outstanding.
+ assign busy_o = inval_req_q | (|(fill_busy_q & ~fill_rvd_done));
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ `ASSERT_INIT(size_param_legal, (IC_LINE_SIZE > 32))
+
+ // ECC primitives will need to be changed for different sizes
+ `ASSERT_INIT(ecc_tag_param_legal, (IC_TAG_SIZE <= 27))
+ `ASSERT_INIT(ecc_data_param_legal, !ICacheECC || (BUS_SIZE == 32))
+
+ // Lookups in the tag ram should always give a known result
+ `ASSERT_KNOWN(TagHitKnown, lookup_valid_ic1 & tag_hit_ic1)
+ `ASSERT_KNOWN(TagInvalidKnown, lookup_valid_ic1 & tag_invalid_ic1)
+
+ // This is only used for the Yosys-based formal flow. Once we have working bind support, we can
+ // get rid of it.
+`ifdef FORMAL
+ `ifdef YOSYS
+ // Unfortunately, Yosys doesn't support passing unpacked arrays as ports. Explicitly pack up the
+ // signals we need.
+ logic [NUM_FB-1:0][ADDR_W-1:0] packed_fill_addr_q;
+ always_comb begin
+ for (int i = 0; i < NUM_FB; i++) begin
+ packed_fill_addr_q[i][ADDR_W-1:0] = fill_addr_q[i];
+ end
+ end
+
+ `include "formal_tb_frag.svh"
+ `endif
+`endif
+
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv
new file mode 100644
index 0000000..8305792
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv
@@ -0,0 +1,1270 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`ifdef RISCV_FORMAL
+ `define RVFI
+`endif
+
+/**
+ * Instruction Decode Stage
+ *
+ * Decode stage of the core. It decodes the instructions and hosts the register
+ * file.
+ */
+
+`include "prim_assert.sv"
+`include "dv_fcov_macros.svh"
+
+module cheriot_id_stage import cheri_pkg::*; #(
+ parameter bit RV32E = 0,
+ parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast,
+ parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone,
+ parameter bit DataIndTiming = 1'b0,
+ parameter bit BranchTargetALU = 0,
+ parameter bit WritebackStage = 0,
+ parameter bit BranchPredictor = 0,
+ parameter bit CHERIoTEn = 1'b1,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ output logic ctrl_busy_o,
+ output logic illegal_insn_o,
+
+ // Interface to IF stage
+ input logic instr_valid_i,
+ input logic [31:0] instr_rdata_i, // from IF-ID pipeline registers
+ input logic [31:0] instr_rdata_alu_i, // from IF-ID pipeline registers
+ input logic [15:0] instr_rdata_c_i, // from IF-ID pipeline registers
+ input logic instr_is_compressed_i,
+ input logic instr_bp_taken_i,
+ output logic instr_req_o,
+ output logic instr_first_cycle_id_o,
+ output logic instr_valid_clear_o, // kill instr in IF-ID reg
+ output logic id_in_ready_o, // ID stage is ready for next instr
+ output logic icache_inval_o,
+
+ // Jumps and branches
+ input logic branch_decision_i,
+
+ // IF and ID stage signals
+ output logic pc_set_o,
+ output cheriot_pkg::pc_sel_e pc_mux_o,
+ output logic nt_branch_mispredict_o,
+ output logic [31:0] nt_branch_addr_o,
+ output cheriot_pkg::exc_pc_sel_e exc_pc_mux_o,
+ output cheriot_pkg::exc_cause_e exc_cause_o,
+
+ input logic illegal_c_insn_i,
+ input logic instr_fetch_err_i,
+ input logic instr_fetch_err_plus2_i,
+ input logic instr_fetch_cheri_acc_vio_i,
+ input logic instr_fetch_cheri_bound_vio_i,
+
+ input logic [31:0] pc_id_i,
+
+ // Stalls
+ input logic ex_valid_i, // EX stage has valid output
+ input logic lsu_resp_valid_i, // LSU has valid output, or is done
+ // ALU
+ output cheriot_pkg::alu_op_e alu_operator_ex_o,
+ output logic [31:0] alu_operand_a_ex_o,
+ output logic [31:0] alu_operand_b_ex_o,
+
+ // Multicycle Operation Stage Register
+ input logic [1:0] imd_val_we_ex_i,
+ input logic [33:0] imd_val_d_ex_i[2],
+ output logic [33:0] imd_val_q_ex_o[2],
+
+ // Branch target ALU
+ output logic [31:0] bt_a_operand_o,
+ output logic [31:0] bt_b_operand_o,
+
+ // MUL, DIV
+ output logic mult_en_ex_o,
+ output logic div_en_ex_o,
+ output logic mult_sel_ex_o,
+ output logic div_sel_ex_o,
+ output cheriot_pkg::md_op_e multdiv_operator_ex_o,
+ output logic [1:0] multdiv_signed_mode_ex_o,
+ output logic [31:0] multdiv_operand_a_ex_o,
+ output logic [31:0] multdiv_operand_b_ex_o,
+ output logic multdiv_ready_id_o,
+
+ // CSR
+ output logic csr_access_o,
+ output cheriot_pkg::csr_op_e csr_op_o,
+ output logic csr_op_en_o,
+ output logic csr_save_if_o,
+ output logic csr_save_id_o,
+ output logic csr_save_wb_o,
+ output logic csr_restore_mret_id_o,
+ output logic csr_restore_dret_id_o,
+ output logic csr_save_cause_o,
+ output logic csr_mepcc_clrtag_o,
+ output logic [31:0] csr_mtval_o,
+ input cheriot_pkg::priv_lvl_e priv_mode_i,
+ input logic csr_mstatus_tw_i,
+ input logic illegal_csr_insn_i,
+ input logic data_ind_timing_i,
+ input logic csr_pcc_perm_sr_i,
+
+ // Interface to load store unit
+ output logic lsu_req_o,
+ output logic lsu_we_o,
+ output logic [1:0] lsu_type_o,
+ output logic lsu_sign_ext_o,
+ output logic [31:0] lsu_wdata_o,
+
+ input logic lsu_req_done_i, // Data req to LSU is complete and
+ // instruction can move to writeback
+ // (only relevant where writeback stage is
+ // present)
+
+ input logic lsu_addr_incr_req_i,
+ input logic [31:0] lsu_addr_last_i,
+
+ // Interrupt signals
+ input logic csr_mstatus_mie_i,
+ input logic irq_pending_i,
+ input cheriot_pkg::irqs_t irqs_i,
+ input logic irq_nm_i,
+ output logic nmi_mode_o,
+
+ input logic lsu_load_err_i,
+ input logic lsu_store_err_i,
+ input logic lsu_err_is_cheri_i,
+
+ // Debug Signal
+ output logic debug_mode_o,
+ output cheriot_pkg::dbg_cause_e debug_cause_o,
+ output logic debug_csr_save_o,
+ input logic debug_req_i,
+ input logic debug_single_step_i,
+ input logic debug_ebreakm_i,
+ input logic debug_ebreaku_i,
+ input logic trigger_match_i,
+
+ // Write back signal
+ input logic [31:0] result_ex_i,
+ input logic [31:0] csr_rdata_i,
+
+ // Register file read
+ output logic [4:0] rf_raddr_a_o,
+ input logic [31:0] rf_rdata_a_i,
+ output logic [4:0] rf_raddr_b_o,
+ input logic [31:0] rf_rdata_b_i,
+ output logic rf_ren_a_o,
+ output logic rf_ren_b_o,
+
+ // Register file write (via writeback)
+ output logic [4:0] rf_waddr_id_o,
+ output logic [31:0] rf_wdata_id_o,
+ output logic rf_we_id_o,
+ output logic rf_rd_a_wb_match_o,
+ output logic rf_rd_b_wb_match_o,
+ input logic [31:0] rf_reg_rdy_i,
+
+ // Register write information from writeback (for resolving data hazards)
+ input logic [4:0] rf_waddr_wb_i,
+ input logic [31:0] rf_wdata_fwd_wb_i,
+ input logic rf_write_wb_i,
+
+ output logic en_wb_o,
+ output cheriot_pkg::wb_instr_type_e instr_type_wb_o,
+ output logic instr_perf_count_id_o,
+ input logic ready_wb_i,
+ input logic outstanding_load_wb_i,
+ input logic outstanding_store_wb_i,
+
+ // Performance Counters
+ output logic perf_jump_o, // executing a jump instr
+ output logic perf_branch_o, // executing a branch instr
+ output logic perf_tbranch_o, // executing a taken branch instr
+ output logic perf_dside_wait_o, // instruction in ID/EX is awaiting memory
+ // access to finish before proceeding
+ output logic perf_mul_wait_o,
+ output logic perf_div_wait_o,
+ output logic instr_id_done_o,
+
+ // cheri signals
+ output logic cheri_exec_id_o,
+ output logic instr_is_cheri_id_o,
+ output logic instr_is_rv32lsu_id_o,
+ output logic [11:0] cheri_imm12_o,
+ output logic [19:0] cheri_imm20_o,
+ output logic [20:0] cheri_imm21_o,
+ output logic [OPDW-1:0] cheri_operator_o,
+ output logic [4:0] cheri_cs2_dec_o,
+ output logic cheri_load_o,
+ output logic cheri_store_o,
+
+ input logic cheri_ex_valid_i,
+ input logic cheri_ex_err_i,
+ input logic [11:0] cheri_ex_err_info_i,
+ input logic cheri_wb_err_i,
+ input logic [15:0] cheri_wb_err_info_i,
+ input logic cheri_branch_req_i, // from cheri EX
+ input logic [31:0] cheri_branch_target_i
+);
+
+ import cheriot_pkg::*;
+
+ // Decoder/Controller, ID stage internal signals
+ logic illegal_insn_dec;
+ logic ebrk_insn;
+ logic mret_insn_dec;
+ logic dret_insn_dec;
+ logic ecall_insn_dec;
+ logic wfi_insn_dec;
+
+ logic wb_exception;
+ logic unused_id_exception;
+ logic id_exception_nc;
+
+ logic branch_in_dec;
+ logic branch_set, branch_set_raw, branch_set_raw_d;
+ logic branch_jump_set_done_q, branch_jump_set_done_d;
+ logic branch_not_set;
+ logic branch_taken;
+ logic jump_in_dec;
+ logic jump_set_dec;
+ logic jump_set, jump_set_raw;
+
+ logic instr_first_cycle;
+ logic instr_executing_spec;
+ logic instr_executing;
+ logic instr_done;
+ logic controller_run;
+ logic stall_ld_hz;
+ logic stall_mem;
+ logic stall_multdiv;
+ logic stall_branch;
+ logic stall_jump;
+ logic stall_id;
+ logic stall_wb;
+ logic stall_cheri;
+ logic flush_id;
+ logic multicycle_done;
+
+ // Immediate decoding and sign extension
+ logic [31:0] imm_i_type;
+ logic [31:0] imm_s_type;
+ logic [31:0] imm_b_type;
+ logic [31:0] imm_u_type;
+ logic [31:0] imm_j_type;
+ logic [31:0] zimm_rs1_type;
+
+  logic [31:0] imm_a; // contains the immediate for operand a
+  logic [31:0] imm_b; // contains the immediate for operand b
+
+ // Register file interface
+
+ rf_wd_sel_e rf_wdata_sel;
+ logic rf_we_dec, rf_we_raw;
+ logic rf_ren_a, rf_ren_b;
+ logic rf_ren_a_dec, rf_ren_b_dec;
+ logic rf_we_or_load;
+
+ // Read enables should only be asserted for valid and legal instructions
+ assign rf_ren_a = instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o & rf_ren_a_dec;
+ assign rf_ren_b = instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o & rf_ren_b_dec;
+
+ assign rf_ren_a_o = rf_ren_a;
+ assign rf_ren_b_o = rf_ren_b;
+
+ logic [31:0] rf_rdata_a_fwd;
+ logic [31:0] rf_rdata_b_fwd;
+
+ logic cheri_lsu_req_dec;
+ logic cheri_multicycle_dec;
+ logic ex_valid_all;
+
+ // ALU Control
+ alu_op_e alu_operator;
+ op_a_sel_e alu_op_a_mux_sel, alu_op_a_mux_sel_dec;
+ op_b_sel_e alu_op_b_mux_sel, alu_op_b_mux_sel_dec;
+ logic alu_multicycle_dec;
+ logic stall_alu;
+
+ logic [33:0] imd_val_q[2];
+
+ op_a_sel_e bt_a_mux_sel;
+ imm_b_sel_e bt_b_mux_sel;
+
+ imm_a_sel_e imm_a_mux_sel;
+ imm_b_sel_e imm_b_mux_sel, imm_b_mux_sel_dec;
+
+ // Multiplier Control
+  logic        mult_en_id, mult_en_dec;   // use integer multiplier
+  logic        div_en_id, div_en_dec;     // use integer division or remainder
+ logic multdiv_en_dec;
+ md_op_e multdiv_operator;
+ logic [1:0] multdiv_signed_mode;
+
+ // Data Memory Control
+ logic lsu_we;
+ logic [1:0] lsu_type;
+ logic lsu_sign_ext;
+ logic lsu_req, lsu_req_dec;
+ logic data_req_allowed;
+
+ // CSR control
+ logic csr_pipe_flush;
+ logic csr_cheri_always_ok;
+
+ logic [31:0] alu_operand_a;
+ logic [31:0] alu_operand_b;
+
+ logic stall_cheri_trvk;
+ logic instr_is_legal_cheri;
+
+ /////////////
+ // LSU Mux //
+ /////////////
+
+ // Misaligned loads/stores result in two aligned loads/stores, compute second address
+ assign alu_op_a_mux_sel = lsu_addr_incr_req_i ? OP_A_FWD : alu_op_a_mux_sel_dec;
+ assign alu_op_b_mux_sel = lsu_addr_incr_req_i ? OP_B_IMM : alu_op_b_mux_sel_dec;
+ assign imm_b_mux_sel = lsu_addr_incr_req_i ? IMM_B_INCR_ADDR : imm_b_mux_sel_dec;
+
+ ///////////////////
+ // Operand MUXES //
+ ///////////////////
+
+ // Main ALU immediate MUX for Operand A
+ assign imm_a = (imm_a_mux_sel == IMM_A_Z) ? zimm_rs1_type : '0;
+
+ // Main ALU MUX for Operand A
+ always_comb begin : alu_operand_a_mux
+ unique case (alu_op_a_mux_sel)
+ OP_A_REG_A: alu_operand_a = rf_rdata_a_fwd;
+ OP_A_FWD: alu_operand_a = lsu_addr_last_i;
+ OP_A_CURRPC: alu_operand_a = pc_id_i;
+ OP_A_IMM: alu_operand_a = imm_a;
+ default: alu_operand_a = pc_id_i;
+ endcase
+ end
+
+ if (BranchTargetALU) begin : g_btalu_muxes
+ // Branch target ALU operand A mux
+ always_comb begin : bt_operand_a_mux
+ unique case (bt_a_mux_sel)
+ OP_A_REG_A: bt_a_operand_o = rf_rdata_a_fwd;
+ OP_A_CURRPC: bt_a_operand_o = pc_id_i;
+ default: bt_a_operand_o = pc_id_i;
+ endcase
+ end
+
+ // Branch target ALU operand B mux
+ always_comb begin : bt_immediate_b_mux
+ unique case (bt_b_mux_sel)
+ IMM_B_I: bt_b_operand_o = imm_i_type;
+ IMM_B_B: bt_b_operand_o = imm_b_type;
+ IMM_B_J: bt_b_operand_o = imm_j_type;
+ IMM_B_INCR_PC: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+ default: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4;
+ endcase
+ end
+
+ // Reduced main ALU immediate MUX for Operand B
+ always_comb begin : immediate_b_mux
+ unique case (imm_b_mux_sel)
+ IMM_B_I: imm_b = imm_i_type;
+ IMM_B_S: imm_b = imm_s_type;
+ IMM_B_U: imm_b = imm_u_type;
+ IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+ IMM_B_INCR_ADDR: imm_b = 32'h4;
+ default: imm_b = 32'h4;
+ endcase
+ end
+ `ASSERT(IbexImmBMuxSelValid, instr_valid_i |-> imm_b_mux_sel inside {
+ IMM_B_I,
+ IMM_B_S,
+ IMM_B_U,
+ IMM_B_INCR_PC,
+ IMM_B_INCR_ADDR})
+ end else begin : g_nobtalu
+ op_a_sel_e unused_a_mux_sel;
+ imm_b_sel_e unused_b_mux_sel;
+
+ assign unused_a_mux_sel = bt_a_mux_sel;
+ assign unused_b_mux_sel = bt_b_mux_sel;
+ assign bt_a_operand_o = '0;
+ assign bt_b_operand_o = '0;
+
+ // Full main ALU immediate MUX for Operand B
+ always_comb begin : immediate_b_mux
+ unique case (imm_b_mux_sel)
+ IMM_B_I: imm_b = imm_i_type;
+ IMM_B_S: imm_b = imm_s_type;
+ IMM_B_B: imm_b = imm_b_type;
+ IMM_B_U: imm_b = imm_u_type;
+ IMM_B_J: imm_b = imm_j_type;
+ IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 32'h2 : 32'h4;
+ IMM_B_INCR_ADDR: imm_b = 32'h4;
+ default: imm_b = 32'h4;
+ endcase
+ end
+ `ASSERT(IbexImmBMuxSelValid, instr_valid_i |-> imm_b_mux_sel inside {
+ IMM_B_I,
+ IMM_B_S,
+ IMM_B_B,
+ IMM_B_U,
+ IMM_B_J,
+ IMM_B_INCR_PC,
+ IMM_B_INCR_ADDR})
+ end
+
+ // ALU MUX for Operand B
+ assign alu_operand_b = (alu_op_b_mux_sel == OP_B_IMM) ? imm_b : rf_rdata_b_fwd;
+
+ /////////////////////////////////////////
+ // Multicycle Operation Stage Register //
+ /////////////////////////////////////////
+
+ for (genvar i = 0; i < 2; i++) begin : gen_intermediate_val_reg
+ always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_val_reg
+ if (!rst_ni) begin
+ imd_val_q[i] <= '0;
+ end else if (imd_val_we_ex_i[i]) begin
+ imd_val_q[i] <= imd_val_d_ex_i[i];
+ end
+ end
+ end
+
+ assign imd_val_q_ex_o = imd_val_q;
+
+ ///////////////////////
+ // Register File MUX //
+ ///////////////////////
+
+ // Suppress register write if there is an illegal CSR access or instruction is not executing
+ assign rf_we_id_o = rf_we_raw & instr_executing & ~illegal_csr_insn_i;
+
+ // Register file write data mux
+ always_comb begin : rf_wdata_id_mux
+ unique case (rf_wdata_sel)
+ RF_WD_EX: rf_wdata_id_o = result_ex_i;
+ RF_WD_CSR: rf_wdata_id_o = csr_rdata_i;
+ default: rf_wdata_id_o = result_ex_i;
+ endcase
+ end
+
+ /////////////
+ // Decoder //
+ /////////////
+
+ cheriot_decoder #(
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .BranchTargetALU(BranchTargetALU),
+ .CHERIoTEn (CHERIoTEn),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2)
+ ) decoder_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+ .cheri_tsafe_en_i (cheri_tsafe_en_i),
+ // controller
+ .illegal_insn_o(illegal_insn_dec),
+ .ebrk_insn_o (ebrk_insn),
+ .mret_insn_o (mret_insn_dec),
+ .dret_insn_o (dret_insn_dec),
+ .ecall_insn_o (ecall_insn_dec),
+ .wfi_insn_o (wfi_insn_dec),
+ .jump_set_o (jump_set_dec),
+ .branch_taken_i(branch_taken),
+ .icache_inval_o(icache_inval_o),
+
+ // from IF-ID pipeline register
+ .instr_first_cycle_i(instr_first_cycle),
+ .instr_rdata_i (instr_rdata_i),
+ .instr_rdata_alu_i (instr_rdata_alu_i),
+ .illegal_c_insn_i (illegal_c_insn_i),
+
+ // immediates
+ .imm_a_mux_sel_o(imm_a_mux_sel),
+ .imm_b_mux_sel_o(imm_b_mux_sel_dec),
+ .bt_a_mux_sel_o (bt_a_mux_sel),
+ .bt_b_mux_sel_o (bt_b_mux_sel),
+
+ .imm_i_type_o (imm_i_type),
+ .imm_s_type_o (imm_s_type),
+ .imm_b_type_o (imm_b_type),
+ .imm_u_type_o (imm_u_type),
+ .imm_j_type_o (imm_j_type),
+ .zimm_rs1_type_o(zimm_rs1_type),
+
+ // register file
+ .rf_wdata_sel_o(rf_wdata_sel),
+ .rf_we_o (rf_we_dec),
+ .rf_we_or_load_o(rf_we_or_load),
+
+ .rf_raddr_a_o(rf_raddr_a_o),
+ .rf_raddr_b_o(rf_raddr_b_o),
+ .rf_waddr_o (rf_waddr_id_o),
+ .rf_ren_a_o (rf_ren_a_dec),
+ .rf_ren_b_o (rf_ren_b_dec),
+
+ // ALU
+ .alu_operator_o (alu_operator),
+ .alu_op_a_mux_sel_o(alu_op_a_mux_sel_dec),
+ .alu_op_b_mux_sel_o(alu_op_b_mux_sel_dec),
+ .alu_multicycle_o (alu_multicycle_dec),
+
+ // MULT & DIV
+ .mult_en_o (mult_en_dec),
+ .div_en_o (div_en_dec),
+ .mult_sel_o (mult_sel_ex_o),
+ .div_sel_o (div_sel_ex_o),
+ .multdiv_operator_o (multdiv_operator),
+ .multdiv_signed_mode_o(multdiv_signed_mode),
+
+ // CSRs
+ .csr_access_o(csr_access_o),
+ .csr_op_o (csr_op_o),
+ .csr_cheri_always_ok_o (csr_cheri_always_ok),
+
+ // LSU
+ .data_req_o (lsu_req_dec),
+ .cheri_data_req_o (cheri_lsu_req_dec),
+ .data_we_o (lsu_we),
+ .data_type_o (lsu_type),
+ .data_sign_extension_o(lsu_sign_ext),
+
+ // jump/branches
+ .jump_in_dec_o (jump_in_dec),
+ .branch_in_dec_o(branch_in_dec),
+
+ // cheri signals
+ .instr_is_cheri_o (instr_is_cheri_id_o),
+ .instr_is_legal_cheri_o (instr_is_legal_cheri),
+ .cheri_imm12_o (cheri_imm12_o),
+ .cheri_imm20_o (cheri_imm20_o),
+ .cheri_imm21_o (cheri_imm21_o),
+ .cheri_operator_o (cheri_operator_o),
+ .cheri_cs2_dec_o (cheri_cs2_dec_o),
+ .cheri_multicycle_dec_o (cheri_multicycle_dec)
+ );
+
+ // assign cheri_lsu_req_dec = cheri_load_o | cheri_store_o;
+ assign instr_is_rv32lsu_id_o = lsu_req_dec; // go to cheri_ex
+
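+  // EX result valid: CHERI instructions complete via the CHERI EX unit, all other
+  // instructions via the main EX block.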
+ assign ex_valid_all = instr_is_cheri_id_o ? cheri_ex_valid_i : ex_valid_i;
+
+  // If using the "internal" CLBC, execution is sequential/multicycle;
+  // otherwise the pipelined version is used.
+ assign cheri_load_o = cheri_operator_o[CLOAD_CAP] & (~cheri_tsafe_en_i | CheriPPLBC);
+
+ assign cheri_store_o = cheri_operator_o[CSTORE_CAP];
+
+
+  //////////////////////////////////
+  // CSR-related pipeline flushes //
+  //////////////////////////////////
+ always_comb begin : csr_pipeline_flushes
+ csr_pipe_flush = 1'b0;
+
+ // A pipeline flush is needed to let the controller react after modifying certain CSRs:
+ // - When enabling interrupts, pending IRQs become visible to the controller only during
+ // the next cycle. If during that cycle the core disables interrupts again, it does not
+ // see any pending IRQs and consequently does not start to handle interrupts.
+ // - When modifying debug CSRs - TODO: Check if this is really needed
+ if (csr_op_en_o == 1'b1 && (csr_op_o == CSR_OP_WRITE || csr_op_o == CSR_OP_SET)) begin
+ if (csr_num_e'(instr_rdata_i[31:20]) == CSR_MSTATUS ||
+ csr_num_e'(instr_rdata_i[31:20]) == CSR_MIE) begin
+ csr_pipe_flush = 1'b1;
+ end
+ end else if (csr_op_en_o == 1'b1 && csr_op_o != CSR_OP_READ) begin
+ if (csr_num_e'(instr_rdata_i[31:20]) == CSR_DCSR ||
+ csr_num_e'(instr_rdata_i[31:20]) == CSR_DPC ||
+ csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH0 ||
+ csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH1) begin
+ csr_pipe_flush = 1'b1;
+ end
+ end
+ end
+
+ ////////////////
+ // Controller //
+ ////////////////
+
+ assign illegal_insn_o = instr_valid_i & (illegal_insn_dec | illegal_csr_insn_i);
+
+ cheriot_controller #(
+ .CHERIoTEn (CHERIoTEn),
+ .WritebackStage (WritebackStage),
+ .BranchPredictor(BranchPredictor)
+ ) controller_i (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+ .cheri_pmode_i (cheri_pmode_i),
+ .ctrl_busy_o(ctrl_busy_o),
+
+ // decoder related signals
+ .illegal_insn_i (illegal_insn_o),
+ .ecall_insn_i (ecall_insn_dec),
+ .mret_insn_i (mret_insn_dec),
+ .dret_insn_i (dret_insn_dec),
+ .wfi_insn_i (wfi_insn_dec),
+ .ebrk_insn_i (ebrk_insn),
+ .csr_pipe_flush_i(csr_pipe_flush),
+ .csr_access_i (csr_access_o),
+ .csr_cheri_always_ok_i (csr_cheri_always_ok),
+
+ // from IF-ID pipeline
+ .instr_valid_i (instr_valid_i),
+ .instr_i (instr_rdata_i),
+ .instr_compressed_i (instr_rdata_c_i),
+ .instr_is_compressed_i (instr_is_compressed_i),
+ .instr_bp_taken_i (instr_bp_taken_i),
+ .instr_fetch_err_i (instr_fetch_err_i),
+ .instr_fetch_err_plus2_i(instr_fetch_err_plus2_i),
+ .instr_fetch_cheri_acc_vio_i (instr_fetch_cheri_acc_vio_i),
+ .instr_fetch_cheri_bound_vio_i (instr_fetch_cheri_bound_vio_i),
+
+ .pc_id_i (pc_id_i),
+
+ // to IF-ID pipeline
+ .instr_valid_clear_o(instr_valid_clear_o),
+ .id_in_ready_o (id_in_ready_o),
+ .controller_run_o (controller_run),
+
+ // to prefetcher
+ .instr_req_o (instr_req_o),
+ .pc_set_o (pc_set_o),
+ .pc_mux_o (pc_mux_o),
+ .nt_branch_mispredict_o(nt_branch_mispredict_o),
+ .exc_pc_mux_o (exc_pc_mux_o),
+ .exc_cause_o (exc_cause_o),
+
+ // LSU
+ .lsu_addr_last_i(lsu_addr_last_i),
+ .load_err_i (lsu_load_err_i),
+ .store_err_i (lsu_store_err_i),
+ .lsu_err_is_cheri_i (lsu_err_is_cheri_i),
+ .wb_exception_o (wb_exception),
+ .id_exception_o (unused_id_exception),
+ .id_exception_nc_o (id_exception_nc),
+
+ // jump/branch control
+ .branch_set_i (branch_set),
+ .branch_not_set_i (branch_not_set),
+ .jump_set_i (jump_set),
+
+ // interrupt signals
+ .csr_mstatus_mie_i(csr_mstatus_mie_i),
+ .irq_pending_i (irq_pending_i),
+ .irqs_i (irqs_i),
+ .irq_nm_i (irq_nm_i),
+ .nmi_mode_o (nmi_mode_o),
+
+ // CSR Controller Signals
+ .csr_save_if_o (csr_save_if_o),
+ .csr_save_id_o (csr_save_id_o),
+ .csr_save_wb_o (csr_save_wb_o),
+ .csr_restore_mret_id_o(csr_restore_mret_id_o),
+ .csr_restore_dret_id_o(csr_restore_dret_id_o),
+ .csr_save_cause_o (csr_save_cause_o),
+ .csr_mepcc_clrtag_o (csr_mepcc_clrtag_o),
+ .csr_mtval_o (csr_mtval_o),
+ .priv_mode_i (priv_mode_i),
+ .csr_mstatus_tw_i (csr_mstatus_tw_i),
+ .csr_pcc_perm_sr_i (csr_pcc_perm_sr_i),
+
+ // Debug Signal
+ .debug_mode_o (debug_mode_o),
+ .debug_cause_o (debug_cause_o),
+ .debug_csr_save_o (debug_csr_save_o),
+ .debug_req_i (debug_req_i),
+ .debug_single_step_i(debug_single_step_i),
+ .debug_ebreakm_i (debug_ebreakm_i),
+ .debug_ebreaku_i (debug_ebreaku_i),
+ .trigger_match_i (trigger_match_i),
+
+ .stall_id_i(stall_id),
+ .stall_wb_i(stall_wb),
+ .flush_id_o(flush_id),
+ .ready_wb_i(ready_wb_i),
+
+ // Performance Counters
+ .perf_jump_o (perf_jump_o),
+ .perf_tbranch_o(perf_tbranch_o),
+
+ .instr_is_cheri_i (instr_is_cheri_id_o) ,
+ .cheri_ex_valid_i (cheri_ex_valid_i) ,
+ .cheri_ex_err_i (cheri_ex_err_i) ,
+ .cheri_ex_err_info_i (cheri_ex_err_info_i) ,
+ .cheri_wb_err_i (cheri_wb_err_i) ,
+ .cheri_wb_err_info_i (cheri_wb_err_info_i) ,
+ .cheri_branch_req_i (cheri_branch_req_i) , // from cheri EX
+ .cheri_branch_target_i (cheri_branch_target_i)
+ );
+
+ assign multdiv_en_dec = mult_en_dec | div_en_dec;
+
+ // note data_req_allowed is already part of instr_executing
+ assign lsu_req = instr_executing ? data_req_allowed & lsu_req_dec : 1'b0;
+ assign mult_en_id = instr_executing ? mult_en_dec : 1'b0;
+ assign div_en_id = instr_executing ? div_en_dec : 1'b0;
+
+ assign lsu_req_o = lsu_req;
+ assign lsu_we_o = lsu_we;
+ assign lsu_type_o = lsu_type;
+ assign lsu_sign_ext_o = lsu_sign_ext;
+ assign lsu_wdata_o = rf_rdata_b_fwd;
+ // csr_op_en_o is set when CSR access should actually happen.
+  // csr_access_o is set when a CSR access instruction is present and is used to compute whether a CSR
+  // access is illegal. A combinational loop would be created if csr_op_en_o was used instead (as
+ // asserting it for an illegal csr access would result in a flush that would need to deassert it).
+
+ // assign csr_op_en_o = csr_access_o & instr_executing & instr_id_done_o;
+ // improve timing for CHERIoT mode (instr_id_done has too much logic)
+ assign csr_op_en_o = csr_access_o & instr_executing &
+ (CHERIoTEn ? instr_first_cycle : instr_id_done_o);
+
+ assign alu_operator_ex_o = alu_operator;
+ assign alu_operand_a_ex_o = alu_operand_a;
+ assign alu_operand_b_ex_o = alu_operand_b;
+
+ assign mult_en_ex_o = mult_en_id;
+ assign div_en_ex_o = div_en_id;
+
+ assign multdiv_operator_ex_o = multdiv_operator;
+ assign multdiv_signed_mode_ex_o = multdiv_signed_mode;
+ assign multdiv_operand_a_ex_o = rf_rdata_a_fwd;
+ assign multdiv_operand_b_ex_o = rf_rdata_b_fwd;
+
+ ////////////////////////
+ // Branch set control //
+ ////////////////////////
+
+ if (BranchTargetALU && !DataIndTiming) begin : g_branch_set_direct
+ // Branch set fed straight to controller with branch target ALU
+ // (condition pass/fail used same cycle as generated instruction request)
+ assign branch_set_raw = branch_set_raw_d;
+ end else begin : g_branch_set_flop
+ // Branch set flopped without branch target ALU, or in fixed time execution mode
+ // (condition pass/fail used next cycle where branch target is calculated)
+ logic branch_set_raw_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ branch_set_raw_q <= 1'b0;
+        // Bug here (see the 07082022 report): this should be qualified with instr_executing
+        // (same as id_fsm_q). Leave it for now and fix it later. QQQ
+ end else begin
+ branch_set_raw_q <= branch_set_raw_d;
+ end
+ end
+
+ // Branches always take two cycles in fixed time execution mode, with or without the branch
+ // target ALU (to avoid a path from the branch decision into the branch target ALU operand
+ // muxing).
+ assign branch_set_raw = (BranchTargetALU && !data_ind_timing_i) ? branch_set_raw_d :
+ branch_set_raw_q;
+
+ end
+
+ // Track whether the current instruction in ID/EX has done a branch or jump set.
+ assign branch_jump_set_done_d = (branch_set_raw | jump_set_raw | branch_jump_set_done_q) &
+ ~instr_valid_clear_o;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ branch_jump_set_done_q <= 1'b0;
+ end else begin
+ branch_jump_set_done_q <= branch_jump_set_done_d;
+ end
+ end
+
+ // the _raw signals from the state machine may be asserted for multiple cycles when
+ // instr_executing_spec is asserted and instr_executing is not asserted. This may occur where
+  // a memory error is seen or there are outstanding memory accesses (indicating a load or store is
+ // in the WB stage). The branch or jump speculatively begins the fetch but is held back from
+ // completing until it is certain the outstanding access hasn't seen a memory error. This logic
+ // ensures only the first cycle of a branch or jump set is sent to the controller to prevent
+ // needless extra IF flushes and fetches.
+ assign jump_set = jump_set_raw & ~branch_jump_set_done_q;
+ assign branch_set = branch_set_raw & ~branch_jump_set_done_q;
+
+ // Branch condition is calculated in the first cycle and flopped for use in the second cycle
+ // (only used in fixed time execution mode to determine branch destination).
+ if (DataIndTiming) begin : g_sec_branch_taken
+ logic branch_taken_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ branch_taken_q <= 1'b0;
+ end else begin
+ branch_taken_q <= branch_decision_i;
+ end
+ end
+
+ assign branch_taken = ~data_ind_timing_i | branch_taken_q;
+
+ end else begin : g_nosec_branch_taken
+
+ // Signal unused without fixed time execution mode - only taken branches will trigger
+ // branch_set_raw
+ assign branch_taken = 1'b1;
+
+ end
+
+ // Holding branch_set/jump_set high for more than one cycle should not cause a functional issue.
+ // However it could generate needless prefetch buffer flushes and instruction fetches. The ID/EX
+  // design ensures that this never happens for non-predicted branches.
+ `ASSERT(NeverDoubleBranch, branch_set & ~instr_bp_taken_i |=> ~branch_set)
+ `ASSERT(NeverDoubleJump, jump_set & ~instr_bp_taken_i |=> ~jump_set)
+
+ //////////////////////////////
+ // Branch not-taken address //
+ //////////////////////////////
+
+ if (BranchPredictor) begin : g_calc_nt_addr
+ assign nt_branch_addr_o = pc_id_i + (instr_is_compressed_i ? 32'd2 : 32'd4);
+ end else begin : g_n_calc_nt_addr
+ assign nt_branch_addr_o = 32'd0;
+ end
+
+ ///////////////
+ // ID-EX FSM //
+ ///////////////
+
+ typedef enum logic { FIRST_CYCLE, MULTI_CYCLE } id_fsm_e;
+ id_fsm_e id_fsm_q, id_fsm_d;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin : id_pipeline_reg
+ if (!rst_ni) begin
+ id_fsm_q <= FIRST_CYCLE;
+ end else if (instr_executing) begin
+ id_fsm_q <= id_fsm_d;
+ end
+ end
+
+ // ID/EX stage can be in two states, FIRST_CYCLE and MULTI_CYCLE. An instruction enters
+ // MULTI_CYCLE if it requires multiple cycles to complete regardless of stalls and other
+ // considerations. An instruction may be held in FIRST_CYCLE if it's unable to begin executing
+ // (this is controlled by instr_executing).
+
+ always_comb begin
+ id_fsm_d = id_fsm_q;
+ rf_we_raw = rf_we_dec;
+ stall_multdiv = 1'b0;
+ stall_jump = 1'b0;
+ stall_branch = 1'b0;
+ stall_alu = 1'b0;
+ stall_cheri = 1'b0;
+ branch_set_raw_d = 1'b0;
+ branch_not_set = 1'b0;
+ jump_set_raw = 1'b0;
+ perf_branch_o = 1'b0;
+
+ if (instr_executing_spec) begin
+ unique case (id_fsm_q)
+ FIRST_CYCLE: begin
+ unique case (1'b1)
+ lsu_req_dec: begin
+ if (!WritebackStage) begin
+ // LSU operation
+ id_fsm_d = MULTI_CYCLE;
+ end else if(~lsu_req_done_i) begin
+ id_fsm_d = MULTI_CYCLE;
+ end
+ end
+ cheri_lsu_req_dec: begin
+ if (cheri_pmode_i) begin
+ if (!WritebackStage) begin
+ id_fsm_d = MULTI_CYCLE;
+ end else if(~lsu_req_done_i) begin // covers the lsu_cheri_err case (1cycle)
+ id_fsm_d = MULTI_CYCLE;
+ end
+ end
+ end
+ multdiv_en_dec: begin
+ // MUL or DIV operation
+ if (~ex_valid_i) begin
+ // When single-cycle multiply is configured mul can finish in the first cycle so
+ // only enter MULTI_CYCLE state if a result isn't immediately available
+ id_fsm_d = MULTI_CYCLE;
+ rf_we_raw = 1'b0;
+ stall_multdiv = 1'b1;
+ end
+ end
+ branch_in_dec: begin
+ // cond branch operation
+ // All branches take two cycles in fixed time execution mode, regardless of branch
+ // condition.
+ id_fsm_d = (data_ind_timing_i || (!BranchTargetALU && branch_decision_i)) ?
+ MULTI_CYCLE : FIRST_CYCLE;
+ stall_branch = (~BranchTargetALU & branch_decision_i) | data_ind_timing_i;
+ branch_set_raw_d = (branch_decision_i | data_ind_timing_i);
+
+ if (BranchPredictor) begin
+ branch_not_set = ~branch_decision_i;
+ end
+
+ perf_branch_o = 1'b1;
+ end
+ jump_in_dec: begin
+ // uncond branch operation
+ // BTALU means jumps only need one cycle
+ id_fsm_d = BranchTargetALU ? FIRST_CYCLE : MULTI_CYCLE;
+ stall_jump = ~BranchTargetALU;
+ jump_set_raw = jump_set_dec;
+ end
+ alu_multicycle_dec: begin
+ stall_alu = 1'b1;
+ id_fsm_d = MULTI_CYCLE;
+ rf_we_raw = 1'b0;
+ end
+ cheri_multicycle_dec: begin
+ if (cheri_pmode_i) begin
+ id_fsm_d = MULTI_CYCLE;
+ rf_we_raw = 1'b0;
+ stall_cheri = 1'b1;
+ end
+ end
+ default: begin
+ id_fsm_d = FIRST_CYCLE;
+ end
+ endcase
+ end
+
+ MULTI_CYCLE: begin
+ if(multdiv_en_dec) begin
+ rf_we_raw = rf_we_dec & ex_valid_i;
+ end
+
+ if (multicycle_done & ready_wb_i) begin
+ id_fsm_d = FIRST_CYCLE;
+ end else begin
+ stall_multdiv = multdiv_en_dec;
+ stall_branch = branch_in_dec;
+ stall_jump = jump_in_dec;
+ stall_cheri = cheri_multicycle_dec;
+ end
+ end
+
+ default: begin
+ id_fsm_d = FIRST_CYCLE;
+ end
+ endcase
+ end
+ end
+
+ // Note for the two-stage configuration ready_wb_i is always set
+ assign multdiv_ready_id_o = ready_wb_i;
+
+ `ASSERT(StallIDIfMulticycle, (id_fsm_q == FIRST_CYCLE) & (id_fsm_d == MULTI_CYCLE) |-> stall_id)
+
+
+ // Stall the ID/EX stage for reasons that relate to the instruction in ID/EX; update the
+ // assertion below if modifying this.
+ assign stall_id = stall_ld_hz | stall_mem | stall_multdiv | stall_jump | stall_branch | stall_cheri |
+ stall_alu | stall_cheri_trvk;
+
+ // Generally illegal instructions have no reason to stall, however they must still stall waiting
+ // for outstanding memory requests so exceptions related to them take priority over the illegal
+ // instruction exception.
+ `ASSERT(IllegalInsnStallMustBeMemStall, illegal_insn_o & stall_id |-> stall_mem &
+ ~(stall_ld_hz | stall_multdiv | stall_jump | stall_branch | stall_alu | stall_cheri_trvk))
+
+ assign instr_done = ~stall_id & ~flush_id & instr_executing;
+
+ // Signal instruction in ID is in its first cycle. It can remain in its
+ // first cycle if it is stalled.
+ assign instr_first_cycle = instr_valid_i & (id_fsm_q == FIRST_CYCLE);
+ // Used by RVFI to know when to capture register read data
+ // Used by ALU to access RS3 if ternary instruction.
+ assign instr_first_cycle_id_o = instr_first_cycle;
+
+ if (WritebackStage) begin : gen_stall_mem
+ // Register read address matches write address in WB
+ logic rf_rd_a_wb_match;
+ logic rf_rd_b_wb_match;
+ // Hazard between registers being read and written
+ logic rf_rd_a_hz;
+ logic rf_rd_b_hz;
+
+ logic outstanding_memory_access;
+
+ logic instr_kill;
+
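+ // A load/store (legacy RV32 or cheri) is done once stall_mem clears; all other multicycle
+ // instructions are done when the EX block signals ex_valid_all.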
+ assign multicycle_done = (lsu_req_dec|cheri_lsu_req_dec) ? ~stall_mem : ex_valid_all;
+
+ // Is a memory access ongoing that isn't finishing this cycle
+ assign outstanding_memory_access = (outstanding_load_wb_i | outstanding_store_wb_i) &
+ ~lsu_resp_valid_i;
+
+ // Can start a new memory access if any previous one has finished or is finishing
+ assign data_req_allowed = ~outstanding_memory_access;
+
+ // Instruction won't execute because:
+ // - There is a pending exception in writeback
+ // The instruction in ID/EX will be flushed and the core will jump to an exception handler
+ // - The controller isn't running instructions
+ // This either happens in preparation for a flush and jump to an exception handler e.g. in
+ // response to an IRQ or debug request, or whilst the core is sleeping or resetting/fetching the
+ // first instruction, in which case any valid instruction in ID/EX should be ignored.
+ // - There was an error on instruction fetch
+
+ // cheri instructions can only generate an exception after execution;
+ // excluding cheri EX exceptions from instr_kill improves timing
+
+ assign instr_kill = instr_fetch_err_i |
+ wb_exception |
+ id_exception_nc | // exclude cheri EX exceptions
+ ~controller_run;
+
+ // With writeback stage instructions must be prevented from executing if there is:
+ // - A load hazard
+ // - A pending memory access
+ // If it receives an error response this results in a precise exception from WB, so the ID/EX
+ // instruction must not execute until the error response is known.
+ // - A load/store error
+ // This will cause a precise exception for the instruction in WB so ID/EX instruction must not
+ // execute
+ //
+ // instr_executing_spec is a speculative signal. It indicates an instruction can execute
+ // assuming there are no exceptions from writeback and any outstanding memory access won't
+ // receive an error. It is required so branch and jump requests don't factor in an incoming dmem
+ // error (that in turn would factor directly into imem requests leading to a feedthrough path).
+ //
+ // instr_executing is the full signal, it will only allow execution once any potential
+ // exceptions from writeback have been resolved.
+ assign instr_executing_spec = instr_valid_i &
+ ~instr_fetch_err_i &
+ controller_run &
+ ~stall_ld_hz &
+ ~stall_cheri_trvk;
+
+ assign instr_executing = instr_valid_i &
+ ~instr_kill &
+ ~stall_ld_hz &
+ ~stall_cheri_trvk &
+ ~outstanding_memory_access;
+
+ // allowing a cheri instruction to start execution - valid instruction not stalled by WB/hz
+ // note we can't use instr_kill here since it includes id_exception (the cheri EX error), which causes a
+ // comb loop.
+
+ assign cheri_exec_id_o = cheri_pmode_i & instr_valid_i &
+ ~instr_fetch_err_i &
+ instr_is_legal_cheri &
+ controller_run &
+ ~wb_exception &
+ ~stall_ld_hz &
+ ~stall_cheri_trvk &
+ ~outstanding_memory_access;
+
+
+ `ASSERT(IbexExecutingSpecIfExecuting, instr_executing |-> instr_executing_spec)
+
+ `ASSERT(IbexStallIfValidInstrNotExecuting,
+ instr_valid_i & ~instr_kill & ~instr_executing |-> stall_id)
+
+ `ASSERT(IbexCannotRetireWithPendingExceptions,
+ instr_done |-> ~(wb_exception | outstanding_memory_access))
+
+ // Stall for reasons related to memory:
+ // * There is an outstanding memory access that won't resolve this cycle (need to wait to allow
+ // precise exceptions)
+ // * There is a load/store request not being granted or which is unaligned and waiting to issue
+ // a second request (needs to stay in ID for the address calculation)
+
+
+ // For pipeline timing/stalling, we treat cheri data load/stores the same as legacy RV32 load/stores
+ assign stall_mem = instr_valid_i & (outstanding_memory_access |
+ ((lsu_req_dec | cheri_lsu_req_dec) & ~lsu_req_done_i));
+
+ // If we stall a load in ID for any reason, it must not make an LSU request
+ // (otherwise we might issue two requests for the same instruction)
+ `ASSERT(IbexStallMemNoRequest,
+ instr_valid_i & lsu_req_dec & ~instr_done |-> ~lsu_req_done_i)
+
+ assign rf_rd_a_wb_match = (rf_waddr_wb_i == rf_raddr_a_o) & |rf_raddr_a_o;
+ assign rf_rd_b_wb_match = (rf_waddr_wb_i == rf_raddr_b_o) & |rf_raddr_b_o;
+
+ assign rf_rd_a_wb_match_o = rf_rd_a_wb_match;
+ assign rf_rd_b_wb_match_o = rf_rd_b_wb_match;
+
+ // If the instruction is reading a register that the load will be writing, stall in
+ // ID until the load is complete. No need to stall when reading the zero register.
+ assign rf_rd_a_hz = rf_rd_a_wb_match & rf_ren_a;
+ assign rf_rd_b_hz = rf_rd_b_wb_match & rf_ren_b;
+
+ // If the instruction is reading a register that writeback is writing, forward the writeback data
+ // to the read data. Note this doesn't factor in load data as it arrives too late; such hazards
+ // are resolved via a stall (see above).
+ assign rf_rdata_a_fwd = rf_rd_a_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_a_i;
+ assign rf_rdata_b_fwd = rf_rd_b_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_b_i;
+
+ assign stall_ld_hz = outstanding_load_wb_i & (rf_rd_a_hz | rf_rd_b_hz);
+
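+ // Qualify the write-enable / load indication with a valid, non-faulting, legal instruction so
+ // the destination-register readiness check below only applies to genuine register writes.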
+ logic rf_we_or_load_valid;
+ assign rf_we_or_load_valid = rf_we_or_load & instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o;
+
+
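+ // With CheriPPLBC enabled, stall ID while any source or destination register used by this
+ // instruction has not yet been marked ready (rf_reg_rdy_i) by the cheri trvk stage.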
+ assign stall_cheri_trvk = (CHERIoTEn & cheri_pmode_i & CheriPPLBC) ?
+ ((rf_ren_a && ~rf_reg_rdy_i[rf_raddr_a_o]) |
+ (rf_ren_b && ~rf_reg_rdy_i[rf_raddr_b_o]) |
+ (rf_we_or_load_valid && ~rf_reg_rdy_i[rf_waddr_id_o])) :
+ 1'b0;
+
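+ // Classify the instruction for the writeback stage so WB can track outstanding loads/stores.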
+ assign instr_type_wb_o = ~lsu_req_dec ? WB_INSTR_OTHER :
+ lsu_we ? WB_INSTR_STORE :
+ WB_INSTR_LOAD;
+
+ assign instr_id_done_o = en_wb_o & ready_wb_i;
+
+ // Stall ID/EX as instruction in ID/EX cannot proceed to writeback yet
+ assign stall_wb = en_wb_o & ~ready_wb_i;
+
+ assign perf_dside_wait_o = instr_valid_i & ~instr_kill &
+ (outstanding_memory_access | stall_ld_hz | stall_cheri_trvk);
+ end else begin : gen_no_stall_mem
+
+ assign multicycle_done = (cheri_lsu_req_dec | lsu_req_dec) ? lsu_resp_valid_i : ex_valid_all;
+
+ assign data_req_allowed = instr_first_cycle;
+
+ // Without Writeback Stage always stall the first cycle of a load/store.
+ // Then stall until it is complete
+ assign stall_mem = instr_valid_i & ((lsu_req_dec | cheri_lsu_req_dec) & (~lsu_resp_valid_i | instr_first_cycle));
+
+ // No load hazards without Writeback Stage
+ assign stall_ld_hz = 1'b0;
+ assign stall_cheri_trvk = 1'b0; // CheriPPLBC can't work with 2-stage pipeline configuration
+
+ // Without writeback stage any valid instruction that hasn't seen an error will execute
+ assign instr_executing_spec = instr_valid_i & ~instr_fetch_err_i & controller_run;
+ assign instr_executing = instr_executing_spec;
+ assign cheri_exec_id_o = instr_executing;
+
+ `ASSERT(IbexStallIfValidInstrNotExecuting,
+ instr_valid_i & ~instr_fetch_err_i & ~instr_executing & controller_run |-> stall_id)
+
+ // No data forwarding without writeback stage so always take source register data direct from
+ // register file
+ assign rf_rdata_a_fwd = rf_rdata_a_i;
+ assign rf_rdata_b_fwd = rf_rdata_b_i;
+
+ assign rf_rd_a_wb_match_o = 1'b0;
+ assign rf_rd_b_wb_match_o = 1'b0;
+
+ // Unused Writeback stage only IO & wiring
+ // Assign inputs and internal wiring to unused signals to satisfy lint checks
+ // Tie-off outputs to constant values
+ logic unused_data_req_done_ex;
+ logic [4:0] unused_rf_waddr_wb;
+ logic unused_rf_write_wb;
+ logic unused_outstanding_load_wb;
+ logic unused_outstanding_store_wb;
+ logic unused_wb_exception;
+ logic [31:0] unused_rf_wdata_fwd_wb;
+
+ assign unused_data_req_done_ex = lsu_req_done_i;
+ assign unused_rf_waddr_wb = rf_waddr_wb_i;
+ assign unused_rf_write_wb = rf_write_wb_i;
+ assign unused_outstanding_load_wb = outstanding_load_wb_i;
+ assign unused_outstanding_store_wb = outstanding_store_wb_i;
+ assign unused_wb_exception = wb_exception;
+ assign unused_rf_wdata_fwd_wb = rf_wdata_fwd_wb_i;
+
+ assign instr_type_wb_o = WB_INSTR_OTHER;
+ assign stall_wb = 1'b0;
+
+ assign perf_dside_wait_o = instr_executing & lsu_req_dec & ~lsu_resp_valid_i;
+
+ assign instr_id_done_o = instr_done;
+ end
+
+ // Signal which instructions to count as retired in minstret; all traps along with ebrk and
+ // ecall instructions are not counted.
+ assign instr_perf_count_id_o = ~ebrk_insn & ~ecall_insn_dec & ~illegal_insn_dec &
+ ~illegal_csr_insn_i & ~instr_fetch_err_i;
+
+ // An instruction is ready to move to the writeback stage (or retire if there is no writeback
+ // stage)
+ assign en_wb_o = instr_done;
+
+ assign perf_mul_wait_o = stall_multdiv & mult_en_dec;
+ assign perf_div_wait_o = stall_multdiv & div_en_dec;
+
+ //////////
+ // FCOV //
+ //////////
+
+ `DV_FCOV_SIGNAL_GEN_IF(logic, rf_rd_wb_hz,
+ (gen_stall_mem.rf_rd_a_hz | gen_stall_mem.rf_rd_b_hz) & instr_valid_i, WritebackStage)
+ `DV_FCOV_SIGNAL(logic, branch_taken,
+ instr_executing & (id_fsm_q == FIRST_CYCLE) & branch_decision_i)
+ `DV_FCOV_SIGNAL(logic, branch_not_taken,
+ instr_executing & (id_fsm_q == FIRST_CYCLE) & ~branch_decision_i)
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // Selectors must be known/valid.
+ `ASSERT_KNOWN_IF(IbexAluOpMuxSelKnown, alu_op_a_mux_sel, instr_valid_i)
+ `ASSERT(IbexAluAOpMuxSelValid, instr_valid_i |-> alu_op_a_mux_sel inside {
+ OP_A_REG_A,
+ OP_A_FWD,
+ OP_A_CURRPC,
+ OP_A_IMM})
+ `ASSERT_KNOWN_IF(IbexBTAluAOpMuxSelKnown, bt_a_mux_sel, instr_valid_i)
+ `ASSERT(IbexBTAluAOpMuxSelValid, instr_valid_i |-> bt_a_mux_sel inside {
+ OP_A_REG_A,
+ OP_A_CURRPC})
+ `ASSERT_KNOWN_IF(IbexBTAluBOpMuxSelKnown, bt_b_mux_sel, instr_valid_i)
+ `ASSERT(IbexBTAluBOpMuxSelValid, instr_valid_i |-> bt_b_mux_sel inside {
+ IMM_B_I,
+ IMM_B_B,
+ IMM_B_J,
+ IMM_B_INCR_PC})
+ `ASSERT(IbexRegfileWdataSelValid, instr_valid_i |-> rf_wdata_sel inside {
+ RF_WD_EX,
+ RF_WD_CSR})
+ `ASSERT_KNOWN(IbexWbStateKnown, id_fsm_q)
+
+ // Branch decision must be valid when jumping.
+ `ASSERT_KNOWN_IF(IbexBranchDecisionValid, branch_decision_i,
+ instr_valid_i && !(illegal_csr_insn_i || instr_fetch_err_i))
+
+ // Instruction delivered to ID stage can not contain X.
+ `ASSERT_KNOWN_IF(IbexIdInstrKnown, instr_rdata_i,
+ instr_valid_i && !(illegal_c_insn_i || instr_fetch_err_i))
+
+ // Instruction delivered to ID stage can not contain X.
+ `ASSERT_KNOWN_IF(IbexIdInstrALUKnown, instr_rdata_alu_i,
+ instr_valid_i && !(illegal_c_insn_i || instr_fetch_err_i))
+
+ // Multicycle enable signals must be unique.
+ `ASSERT(IbexMulticycleEnableUnique,
+ $onehot0({lsu_req_dec, multdiv_en_dec, branch_in_dec, jump_in_dec}))
+
+ // Duplicated instruction flops must match
+ // === as DV environment can produce instructions with Xs in, so must use precise match that
+ // includes Xs
+ `ASSERT(IbexDuplicateInstrMatch, instr_valid_i |-> instr_rdata_i === instr_rdata_alu_i)
+
+ `ifdef CHECK_MISALIGNED
+ `ASSERT(IbexMisalignedMemoryAccess, !lsu_addr_incr_req_i)
+ `endif
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv
new file mode 100644
index 0000000..2829dd5
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv
@@ -0,0 +1,807 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Instruction Fetch Stage
+ *
+ * Instruction fetch unit: Selection of the next PC, and buffering (sampling) of
+ * the read instruction.
+ */
+
+`include "prim_assert.sv"
+
+module cheriot_if_stage import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter int unsigned BusSizeECC = BUS_SIZE,
+ parameter int unsigned TagSizeECC = IC_TAG_SIZE,
+ parameter int unsigned LineSizeECC = IC_LINE_SIZE,
+ parameter bit PCIncrCheck = 1'b0,
+ parameter bit ResetAll = 1'b0,
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit CHERIoTEn = 1'b1
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic cheri_pmode_i,
+ input logic [31:0] boot_addr_i, // also used for mtvec
+ input logic req_i, // instruction request control
+ input logic debug_mode_i,
+
+ // instruction cache interface
+ output logic instr_req_o,
+ output logic [31:0] instr_addr_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+
+ // ICache RAM IO
+ output logic [IC_NUM_WAYS-1:0] ic_tag_req_o,
+ output logic ic_tag_write_o,
+ output logic [IC_INDEX_W-1:0] ic_tag_addr_o,
+ output logic [TagSizeECC-1:0] ic_tag_wdata_o,
+ input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS],
+ output logic [IC_NUM_WAYS-1:0] ic_data_req_o,
+ output logic ic_data_write_o,
+ output logic [IC_INDEX_W-1:0] ic_data_addr_o,
+ output logic [LineSizeECC-1:0] ic_data_wdata_o,
+ input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS],
+ input logic ic_scr_key_valid_i,
+
+ // output of ID stage
+ output logic instr_valid_id_o, // instr in IF-ID is valid
+ output logic instr_new_id_o, // instr in IF-ID is new
+ output logic [31:0] instr_rdata_id_o, // instr for ID stage
+ output logic [31:0] instr_rdata_alu_id_o, // replicated instr for ID stage
+ // to reduce fan-out
+ output logic [15:0] instr_rdata_c_id_o, // compressed instr for ID stage
+ // (mtval), meaningful only if
+ // instr_is_compressed_id_o = 1'b1
+ output logic instr_is_compressed_id_o, // compressed decoder thinks this
+ // is a compressed instr
+ output logic instr_bp_taken_o, // instruction was predicted to be
+ // a taken branch
+ output logic instr_fetch_err_o, // bus error on fetch
+ output logic instr_fetch_err_plus2_o, // bus error misaligned
+ output logic illegal_c_insn_id_o, // compressed decoder thinks this
+ // is an invalid instr
+ output logic instr_fetch_cheri_acc_vio_o, // cheri access violation on fetch (perm/tag/seal)
+ output logic instr_fetch_cheri_bound_vio_o, // cheri bounds violation on fetch
+ output logic dummy_instr_id_o, // Instruction is a dummy
+ output logic [31:0] pc_if_o,
+ output logic [31:0] pc_id_o,
+ input logic pmp_err_if_i,
+ input logic pmp_err_if_plus2_i,
+
+ // control signals
+ input logic instr_valid_clear_i, // clear instr valid bit in IF-ID
+ input logic pc_set_i, // set the PC to a new value
+ input pc_sel_e pc_mux_i, // selector for PC multiplexer
+ input logic nt_branch_mispredict_i, // Not-taken branch in ID/EX was
+ // mispredicted (predicted taken)
+ input logic [31:0] nt_branch_addr_i, // Not-taken branch address in ID/EX
+ input exc_pc_sel_e exc_pc_mux_i, // selects ISR address
+ input exc_cause_e exc_cause, // selects ISR address for
+ // vectorized interrupt lines
+ input logic dummy_instr_en_i,
+ input logic [2:0] dummy_instr_mask_i,
+ input logic dummy_instr_seed_en_i,
+ input logic [31:0] dummy_instr_seed_i,
+ input logic icache_enable_i,
+ input logic icache_inval_i,
+
+ // jump and branch target
+ input logic [31:0] branch_target_ex_i, // branch/jump target address
+
+ // CSRs
+ input logic [31:0] csr_mepc_i, // PC to restore after handling
+ // the interrupt/exception
+ input logic [31:0] csr_depc_i, // PC to restore after handling
+ // the debug request
+ input logic [31:0] csr_mtvec_i, // base PC to jump to on exception
+ output logic csr_mtvec_init_o, // tell CS regfile to init mtvec
+
+ // pipeline stall
+ input logic id_in_ready_i, // ID stage is ready for new instr
+
+ // misc signals
+ output logic pc_mismatch_alert_o,
+ output logic if_busy_o, // IF stage is busy fetching instr
+ input pcc_cap_t pcc_cap_i
+);
+
+ logic instr_valid_id_d, instr_valid_id_q;
+ logic instr_new_id_d, instr_new_id_q;
+
+ // prefetch buffer related signals
+ logic prefetch_busy;
+ logic branch_req;
+ logic [31:0] fetch_addr_n;
+ logic unused_fetch_addr_n0;
+
+ logic fetch_valid;
+ logic fetch_ready;
+ logic [31:0] fetch_rdata;
+ logic [31:0] fetch_addr;
+ logic fetch_err;
+ logic fetch_err_plus2;
+
+ logic [31:0] instr_decompressed;
+ logic illegal_c_insn;
+ logic instr_is_compressed;
+
+ logic if_instr_valid;
+ logic [31:0] if_instr_rdata;
+ logic [31:0] if_instr_addr;
+ logic if_instr_bus_err;
+ logic if_instr_pmp_err;
+ logic if_instr_err;
+ logic if_instr_err_plus2;
+
+ logic [31:0] exc_pc;
+
+ logic [5:0] irq_id;
+ logic unused_irq_bit;
+
+ logic if_id_pipe_reg_we; // IF-ID pipeline reg write enable
+
+ // Dummy instruction signals
+ logic stall_dummy_instr;
+ logic [31:0] instr_out;
+ logic instr_is_compressed_out;
+ logic illegal_c_instr_out;
+ logic instr_err_out;
+
+ logic predict_branch_taken;
+ logic [31:0] predict_branch_pc;
+
+ cheriot_pkg::pc_sel_e pc_mux_internal;
+
+ logic [7:0] unused_boot_addr;
+ logic [7:0] unused_csr_mtvec;
+
+ logic cheri_acc_vio, cheri_bound_vio;
+ logic cheri_force_uc;
+
+ assign unused_boot_addr = boot_addr_i[7:0];
+ assign unused_csr_mtvec = csr_mtvec_i[7:0];
+
+ // extract interrupt ID from exception cause
+ assign irq_id = {exc_cause};
+ assign unused_irq_bit = irq_id[5]; // MSB distinguishes interrupts from exceptions
+
+ // exception PC selection mux
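+ // In CHERIoT mode with csr_mtvec_i[0] clear the vector base is kept word aligned
+ // ({mtvec[31:2], 2'b00}); otherwise the legacy 256-byte aligned base is used.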
+ always_comb begin : exc_pc_mux
+ unique case (exc_pc_mux_i)
+ EXC_PC_EXC: exc_pc = (csr_mtvec_i[0] | ~cheri_pmode_i)? { csr_mtvec_i[31:8], 8'h00 } : {csr_mtvec_i[31:2], 2'b00};
+ EXC_PC_IRQ: exc_pc = (csr_mtvec_i[0] | ~cheri_pmode_i) ? { csr_mtvec_i[31:8], 1'b0, irq_id[4:0], 2'b00 } : {csr_mtvec_i[31:2], 2'b00};
+ EXC_PC_DBD: exc_pc = DmHaltAddr;
+ EXC_PC_DBG_EXC: exc_pc = DmExceptionAddr;
+ default: exc_pc = { csr_mtvec_i[31:8], 8'h00 };
+ endcase
+ end
+
+ // The Branch predictor can provide a new PC which is internal to if_stage. Only override the mux
+ // select to choose this if the core isn't already trying to set a PC.
+ assign pc_mux_internal =
+ (BranchPredictor && predict_branch_taken && !pc_set_i) ? PC_BP : pc_mux_i;
+
+ // fetch address selection mux
+ always_comb begin : fetch_addr_mux
+ unique case (pc_mux_internal)
+ PC_BOOT: fetch_addr_n = { boot_addr_i[31:8], 8'h80 };
+ PC_JUMP: fetch_addr_n = branch_target_ex_i;
+ PC_EXC: fetch_addr_n = exc_pc; // set PC to exception handler
+ PC_ERET: fetch_addr_n = csr_mepc_i; // restore PC when returning from EXC
+ PC_DRET: fetch_addr_n = csr_depc_i;
+ // Without the branch predictor we will never get pc_mux_internal == PC_BP. We still handle the
+ // no branch predictor case here to ensure redundant mux logic isn't synthesised.
+ PC_BP: fetch_addr_n = BranchPredictor ? predict_branch_pc : { boot_addr_i[31:8], 8'h80 };
+ default: fetch_addr_n = { boot_addr_i[31:8], 8'h80 };
+ endcase
+ end
+
+ // tell CS register file to initialize mtvec on boot
+ assign csr_mtvec_init_o = (pc_mux_i == PC_BOOT) & pc_set_i;
+ if (ICache) begin : gen_icache
+ // Full I-Cache option
+ cheriot_icache #(
+ .ICacheECC (ICacheECC),
+ .ResetAll (ResetAll),
+ .BusSizeECC (BusSizeECC),
+ .TagSizeECC (TagSizeECC),
+ .LineSizeECC (LineSizeECC)
+ ) icache_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+
+ .req_i ( req_i ),
+
+ .branch_i ( branch_req ),
+ .branch_mispredict_i ( nt_branch_mispredict_i ),
+ .mispredict_addr_i ( nt_branch_addr_i ),
+ .addr_i ( {fetch_addr_n[31:1], 1'b0} ),
+
+ .ready_i ( fetch_ready ),
+ .valid_o ( fetch_valid ),
+ .rdata_o ( fetch_rdata ),
+ .addr_o ( fetch_addr ),
+ .err_o ( fetch_err ),
+ .err_plus2_o ( fetch_err_plus2 ),
+
+ .instr_req_o ( instr_req_o ),
+ .instr_addr_o ( instr_addr_o ),
+ .instr_gnt_i ( instr_gnt_i ),
+ .instr_rvalid_i ( instr_rvalid_i ),
+ .instr_rdata_i ( instr_rdata_i ),
+ .instr_err_i ( instr_err_i ),
+
+ .ic_tag_req_o ( ic_tag_req_o ),
+ .ic_tag_write_o ( ic_tag_write_o ),
+ .ic_tag_addr_o ( ic_tag_addr_o ),
+ .ic_tag_wdata_o ( ic_tag_wdata_o ),
+ .ic_tag_rdata_i ( ic_tag_rdata_i ),
+ .ic_data_req_o ( ic_data_req_o ),
+ .ic_data_write_o ( ic_data_write_o ),
+ .ic_data_addr_o ( ic_data_addr_o ),
+ .ic_data_wdata_o ( ic_data_wdata_o ),
+ .ic_data_rdata_i ( ic_data_rdata_i ),
+ .ic_scr_key_valid_i ( ic_scr_key_valid_i ),
+
+ .icache_enable_i ( icache_enable_i ),
+ .icache_inval_i ( icache_inval_i ),
+ .busy_o ( prefetch_busy )
+ );
+
+ end else begin : gen_prefetch_buffer
+
+ // prefetch buffer, caches a fixed number of instructions
+ cheriot_prefetch_buffer #(
+ .ResetAll (ResetAll)
+ ) prefetch_buffer_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+
+ .req_i ( req_i ),
+
+ .branch_i ( branch_req ),
+ .branch_mispredict_i ( nt_branch_mispredict_i ),
+ .mispredict_addr_i ( nt_branch_addr_i ),
+ .addr_i ( {fetch_addr_n[31:1], 1'b0} ),
+
+ .ready_i ( fetch_ready ),
+ .valid_o ( fetch_valid ),
+ .rdata_o ( fetch_rdata ),
+ .addr_o ( fetch_addr ),
+ .err_o ( fetch_err ),
+ .err_plus2_o ( fetch_err_plus2 ),
+
+ .cheri_force_uc_i ( cheri_force_uc ),
+
+ .instr_req_o ( instr_req_o ),
+ .instr_addr_o ( instr_addr_o ),
+ .instr_gnt_i ( instr_gnt_i ),
+ .instr_rvalid_i ( instr_rvalid_i ),
+ .instr_rdata_i ( instr_rdata_i ),
+ .instr_err_i ( instr_err_i ),
+
+ .busy_o ( prefetch_busy )
+ );
+
+ // ICache tieoffs
+ logic unused_icen, unused_icinv, unused_scr_key_valid;
+ logic [TagSizeECC-1:0] unused_tag_ram_input [IC_NUM_WAYS];
+ logic [LineSizeECC-1:0] unused_data_ram_input [IC_NUM_WAYS];
+ assign unused_icen = icache_enable_i;
+ assign unused_icinv = icache_inval_i;
+ assign unused_tag_ram_input = ic_tag_rdata_i;
+ assign unused_data_ram_input = ic_data_rdata_i;
+ assign unused_scr_key_valid = ic_scr_key_valid_i;
+ assign ic_tag_req_o = 'b0;
+ assign ic_tag_write_o = 'b0;
+ assign ic_tag_addr_o = 'b0;
+ assign ic_tag_wdata_o = 'b0;
+ assign ic_data_req_o = 'b0;
+ assign ic_data_write_o = 'b0;
+ assign ic_data_addr_o = 'b0;
+ assign ic_data_wdata_o = 'b0;
+
+`ifndef SYNTHESIS
+ // If we don't instantiate an icache and this is a simulation then we have a problem because the
+ // simulator might discard the icache module entirely, including some DPI exports that it
+ // implies. This then causes problems for linking against C++ testbench code that expected them.
+ // As a slightly ugly hack, let's define the DPI functions here (the real versions are defined
+ // in prim_util_get_scramble_params.svh)
+ export "DPI-C" function simutil_get_scramble_key;
+ export "DPI-C" function simutil_get_scramble_nonce;
+ function automatic int simutil_get_scramble_key(output bit [127:0] val);
+ return 0;
+ endfunction
+ function automatic int simutil_get_scramble_nonce(output bit [319:0] nonce);
+ return 0;
+ endfunction
+`endif
+ end
+
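+ // Instruction fetches are always halfword aligned, so bit 0 of fetch_addr_n is unused.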
+ assign unused_fetch_addr_n0 = fetch_addr_n[0];
+
+ assign branch_req = pc_set_i | predict_branch_taken;
+
+ assign pc_if_o = if_instr_addr;
+ assign if_busy_o = prefetch_busy;
+
+ // PMP errors
+ // An error can come from the instruction address, or the next instruction address for unaligned,
+ // uncompressed instructions.
+ assign if_instr_pmp_err = pmp_err_if_i |
+ (if_instr_addr[1] & ~instr_is_compressed & pmp_err_if_plus2_i);
+
+ // Combine bus errors and pmp errors
+ assign if_instr_err = if_instr_bus_err | if_instr_pmp_err | cheri_acc_vio | cheri_bound_vio;
+
+ // Capture the second half of the address for errors on the second part of an instruction
+ // LEC_NOT_COMPATIBLE
+ assign if_instr_err_plus2 = ((if_instr_addr[1] & ~instr_is_compressed & pmp_err_if_plus2_i) |
+ fetch_err_plus2) & ~pmp_err_if_i;
+
+ // pre-calculate headroom to improve memory read timing
+ logic [33:0] instr_hdrm;
+ logic hdrm_ge4, hdrm_ge2, hdrm_ok, base_ok;
+ logic allow_all;
+
+ // allow_all is used to permit the pc wraparound case (pc = 0xffff_fffe, uncompressed instruction)
+ // - in this case the fetch should be allowed if the pcc bounds cover the entire 32-bit space.
+ // - If we don't treat this as a special case the fetch would be flagged as an error since headroom < 4
+ assign allow_all = (pcc_cap_i.base32==0) & (pcc_cap_i.top33==33'h1_0000_0000);
+
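+ // instr_hdrm is a 34-bit signed difference (PCC top minus fetch address); bit 33 set means the
+ // fetch address is already above the PCC top, i.e. negative headroom.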
+ assign instr_hdrm = {1'b0, pcc_cap_i.top33} - {2'b00, if_instr_addr};
+ assign hdrm_ge4 = (|instr_hdrm[32:2]) & ~instr_hdrm[33]; // >= 4
+ assign hdrm_ge2 = (|instr_hdrm[32:1]) & ~instr_hdrm[33]; // >= 2
+ assign hdrm_ok = allow_all || (instr_is_compressed ? hdrm_ge2 : hdrm_ge4);
+ assign base_ok = ~(if_instr_addr < pcc_cap_i.base32);
+
+ // only flag cheri bound/access violations on valid fetches (suppressed in debug mode)
+ assign cheri_bound_vio = CHERIoTEn & cheri_pmode_i & ~debug_mode_i & (~base_ok || ~hdrm_ok);
+
+ // In order to have constant timing (avoid side-channel leakage due to data-dependent behavior),
+ // if base vio or headroom < 4 (we are only authorized to fetch 2 bytes), force the fetch_fifo
+ // to treat the current rdata as a unaligned compressed instruction if pc[1]=1, and push it to
+ // ID stage without waiting for the 2nd part of 32-bit instruciton.
+ //
+ assign cheri_force_uc = CHERIoTEn & cheri_pmode_i & ~allow_all & (~base_ok | ~hdrm_ge4);
+
+ // we still check seal/perm here to be safe; per the ISA those violations can't happen at fetch
+ // time since they are checked elsewhere already
+ assign cheri_acc_vio = CHERIoTEn & cheri_pmode_i & ~debug_mode_i &
+ (~pcc_cap_i.perms[PERM_EX] || ~pcc_cap_i.valid || (pcc_cap_i.otype!=0));
+
+ // compressed instruction decoding, or more precisely compressed instruction
+ // expander
+ //
+ // since it does not matter where we decompress instructions, we do it here
+ // to ease timing closure
+ cheriot_compressed_decoder #(
+ .CHERIoTEn (CHERIoTEn)
+ ) compressed_decoder_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .valid_i (fetch_valid & ~fetch_err),
+ .instr_i (if_instr_rdata),
+ .cheri_pmode_i (cheri_pmode_i),
+ .instr_o (instr_decompressed),
+ .is_compressed_o(instr_is_compressed),
+ .illegal_instr_o(illegal_c_insn)
+ );
+
+ // Dummy instruction insertion
+ if (DummyInstructions) begin : gen_dummy_instr
+ logic insert_dummy_instr;
+ logic [31:0] dummy_instr_data;
+
+ cheriot_dummy_instr #(
+ .RndCnstLfsrSeed (RndCnstLfsrSeed),
+ .RndCnstLfsrPerm (RndCnstLfsrPerm)
+ ) dummy_instr_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .dummy_instr_en_i (dummy_instr_en_i),
+ .dummy_instr_mask_i (dummy_instr_mask_i),
+ .dummy_instr_seed_en_i(dummy_instr_seed_en_i),
+ .dummy_instr_seed_i (dummy_instr_seed_i),
+ .fetch_valid_i (fetch_valid),
+ .id_in_ready_i (id_in_ready_i),
+ .insert_dummy_instr_o (insert_dummy_instr),
+ .dummy_instr_data_o (dummy_instr_data)
+ );
+
+ // Mux between actual instructions and dummy instructions
+ assign instr_out = insert_dummy_instr ? dummy_instr_data : instr_decompressed;
+ assign instr_is_compressed_out = insert_dummy_instr ? 1'b0 : instr_is_compressed;
+ assign illegal_c_instr_out = insert_dummy_instr ? 1'b0 : illegal_c_insn;
+ assign instr_err_out = insert_dummy_instr ? 1'b0 : if_instr_err;
+
+ // Stall the IF stage if we insert a dummy instruction. The dummy will execute between whatever
+ // is currently in the ID stage and whatever is valid from the prefetch buffer this cycle. The
+ // PC of the dummy instruction will match whatever is next from the prefetch buffer.
+ assign stall_dummy_instr = insert_dummy_instr;
+
+ // Register the dummy instruction indication into the ID stage
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ dummy_instr_id_o <= 1'b0;
+ end else if (if_id_pipe_reg_we) begin
+ dummy_instr_id_o <= insert_dummy_instr;
+ end
+ end
+
+ end else begin : gen_no_dummy_instr
+ logic unused_dummy_en;
+ logic [2:0] unused_dummy_mask;
+ logic unused_dummy_seed_en;
+ logic [31:0] unused_dummy_seed;
+
+ assign unused_dummy_en = dummy_instr_en_i;
+ assign unused_dummy_mask = dummy_instr_mask_i;
+ assign unused_dummy_seed_en = dummy_instr_seed_en_i;
+ assign unused_dummy_seed = dummy_instr_seed_i;
+ assign instr_out = instr_decompressed;
+ assign instr_is_compressed_out = instr_is_compressed;
+ assign illegal_c_instr_out = illegal_c_insn;
+ assign instr_err_out = if_instr_err;
+ assign stall_dummy_instr = 1'b0;
+ assign dummy_instr_id_o = 1'b0;
+ end
+
+ // The ID stage becomes valid as soon as any instruction is registered in the ID stage flops.
+ // Note that the current instruction is squashed by the incoming pc_set_i signal.
+ // Valid is held until it is explicitly cleared (due to an instruction completing or an exception)
+ assign instr_valid_id_d = (if_instr_valid & id_in_ready_i & ~pc_set_i) |
+ (instr_valid_id_q & ~instr_valid_clear_i);
+ assign instr_new_id_d = if_instr_valid & id_in_ready_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_valid_id_q <= 1'b0;
+ instr_new_id_q <= 1'b0;
+ end else begin
+ instr_valid_id_q <= instr_valid_id_d;
+ instr_new_id_q <= instr_new_id_d;
+ end
+ end
+
+ assign instr_valid_id_o = instr_valid_id_q;
+ // Signal when a new instruction enters the ID stage (only used for RVFI signalling).
+ assign instr_new_id_o = instr_new_id_q;
+
+ // IF-ID pipeline registers, frozen when the ID stage is stalled
+ assign if_id_pipe_reg_we = instr_new_id_d;
+
+ if (ResetAll) begin : g_instr_rdata_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_rdata_id_o <= '0;
+ instr_rdata_alu_id_o <= '0;
+ instr_fetch_err_o <= '0;
+ instr_fetch_err_plus2_o <= '0;
+ instr_rdata_c_id_o <= '0;
+ instr_is_compressed_id_o <= '0;
+ illegal_c_insn_id_o <= '0;
+ pc_id_o <= '0;
+ instr_fetch_cheri_acc_vio_o <= '0;
+ instr_fetch_cheri_bound_vio_o <= '0;
+ end else if (if_id_pipe_reg_we) begin
+ instr_rdata_id_o <= instr_out;
+ // To reduce fan-out and help timing from the instr_rdata_id flops they are replicated.
+ instr_rdata_alu_id_o <= instr_out;
+ instr_fetch_err_o <= instr_err_out;
+ instr_fetch_err_plus2_o <= if_instr_err_plus2;
+ instr_rdata_c_id_o <= if_instr_rdata[15:0];
+ instr_is_compressed_id_o <= instr_is_compressed_out;
+ illegal_c_insn_id_o <= illegal_c_instr_out;
+ pc_id_o <= pc_if_o;
+ instr_fetch_cheri_acc_vio_o <= cheri_acc_vio;
+ instr_fetch_cheri_bound_vio_o <= cheri_bound_vio;
+ end
+ end
+ end else begin : g_instr_rdata_nr
+ always_ff @(posedge clk_i) begin
+ if (if_id_pipe_reg_we) begin
+ instr_rdata_id_o <= instr_out;
+ // To reduce fan-out and help timing from the instr_rdata_id flops they are replicated.
+ instr_rdata_alu_id_o <= instr_out;
+ instr_fetch_err_o <= instr_err_out;
+ instr_fetch_err_plus2_o <= if_instr_err_plus2;
+ instr_rdata_c_id_o <= if_instr_rdata[15:0];
+ instr_is_compressed_id_o <= instr_is_compressed_out;
+ illegal_c_insn_id_o <= illegal_c_instr_out;
+ pc_id_o <= pc_if_o;
+ instr_fetch_cheri_acc_vio_o <= cheri_acc_vio;
+ instr_fetch_cheri_bound_vio_o <= cheri_bound_vio;
+ end
+ end
+ end
+
+ // Check for expected increments of the PC when security hardening enabled
+ if (PCIncrCheck) begin : g_secure_pc
+ logic [31:0] prev_instr_addr_incr, prev_instr_addr_incr_buf;
+ logic prev_instr_seq_q, prev_instr_seq_d;
+
+ // Do not check for sequential increase after a branch, jump, exception, interrupt or debug
+ // request, all of which will set branch_req. Also do not check after reset or for dummy instructions.
+ assign prev_instr_seq_d = (prev_instr_seq_q | instr_new_id_d) &
+ ~branch_req & ~if_instr_err & ~stall_dummy_instr;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ prev_instr_seq_q <= 1'b0;
+ end else begin
+ prev_instr_seq_q <= prev_instr_seq_d;
+ end
+ end
+
+ assign prev_instr_addr_incr = pc_id_o + (instr_is_compressed_id_o ? 32'd2 : 32'd4);
+
+ `ifdef FPGA
+ // Buffer anticipated next PC address to ensure optimiser cannot remove the check.
+ prim_buf #(.Width(32)) u_prev_instr_addr_incr_buf (
+ .in_i (prev_instr_addr_incr),
+ .out_o(prev_instr_addr_incr_buf)
+ );
+ `else
+ assign prev_instr_addr_incr_buf = prev_instr_addr_incr;
+ `endif
+
+ // Check that the address equals the previous address +2/+4
+ assign pc_mismatch_alert_o = prev_instr_seq_q & (pc_if_o != prev_instr_addr_incr_buf);
+
+ end else begin : g_no_secure_pc
+ assign pc_mismatch_alert_o = 1'b0;
+ end
+
+ if (BranchPredictor) begin : g_branch_predictor
+ logic [31:0] instr_skid_data_q;
+ logic [31:0] instr_skid_addr_q;
+ logic instr_skid_bp_taken_q;
+ logic instr_skid_valid_q, instr_skid_valid_d;
+ logic instr_skid_en;
+ logic instr_bp_taken_q, instr_bp_taken_d;
+
+ logic predict_branch_taken_raw;
+
+ // The ID stage needs to know if the branch was predicted taken so it can signal mispredicts
+ if (ResetAll) begin : g_bp_taken_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_bp_taken_q <= '0;
+ end else if (if_id_pipe_reg_we) begin
+ instr_bp_taken_q <= instr_bp_taken_d;
+ end
+ end
+ end else begin : g_bp_taken_nr
+ always_ff @(posedge clk_i) begin
+ if (if_id_pipe_reg_we) begin
+ instr_bp_taken_q <= instr_bp_taken_d;
+ end
+ end
+ end
+
+ // When branch prediction is enabled a skid buffer between the IF and ID/EX stage is introduced.
+ // If an instruction in IF is predicted to be a taken branch and ID/EX is not ready the
+ // instruction in IF is moved to the skid buffer which becomes the output of the IF stage until
+ // the ID/EX stage accepts the instruction. The skid buffer is required as otherwise the ID/EX
+ // ready signal is coupled to the instr_req_o output which produces a feedthrough path from
+ // data_gnt_i -> instr_req_o (which needs to be avoided as for some interconnects this will
+ // result in a combinational loop).
+
+ assign instr_skid_en = predict_branch_taken & ~pc_set_i & ~id_in_ready_i & ~instr_skid_valid_q;
+
+ assign instr_skid_valid_d = (instr_skid_valid_q & ~id_in_ready_i & ~stall_dummy_instr) |
+ instr_skid_en;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_skid_valid_q <= 1'b0;
+ end else begin
+ instr_skid_valid_q <= instr_skid_valid_d;
+ end
+ end
+
+ if (ResetAll) begin : g_instr_skid_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_skid_bp_taken_q <= '0;
+ instr_skid_data_q <= '0;
+ instr_skid_addr_q <= '0;
+ end else if (instr_skid_en) begin
+ instr_skid_bp_taken_q <= predict_branch_taken;
+ instr_skid_data_q <= fetch_rdata;
+ instr_skid_addr_q <= fetch_addr;
+ end
+ end
+ end else begin : g_instr_skid_nr
+ always_ff @(posedge clk_i) begin
+ if (instr_skid_en) begin
+ instr_skid_bp_taken_q <= predict_branch_taken;
+ instr_skid_data_q <= fetch_rdata;
+ instr_skid_addr_q <= fetch_addr;
+ end
+ end
+ end
+
+ cheriot_branch_predict branch_predict_i (
+ .clk_i (clk_i),
+ .rst_ni (rst_ni),
+ .fetch_rdata_i(fetch_rdata),
+ .fetch_pc_i (fetch_addr),
+ .fetch_valid_i(fetch_valid),
+
+ .predict_branch_taken_o(predict_branch_taken_raw),
+ .predict_branch_pc_o (predict_branch_pc)
+ );
+
+ // If there is an instruction in the skid buffer there must be no branch prediction.
+ // Instructions are only placed in the skid after they have been predicted to be a taken branch
+ // so with the skid valid any prediction has already occurred.
+ // Do not branch predict on instruction errors.
+ assign predict_branch_taken = predict_branch_taken_raw & ~instr_skid_valid_q & ~fetch_err;
+
+ assign if_instr_valid = fetch_valid | (instr_skid_valid_q & ~nt_branch_mispredict_i);
+ assign if_instr_rdata = instr_skid_valid_q ? instr_skid_data_q : fetch_rdata;
+ assign if_instr_addr = instr_skid_valid_q ? instr_skid_addr_q : fetch_addr;
+
+ // Don't branch predict on instruction error so only instructions without errors end up in the
+ // skid buffer.
+ assign if_instr_bus_err = ~instr_skid_valid_q & fetch_err;
+ assign instr_bp_taken_d = instr_skid_valid_q ? instr_skid_bp_taken_q : predict_branch_taken;
+
+ assign fetch_ready = id_in_ready_i & ~stall_dummy_instr & ~instr_skid_valid_q;
+
+ assign instr_bp_taken_o = instr_bp_taken_q;
+
+ `ASSERT(NoPredictSkid, instr_skid_valid_q |-> ~predict_branch_taken)
+ `ASSERT(NoPredictIllegal, predict_branch_taken |-> ~illegal_c_insn)
+ end else begin : g_no_branch_predictor
+ assign instr_bp_taken_o = 1'b0;
+ assign predict_branch_taken = 1'b0;
+ assign predict_branch_pc = 32'b0;
+
+ assign if_instr_valid = fetch_valid;
+ assign if_instr_rdata = fetch_rdata;
+ assign if_instr_addr = fetch_addr;
+ assign if_instr_bus_err = fetch_err;
+ assign fetch_ready = id_in_ready_i & ~stall_dummy_instr;
+ end
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // Selectors must be known/valid.
+ `ASSERT_KNOWN(IbexExcPcMuxKnown, exc_pc_mux_i)
+
+ if (BranchPredictor) begin : g_branch_predictor_asserts
+ `ASSERT_IF(IbexPcMuxValid, pc_mux_internal inside {
+ PC_BOOT,
+ PC_JUMP,
+ PC_EXC,
+ PC_ERET,
+ PC_DRET,
+ PC_BP},
+ pc_set_i)
+
+`ifdef INC_ASSERT
+ /**
+ * Checks for branch prediction interface to fetch_fifo/icache
+ *
+ * The interface has two signals:
+ * - predicted_branch_i: When set with a branch (branch_i) it indicates the branch is a predicted
+ * one; it should be ignored when branch_i isn't set.
+ * - branch_mispredict_i: Indicates the previously predicted branch was mis-predicted and
+ * execution should resume with the not-taken side of the branch (i.e. continue with the PC
+ * that followed the predicted branch). This must be raised before the instruction that is
+ * made available following a predicted branch is accepted (Following a cycle with branch_i
+ * & predicted_branch_i, branch_mispredict_i can only be asserted before or on the same cycle
+ * as seeing fetch_valid & fetch_ready). When branch_mispredict_i is asserted, fetch_valid may
+ * be asserted in response. If fetch_valid is asserted on the same cycle as
+ * branch_mispredict_i this indicates the fetch_fifo/icache has the not-taken side of the
+ * branch immediately ready for use
+ */
+ logic predicted_branch_live_q, predicted_branch_live_d;
+ logic [31:0] predicted_branch_nt_pc_q, predicted_branch_nt_pc_d;
+ logic [31:0] awaiting_instr_after_mispredict_q, awaiting_instr_after_mispredict_d;
+ logic [31:0] next_pc;
+
+ logic mispredicted, mispredicted_d, mispredicted_q;
+
+ assign next_pc = fetch_addr + (instr_is_compressed_out ? 32'd2 : 32'd4);
+
+ logic predicted_branch;
+
+ // pc_set_i takes precedence over branch prediction
+ assign predicted_branch = predict_branch_taken & ~pc_set_i;
+
+ always_comb begin
+ predicted_branch_live_d = predicted_branch_live_q;
+ mispredicted_d = mispredicted_q;
+
+ if (branch_req & predicted_branch) begin
+ predicted_branch_live_d = 1'b1;
+ mispredicted_d = 1'b0;
+ end else if (predicted_branch_live_q) begin
+ if (fetch_valid & fetch_ready) begin
+ predicted_branch_live_d = 1'b0;
+ end else if (nt_branch_mispredict_i) begin
+ mispredicted_d = 1'b1;
+ end
+ end
+ end
+
+ always @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ predicted_branch_live_q <= 1'b0;
+ mispredicted_q <= 1'b0;
+ end else begin
+ predicted_branch_live_q <= predicted_branch_live_d;
+ mispredicted_q <= mispredicted_d;
+ end
+ end
+
+ always @(posedge clk_i) begin
+ if (branch_req & predicted_branch) begin
+ predicted_branch_nt_pc_q <= next_pc;
+ end
+ end
+
+ // Must only see mispredict after we've performed a predicted branch but before we've accepted
+ // any instruction (with fetch_ready & fetch_valid) that follows that predicted branch.
+ `ASSERT(MispredictOnlyImmediatelyAfterPredictedBranch,
+ nt_branch_mispredict_i |-> predicted_branch_live_q)
+ // Check that on mispredict we get the correct PC for the non-taken side of the branch when
+ // prefetch buffer/icache makes that PC available.
+ `ASSERT(CorrectPCOnMispredict,
+ predicted_branch_live_q & mispredicted_d & fetch_valid |->
+ fetch_addr == predicted_branch_nt_pc_q)
+ // Must not signal mispredict over multiple cycles but it's possible to have back to back
+ // mispredicts for different branches (the core signals a mispredict, the prefetch buffer/icache
+ // immediately has the not-taken side of the mispredicted branch ready, which itself is a
+ // predicted branch, and the following cycle the core signals that that branch has mispredicted).
+ `ASSERT(MispredictSingleCycle,
+ nt_branch_mispredict_i & ~(fetch_valid & fetch_ready) |=> ~nt_branch_mispredict_i)
+ // Note that we should never see a mispredict and an incoming branch on the same cycle.
+ // The mispredict also cancels any predicted branch so overall branch_req must be low.
+ `ASSERT(NoMispredBranch, nt_branch_mispredict_i |-> ~branch_req)
+`endif
+
+ end else begin : g_no_branch_predictor_asserts
+ `ASSERT_IF(IbexPcMuxValid, pc_mux_internal inside {
+ PC_BOOT,
+ PC_JUMP,
+ PC_EXC,
+ PC_ERET,
+ PC_DRET},
+ pc_set_i)
+ end
+
+ // Boot address must be aligned to 256 bytes.
+ `ASSERT(IbexBootAddrUnaligned, boot_addr_i[7:0] == 8'h00)
+
+ // Address must not contain X when request is sent.
+ `ASSERT(IbexInstrAddrUnknown, instr_req_o |-> !$isunknown(instr_addr_o))
+
+ // Address must be word aligned when request is sent.
+ `ASSERT(IbexInstrAddrUnaligned, instr_req_o |-> (instr_addr_o[1:0] == 2'b00))
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv b/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv
new file mode 100644
index 0000000..ebbe74a
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv
@@ -0,0 +1,760 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+
+/**
+ * Load Store Unit
+ *
+ * Load Store Unit, used to eliminate multiple accesses during processor stalls,
+ * and to align bytes and halfwords.
+ */
+
+`include "prim_assert.sv"
+`include "dv_fcov_macros.svh"
+
+module cheriot_load_store_unit import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter bit CHERIoTEn = 1'b1,
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriTBRE = 1'b0,
+ parameter bit CheriCapIT8 = 1'b0
+)(
+ input logic clk_i,
+ input logic rst_ni,
+ input logic cheri_pmode_i,
+
+ // data interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ input logic data_err_i,
+ input logic data_pmp_err_i,
+
+ output logic [31:0] data_addr_o,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [32:0] data_wdata_o, // kliu
+ input logic [32:0] data_rdata_i, // kliu
+
+ // signals to/from ID/EX stage
+ input logic lsu_we_i, // write enable -> from ID/EX
+ input logic lsu_is_cap_i, // kliu
+ input logic lsu_cheri_err_i, // kliu
+ input logic [1:0] lsu_type_i, // data type: word, half word, byte -> from ID/EX
+ input logic [32:0] lsu_wdata_i, // data to write to memory -> from ID/EX
+ input reg_cap_t lsu_wcap_i, // kliu
+ input logic [3:0] lsu_lc_clrperm_i,
+ input logic lsu_sign_ext_i, // sign extension -> from ID/EX
+ input logic cpu_stall_by_stkz_i,
+ input logic cpu_grant_to_stkz_i,
+
+ output reg_cap_t lsu_rcap_o, // kliu
+ output logic [32:0] lsu_rdata_o, // requested data -> to ID/EX
+ output logic lsu_rdata_valid_o,
+ input logic lsu_req_i, // data request -> from ID/EX
+
+ input logic [31:0] lsu_addr_i, // address computed in ALU -> from ID/EX
+
+ output logic lsu_addr_incr_req_o, // request address increment for
+ // misaligned accesses -> to ID/EX
+ output logic [31:0] addr_last_o, // address of last transaction -> to controller
+ // -> mtval
+ // -> AGU for misaligned accesses
+
+ output logic lsu_req_done_o, // Signals that data request is complete
+ // (only need to await final data
+ // response) -> to ID/EX
+ output logic lsu_resp_valid_o, // LSU has response from transaction -> to ID/EX & WB
+ output logic lsu_resp_is_wr_o,
+
+ // TBRE related signals
+ input logic tbre_lsu_req_i,
+ input logic cpu_lsu_dec_i,
+ output logic lsu_tbre_sel_o, // request-side selection signal
+ output logic lsu_tbre_addr_incr_req_o, // request address increment for
+ output logic [32:0] lsu_tbre_raw_lsw_o,
+ output logic lsu_tbre_req_done_o,
+ output logic lsu_tbre_resp_valid_o, // response from transaction -> to TBRE
+ output logic lsu_tbre_resp_err_o,
+
+ // exception signals
+ output logic load_err_o,
+ output logic store_err_o,
+ output logic lsu_err_is_cheri_o,
+
+ output logic busy_o,
+ output logic busy_tbre_o,
+
+ output logic perf_load_o,
+ output logic perf_store_o
+);
+
+ logic [31:0] data_addr;
+ logic [31:0] data_addr_w_aligned;
+ logic [31:0] addr_last_q, addr_last_d;
+
+ logic addr_update;
+ logic ctrl_update;
+ logic rdata_update;
+ logic [31:8] rdata_q;
+ logic [1:0] rdata_offset_q;
+ logic [1:0] data_type_q;
+ logic data_sign_ext_q;
+ logic data_we_q;
+
+ logic [1:0] data_offset; // mux control for data to be written to memory
+
+ logic [3:0] data_be;
+ logic [32:0] data_wdata;
+
+ logic [32:0] data_rdata_ext;
+
+ logic [32:0] rdata_w_ext; // word realignment for misaligned loads
+ logic [31:0] rdata_h_ext; // sign extension for half words
+ logic [31:0] rdata_b_ext; // sign extension for bytes
+
+ logic split_misaligned_access;
+ logic handle_misaligned_q, handle_misaligned_d; // high after receiving grant for first
+ // part of a misaligned access
+ logic pmp_err_q, pmp_err_d;
+ logic lsu_err_q, lsu_err_d;
+ logic data_or_pmp_err;
+
+ logic resp_is_cap_q;
+ logic cheri_err_d, cheri_err_q;
+ logic [3:0] resp_lc_clrperm_q;
+ logic cur_req_is_tbre;
+ logic req_is_tbre_q;
+ logic resp_is_tbre;
+ logic tbre_req_good;
+ logic outstanding_resp_q, resp_wait;
+ logic lsu_resp_valid;
+ logic lsu_go;
+ logic addr_incr_req;
+ logic cpu_req_erred, cpu_req_valid;
+
+
+ ls_fsm_e ls_fsm_cs, ls_fsm_ns;
+
+ cap_rx_fsm_t cap_rx_fsm_q, cap_rx_fsm_d;
+
+ logic cap_lsw_err_q;
+ logic [32:0] cap_lsw_q;
+
+ assign data_addr = lsu_addr_i;
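+ // Capability accesses are always word aligned, so force the byte offset to zero.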
+ assign data_offset = (cheri_pmode_i & lsu_is_cap_i) ? 2'b00 : data_addr[1:0];
+
+ ///////////////////
+ // BE generation //
+ ///////////////////
+
+ always_comb begin
+ if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i)
+ data_be = 4'b1111; // caps are always word aligned
+ else begin
+ unique case (lsu_type_i) // Data type 00 Word, 01 Half word, 11,10 byte
+ 2'b00: begin // Writing a word
+ if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+ unique case (data_offset)
+ 2'b00: data_be = 4'b1111;
+ 2'b01: data_be = 4'b1110;
+ 2'b10: data_be = 4'b1100;
+ 2'b11: data_be = 4'b1000;
+ default: data_be = 4'b1111;
+ endcase // case (data_offset)
+ end else begin // second part of misaligned transaction
+ unique case (data_offset)
+ 2'b00: data_be = 4'b0000; // this is not used, but included for completeness
+ 2'b01: data_be = 4'b0001;
+ 2'b10: data_be = 4'b0011;
+ 2'b11: data_be = 4'b0111;
+ default: data_be = 4'b1111;
+ endcase // case (data_offset)
+ end
+ end
+
+ 2'b01: begin // Writing a half word
+ if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
+ unique case (data_offset)
+ 2'b00: data_be = 4'b0011;
+ 2'b01: data_be = 4'b0110;
+ 2'b10: data_be = 4'b1100;
+ 2'b11: data_be = 4'b1000;
+ default: data_be = 4'b1111;
+ endcase // case (data_offset)
+ end else begin // second part of misaligned transaction
+ data_be = 4'b0001;
+ end
+ end
+
+ 2'b10,
+ 2'b11: begin // Writing a byte
+ unique case (data_offset)
+ 2'b00: data_be = 4'b0001;
+ 2'b01: data_be = 4'b0010;
+ 2'b10: data_be = 4'b0100;
+ 2'b11: data_be = 4'b1000;
+ default: data_be = 4'b1111;
+ endcase // case (data_offset)
+ end
+
+ default: data_be = 4'b1111;
+ endcase // case (lsu_type_i)
+ end // if lsu_cap_i
+ end
+
+ /////////////////////
+ // WData alignment //
+ /////////////////////
+
+ // prepare data to be written to the memory
+ // we handle misaligned accesses, half word and byte accesses here
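+ // Capability stores are split into two word transfers; the second word (issued from state
+ // CTX_WAIT_GNT2) carries the upper half of the encoded capability, with the encoding selected
+ // by MemCapFmt and CheriCapIT8.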
+ if (~MemCapFmt) begin : gen_memcap_wr_fmt0
+ always_comb begin
+ if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i && (ls_fsm_cs == CTX_WAIT_GNT2))
+ data_wdata = CheriCapIT8 ? reg2memcap_it8_fmt0(lsu_wcap_i):
+ reg2memcap_fmt0(lsu_wcap_i);
+ else if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i)
+ data_wdata = lsu_wdata_i;
+ else begin
+ unique case (data_offset)
+ 2'b00: data_wdata = lsu_wdata_i[32:0];
+ 2'b01: data_wdata = {1'b0, lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
+ 2'b10: data_wdata = {1'b0, lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
+ 2'b11: data_wdata = {1'b0, lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
+ default: data_wdata = lsu_wdata_i[32:0];
+ endcase // case (data_offset)
+ end
+ end
+ end else begin : gen_memcap_wr_fmt1
+ logic [65:0] mem_capaddr;
+ assign mem_capaddr = CheriCapIT8 ? reg2mem_it8_fmt1(lsu_wcap_i, lsu_wdata_i) :
+ reg2mem_fmt1(lsu_wcap_i, lsu_wdata_i);
+
+ always_comb begin
+ if (CHERIoTEn & lsu_is_cap_i && (ls_fsm_cs == CTX_WAIT_GNT2))
+ data_wdata = mem_capaddr[65:33];
+ else if (CHERIoTEn & lsu_is_cap_i)
+ data_wdata = mem_capaddr[32:0];
+ else begin
+ unique case (data_offset)
+ 2'b00: data_wdata = lsu_wdata_i[32:0];
+ 2'b01: data_wdata = {1'b0, lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
+ 2'b10: data_wdata = {1'b0, lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
+ 2'b11: data_wdata = {1'b0, lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
+ default: data_wdata = lsu_wdata_i[32:0];
+ endcase // case (data_offset)
+ end
+ end
+ end
+ /////////////////////
+ // RData alignment //
+ /////////////////////
+
+ // register for unaligned rdata
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rdata_q <= '0;
+ end else if (rdata_update) begin
+ rdata_q <= data_rdata_i[31:8];
+ end
+ end
+
+ // registers for transaction control
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rdata_offset_q <= 2'h0;
+ data_type_q <= 2'h0;
+ data_sign_ext_q <= 1'b0;
+ data_we_q <= 1'b0;
+ end else if (ctrl_update) begin
+ rdata_offset_q <= data_offset;
+ data_type_q <= lsu_type_i;
+ data_sign_ext_q <= lsu_sign_ext_i;
+ data_we_q <= lsu_we_i;
+ end
+ end
+
+ // Store last address for mtval + AGU for misaligned transactions. Do not update in case of
+ // errors, mtval needs the (first) failing address. Where an aligned access or the first half of
+ // a misaligned access sees an error provide the calculated access address. For the second half of
+ // a misaligned access provide the word aligned address of the second half.
+ assign addr_last_d = addr_incr_req ? data_addr_w_aligned : data_addr;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ addr_last_q <= '0;
+ end else if (addr_update & ~cur_req_is_tbre) begin
+ addr_last_q <= addr_last_d;
+ end
+ end
+
+ // take care of misaligned words
+ always_comb begin
+ unique case (rdata_offset_q)
+ 2'b00: rdata_w_ext = data_rdata_i[32:0];
+ 2'b01: rdata_w_ext = {1'b0, data_rdata_i[ 7:0], rdata_q[31:8]};
+ 2'b10: rdata_w_ext = {1'b0, data_rdata_i[15:0], rdata_q[31:16]};
+ 2'b11: rdata_w_ext = {1'b0, data_rdata_i[23:0], rdata_q[31:24]};
+ default: rdata_w_ext = data_rdata_i[32:0];
+ endcase
+ end
+
+ ////////////////////
+ // Sign extension //
+ ////////////////////
+
+ // sign extension for half words
+ always_comb begin
+ unique case (rdata_offset_q)
+ 2'b00: begin
+ if (!data_sign_ext_q) begin
+ rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
+ end else begin
+ rdata_h_ext = {{16{data_rdata_i[15]}}, data_rdata_i[15:0]};
+ end
+ end
+
+ 2'b01: begin
+ if (!data_sign_ext_q) begin
+ rdata_h_ext = {16'h0000, data_rdata_i[23:8]};
+ end else begin
+ rdata_h_ext = {{16{data_rdata_i[23]}}, data_rdata_i[23:8]};
+ end
+ end
+
+ 2'b10: begin
+ if (!data_sign_ext_q) begin
+ rdata_h_ext = {16'h0000, data_rdata_i[31:16]};
+ end else begin
+ rdata_h_ext = {{16{data_rdata_i[31]}}, data_rdata_i[31:16]};
+ end
+ end
+
+ 2'b11: begin
+ if (!data_sign_ext_q) begin
+ rdata_h_ext = {16'h0000, data_rdata_i[7:0], rdata_q[31:24]};
+ end else begin
+ rdata_h_ext = {{16{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]};
+ end
+ end
+
+ default: rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
+ endcase // case (rdata_offset_q)
+ end
+
+ // sign extension for bytes
+ always_comb begin
+ unique case (rdata_offset_q)
+ 2'b00: begin
+ if (!data_sign_ext_q) begin
+ rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
+ end else begin
+ rdata_b_ext = {{24{data_rdata_i[7]}}, data_rdata_i[7:0]};
+ end
+ end
+
+ 2'b01: begin
+ if (!data_sign_ext_q) begin
+ rdata_b_ext = {24'h00_0000, data_rdata_i[15:8]};
+ end else begin
+ rdata_b_ext = {{24{data_rdata_i[15]}}, data_rdata_i[15:8]};
+ end
+ end
+
+ 2'b10: begin
+ if (!data_sign_ext_q) begin
+ rdata_b_ext = {24'h00_0000, data_rdata_i[23:16]};
+ end else begin
+ rdata_b_ext = {{24{data_rdata_i[23]}}, data_rdata_i[23:16]};
+ end
+ end
+
+ 2'b11: begin
+ if (!data_sign_ext_q) begin
+ rdata_b_ext = {24'h00_0000, data_rdata_i[31:24]};
+ end else begin
+ rdata_b_ext = {{24{data_rdata_i[31]}}, data_rdata_i[31:24]};
+ end
+ end
+
+ default: rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
+ endcase // case (rdata_offset_q)
+ end
+
+ // select word, half word or byte sign extended version
+ always_comb begin
+ unique case (data_type_q)
+ 2'b00: data_rdata_ext = rdata_w_ext;
+ 2'b01: data_rdata_ext = {1'b0, rdata_h_ext};
+ 2'b10,2'b11: data_rdata_ext = {1'b0, rdata_b_ext};
+ default: data_rdata_ext = rdata_w_ext;
+ endcase // case (data_type_q)
+ end
+
+ /////////////
+ // LSU FSM //
+ /////////////
+
+ // check for misaligned accesses that need to be split into two word-aligned accesses
+ assign split_misaligned_access =
+ ((lsu_type_i == 2'b00) && (data_offset != 2'b00)) || // misaligned word access
+ ((lsu_type_i == 2'b01) && (data_offset == 2'b11)); // misaligned half-word access
+
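+ // A CPU request is only issued when it carries no cheri access error and is not stalled by the
+ // stkz engine (cpu_stall_by_stkz_i); requests flagged with a cheri error are turned into an
+ // error response without issuing a bus transaction (see the IDLE state below).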
+ assign cpu_req_valid = lsu_req_i & ~lsu_cheri_err_i & ~cpu_stall_by_stkz_i;
+ assign cpu_req_erred = lsu_req_i & lsu_cheri_err_i;
+
+ // FSM
+ always_comb begin
+ ls_fsm_ns = ls_fsm_cs;
+
+ data_req_o = 1'b0;
+ addr_incr_req = 1'b0;
+ handle_misaligned_d = handle_misaligned_q;
+ pmp_err_d = pmp_err_q;
+ lsu_err_d = lsu_err_q;
+ cheri_err_d = cheri_err_q & cheri_pmode_i;
+
+ addr_update = 1'b0;
+ ctrl_update = 1'b0;
+ rdata_update = 1'b0;
+
+ perf_load_o = 1'b0;
+ perf_store_o = 1'b0;
+
+ lsu_go = 1'b0;
+
+ unique case (ls_fsm_cs)
+
+ IDLE: begin
+ pmp_err_d = 1'b0;
+ cheri_err_d = 1'b0;
+
+ if (CHERIoTEn & cheri_pmode_i & cpu_req_erred & ~resp_wait) begin
+ // cheri access error case, don't issue data_req but send error response back to WB stage
+ data_req_o = 1'b0;
+ cheri_err_d = 1'b1;
+ ctrl_update = 1'b1; // update ctrl/address so we can report error correctly
+ addr_update = 1'b1;
+ pmp_err_d = 1'b0;
+ lsu_err_d = 1'b0;
+ perf_load_o = 1'b0;
+ lsu_go = 1'b1; // decision to move forward with a request
+ ls_fsm_ns = IDLE;
+ end else if (CHERIoTEn & cheri_pmode_i & (cpu_req_valid | tbre_req_good) &
+ lsu_is_cap_i & ~resp_wait) begin
+ // normal cap access case
+ data_req_o = 1'b1;
+ cheri_err_d = 1'b0;
+ pmp_err_d = data_pmp_err_i;
+ lsu_err_d = 1'b0;
+ perf_load_o = ~lsu_we_i;
+ perf_store_o = lsu_we_i;
+ lsu_go = 1'b1; // decision to move forward with a request
+
+ if (data_gnt_i) begin
+ ctrl_update = 1'b1;
+ addr_update = 1'b1;
+ ls_fsm_ns = CTX_WAIT_GNT2;
+ end else begin
+ ls_fsm_ns = CTX_WAIT_GNT1;
+ end
+ end else if ((cpu_req_valid | tbre_req_good) & ~resp_wait) begin
+ // normal data access case
+ data_req_o = 1'b1;
+ cheri_err_d = 1'b0;
+ pmp_err_d = data_pmp_err_i;
+ lsu_err_d = 1'b0;
+ perf_load_o = ~lsu_we_i;
+ perf_store_o = lsu_we_i;
+ lsu_go = 1'b1; // decision to move forward with a request
+
+ if (data_gnt_i) begin
+ ctrl_update = 1'b1;
+ addr_update = 1'b1;
+ handle_misaligned_d = split_misaligned_access;
+ ls_fsm_ns = split_misaligned_access ? WAIT_RVALID_MIS : IDLE;
+ end else begin
+ ls_fsm_ns = split_misaligned_access ? WAIT_GNT_MIS : WAIT_GNT;
+ end
+ end
+
+ end
+
+ WAIT_GNT_MIS: begin
+ data_req_o = 1'b1;
+ // data_pmp_err_i is valid during the address phase of a request. An error will block the
+ // external request and so a data_gnt_i might never be signalled. The registered version
+ // pmp_err_q is only updated for new address phases and so can be used in WAIT_GNT* and
+ // WAIT_RVALID* states
+ if (data_gnt_i || pmp_err_q ) begin
+ addr_update = 1'b1;
+ ctrl_update = 1'b1;
+ handle_misaligned_d = 1'b1;
+ ls_fsm_ns = WAIT_RVALID_MIS;
+ end
+ end
+
+ WAIT_RVALID_MIS: begin
+ // push out second request
+ data_req_o = 1'b1;
+ // tell ID/EX stage to update the address
+ addr_incr_req = 1'b1;
+
+ // first part rvalid is received, or gets a PMP error
+ if (data_rvalid_i || pmp_err_q) begin
+ // Update the PMP error for the second part
+ pmp_err_d = data_pmp_err_i;
+ // Record the error status of the first part
+ lsu_err_d = data_err_i | pmp_err_q;
+ // Capture the first rdata for loads
+ rdata_update = ~data_we_q;
+ // If already granted, wait for second rvalid
+ ls_fsm_ns = data_gnt_i ? IDLE : WAIT_GNT;
+ // Update the address for the second part, if no error
+ addr_update = data_gnt_i & ~(data_err_i | pmp_err_q);
+ // clear handle_misaligned if second request is granted
+ handle_misaligned_d = ~data_gnt_i;
+ end else begin
+ // first part rvalid is NOT received
+ if (data_gnt_i) begin
+ // second grant is received
+ ls_fsm_ns = WAIT_RVALID_MIS_GNTS_DONE;
+ handle_misaligned_d = 1'b0;
+ end
+ end
+ end
+
+ WAIT_GNT: begin
+ // tell ID/EX stage to update the address
+ addr_incr_req = handle_misaligned_q;
+ data_req_o = 1'b1;
+ if (data_gnt_i || pmp_err_q) begin
+ ctrl_update = 1'b1;
+ // Update the address, unless there was an error
+ addr_update = ~lsu_err_q;
+ ls_fsm_ns = IDLE;
+ handle_misaligned_d = 1'b0;
+ end
+ end
+
+ WAIT_RVALID_MIS_GNTS_DONE: begin
+ // tell ID/EX stage to update the address (to make sure the
+ // second address can be captured correctly for mtval and PMP checking)
+ addr_incr_req = 1'b1;
+ // Wait for the first rvalid, second request is already granted
+ if (data_rvalid_i) begin
+ // Update the pmp error for the second part
+ pmp_err_d = data_pmp_err_i ;
+ // The first part cannot see a PMP error in this state
+ lsu_err_d = data_err_i;
+ // Now we can update the address for the second part if no error
+ addr_update = ~data_err_i;
+ // Capture the first rdata for loads
+ rdata_update = ~data_we_q;
+ // Wait for second rvalid
+ ls_fsm_ns = IDLE;
+ end
+ end
+
+ CTX_WAIT_GNT1: begin
+ if (cheri_pmode_i) begin
+ addr_incr_req = 1'b0;
+ data_req_o = 1'b1;
+ if (data_gnt_i) begin
+ ls_fsm_ns = CTX_WAIT_GNT2;
+ ctrl_update = 1'b1;
+ addr_update = 1'b1;
+ end
+ end else begin
+ ls_fsm_ns = IDLE;
+ end
+ end
+
+ CTX_WAIT_GNT2: begin
+ if (cheri_pmode_i) begin
+ addr_incr_req = 1'b1;
+ data_req_o = 1'b1;
+ if (data_gnt_i && (data_rvalid_i || (cap_rx_fsm_q == CRX_WAIT_RESP2))) ls_fsm_ns = IDLE;
+ else if (data_gnt_i) ls_fsm_ns = CTX_WAIT_RESP;
+ end else begin
+ ls_fsm_ns = IDLE;
+ end
+ end
+
+      CTX_WAIT_RESP: begin // only needed if the memory allows 2 outstanding requests
+ if (cheri_pmode_i) begin
+ addr_incr_req = 1'b1; // stay 1 to reduce unnecessary addr toggling
+ data_req_o = 1'b0;
+ if (data_rvalid_i) ls_fsm_ns = IDLE;
+ end else begin
+ ls_fsm_ns = IDLE;
+ end
+ end
+
+ default: begin
+ ls_fsm_ns = IDLE;
+ end
+ endcase
+ end
+
+ always_comb begin
+ cap_rx_fsm_d = cap_rx_fsm_q;
+
+ case (cap_rx_fsm_q)
+ CRX_IDLE:
+ if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i && (ls_fsm_ns != IDLE)) cap_rx_fsm_d = CRX_WAIT_RESP1;
+ CRX_WAIT_RESP1:
+ if (data_rvalid_i) cap_rx_fsm_d = CRX_WAIT_RESP2;
+ CRX_WAIT_RESP2:
+ if (data_rvalid_i && lsu_is_cap_i && (ls_fsm_ns != IDLE)) cap_rx_fsm_d = CRX_WAIT_RESP1;
+ else if (data_rvalid_i) cap_rx_fsm_d = CRX_IDLE;
+ default:;
+ endcase
+ end
+
+  // this decides whether to grant the LSU to the TBRE/STKZ engine
+ assign tbre_req_good = CHERIoTEn & cheri_pmode_i & CheriTBRE & tbre_lsu_req_i &
+ (~cpu_lsu_dec_i | (cpu_lsu_dec_i & cpu_grant_to_stkz_i));
+
+ assign resp_wait = CHERIoTEn & cheri_pmode_i & CheriTBRE & outstanding_resp_q & ~lsu_resp_valid;
+
+  // we assume the request/control inputs are held until req_done is asserted
+  // (once the request is captured in IDLE, it can be deasserted)
+ logic lsu_req_done;
+
+ assign lsu_req_done = (lsu_go | (ls_fsm_cs != IDLE)) & (ls_fsm_ns == IDLE);
+
+ assign lsu_req_done_o = lsu_req_done & (~cur_req_is_tbre);
+ assign lsu_tbre_req_done_o = lsu_req_done & cur_req_is_tbre & cheri_pmode_i;
+
+ assign lsu_addr_incr_req_o = addr_incr_req & ~cur_req_is_tbre;
+ assign lsu_tbre_addr_incr_req_o = addr_incr_req & cur_req_is_tbre;
+
+ assign cur_req_is_tbre = CHERIoTEn & cheri_pmode_i & CheriTBRE & ((ls_fsm_cs == IDLE) ?
+ (tbre_req_good & ~resp_wait) : req_is_tbre_q);
+
+ assign lsu_tbre_sel_o = cur_req_is_tbre; // req ctrl signal mux select (to cheri_ex/tbre_wrapper)
+
+ // registers for FSM
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ ls_fsm_cs <= IDLE;
+ handle_misaligned_q <= '0;
+ pmp_err_q <= '0;
+ lsu_err_q <= '0;
+ resp_is_cap_q <= 1'b0;
+ resp_lc_clrperm_q <= 4'h0;
+ req_is_tbre_q <= 1'b0;
+ cheri_err_q <= 1'b0;
+ cap_rx_fsm_q <= CRX_IDLE;
+ cap_lsw_err_q <= 1'b0;
+ cap_lsw_q <= 33'h0;
+ outstanding_resp_q <= 1'b0;
+ end else begin
+ ls_fsm_cs <= ls_fsm_ns;
+ handle_misaligned_q <= handle_misaligned_d;
+ pmp_err_q <= pmp_err_d;
+ lsu_err_q <= lsu_err_d;
+ cheri_err_q <= cheri_err_d;
+
+ cap_rx_fsm_q <= cap_rx_fsm_d;
+
+ // resp_is_cap_q aligns with responses on the data interface, lsu_is_cap_i aligns with requests
+ // we use lsu_go to qualify this update
+      // - note this implies that the LSU only supports one outstanding request at a time
+ // - new request can't be issued (go) until resp_valid
+ // - also note resp_valid is gated by (ls_fsm_cs == IDLE)
+ if (lsu_go) begin
+ resp_is_cap_q <= lsu_is_cap_i;
+ resp_lc_clrperm_q <= lsu_lc_clrperm_i;
+ req_is_tbre_q <= cur_req_is_tbre;
+ end
+
+ if (CHERIoTEn & cheri_pmode_i && (cap_rx_fsm_q == CRX_WAIT_RESP1) && data_rvalid_i && (~data_we_q))
+ cap_lsw_q <= data_rdata_i;
+
+ if (CHERIoTEn & cheri_pmode_i && (cap_rx_fsm_q == CRX_WAIT_RESP1) && data_rvalid_i)
+ cap_lsw_err_q <= data_err_i;
+
+ if (lsu_go)
+ outstanding_resp_q <= 1'b1;
+ else if (lsu_resp_valid)
+ outstanding_resp_q <= 1'b0;
+
+ end
+ end
+
+ /////////////
+ // Outputs //
+ /////////////
+
+ assign resp_is_tbre = req_is_tbre_q;
+
+ logic all_resp;
+ assign data_or_pmp_err = lsu_err_q | data_err_i | pmp_err_q | (cheri_pmode_i &
+ (cheri_err_q | (resp_is_cap_q & cap_lsw_err_q)));
+
+ assign all_resp = data_rvalid_i | pmp_err_q | (cheri_pmode_i & cheri_err_q);
+ assign lsu_resp_valid = all_resp & (ls_fsm_cs == IDLE) ;
+
+ assign lsu_resp_valid_o = lsu_resp_valid & (~cheri_pmode_i | (~resp_is_tbre)) ;
+ assign lsu_tbre_resp_valid_o = lsu_resp_valid & resp_is_tbre;
+ assign lsu_resp_is_wr_o = data_we_q;
+
+ // this goes to wb as rf_we_lsu, so needs to be gated when data needs to go back to EX
+ assign lsu_rdata_valid_o = (ls_fsm_cs == IDLE) & data_rvalid_i & ~data_or_pmp_err & ~data_we_q &
+ (~cheri_pmode_i | (~resp_is_tbre));
+
+ // output to register file
+ if (CHERIoTEn & ~MemCapFmt) begin : gen_memcap_rd_fmt0
+ assign lsu_rdata_o = (cheri_pmode_i & resp_is_cap_q) ? cap_lsw_q : data_rdata_ext;
+ assign lsu_rcap_o = (resp_is_cap_q && data_rvalid_i && (cap_rx_fsm_q == CRX_WAIT_RESP2) && (~data_or_pmp_err)) ?
+ (CheriCapIT8 ? mem2regcap_it8_fmt0(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q) :
+ mem2regcap_fmt0(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q)) : NULL_REG_CAP;
+ end else if (CHERIoTEn) begin : gen_memcap_rd_fmt1
+ assign lsu_rdata_o = (cheri_pmode_i & resp_is_cap_q) ? mem2regaddr_fmt1(data_rdata_ext, cap_lsw_q, lsu_rcap_o): data_rdata_ext;
+ assign lsu_rcap_o = (resp_is_cap_q && data_rvalid_i && (cap_rx_fsm_q == CRX_WAIT_RESP2) && (~data_or_pmp_err)) ?
+ (CheriCapIT8 ? mem2regcap_it8_fmt1(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q) :
+ mem2regcap_fmt1(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q)) : NULL_REG_CAP;
+ end else begin : gen_no_cap_rd
+ assign lsu_rdata_o = data_rdata_ext;
+ assign lsu_rcap_o = NULL_REG_CAP;
+ end
+
+
+ assign lsu_tbre_raw_lsw_o = cap_lsw_q; // "raw" memory word to tbre
+
+ // output data address must be word aligned
+ assign data_addr_w_aligned = {data_addr[31:2], 2'b00};
+
+ // output to data interface
+ assign data_addr_o = data_addr_w_aligned;
+
+ assign data_wdata_o = data_wdata;
+ assign data_we_o = lsu_we_i;
+ assign data_be_o = data_be;
+
+ assign data_is_cap_o = lsu_is_cap_i;
+
+ // output to ID stage: mtval + AGU for misaligned transactions
+ assign addr_last_o = addr_last_q;
+
+ // Signal a load or store error depending on the transaction type outstanding
+ assign load_err_o = data_or_pmp_err & ~data_we_q & lsu_resp_valid & (~resp_is_tbre);
+ assign store_err_o = data_or_pmp_err & data_we_q & lsu_resp_valid & (~resp_is_tbre);
+
+ assign lsu_err_is_cheri_o = cheri_pmode_i & cheri_err_q; // send to controller for mcause encoding
+ assign lsu_tbre_resp_err_o = cheri_pmode_i & data_or_pmp_err & lsu_resp_valid & resp_is_tbre;
+
+ assign busy_o = (ls_fsm_cs != IDLE);
+ // assign busy_tbre_o = (ls_fsm_cs != IDLE) & cur_req_is_tbre;
+ assign busy_tbre_o = (ls_fsm_cs != IDLE) & cheri_pmode_i & resp_is_tbre;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv b/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv
new file mode 100644
index 0000000..15815a0
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv
@@ -0,0 +1,657 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Ibex lockstep module
+// This module instantiates a second copy of the core logic, and compares its outputs against
+// those from the main core. The second core runs synchronously with the main core, delayed by
+// LockstepOffset cycles.
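+// With the default LockstepOffset of 2, the shadow core sees every input two cycles after the
+// main core does; any mismatch between the delayed main-core outputs and the shadow-core
+// outputs raises alert_major_internal_o.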
+module cheriot_lockstep import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter int unsigned LockstepOffset = 2,
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter rv32m_e RV32M = RV32MFast,
+ parameter rv32b_e RV32B = RV32BNone,
+ parameter bit BranchTargetALU = 1'b0,
+ parameter bit WritebackStage = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter int unsigned BusSizeECC = BUS_SIZE,
+ parameter int unsigned TagSizeECC = IC_TAG_SIZE,
+ parameter int unsigned LineSizeECC = IC_LINE_SIZE,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit ResetAll = 1'b0,
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault,
+ parameter bit SecureIbex = 1'b0,
+ parameter bit DummyInstructions = 1'b0,
+ parameter bit RegFileECC = 1'b0,
+ parameter int unsigned RegFileDataWidth = 32,
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+  // CHERIoT parameters
+ parameter bit CHERIoTEn = 1'b1,
+ parameter int unsigned DataWidth = 33,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2002_f000,
+ parameter int unsigned TSMapSize = 1024,
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+
+ input logic instr_req_i,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ input logic [31:0] instr_addr_i,
+ input logic [31:0] instr_rdata_i,
+ input logic [6:0] instr_rdata_intg_i,
+ input logic instr_err_i,
+
+ input logic data_req_i,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ input logic data_we_i,
+ input logic [3:0] data_be_i,
+ input logic [31:0] data_addr_i,
+ input logic [DataWidth-1:0] data_wdata_i,
+ input logic data_is_cap_i,
+ output logic [6:0] data_wdata_intg_o,
+ input logic [DataWidth-1:0] data_rdata_i,
+ input logic [6:0] data_rdata_intg_i,
+ input logic data_err_i,
+
+ input logic dummy_instr_id_i,
+ input logic [4:0] rf_raddr_a_i,
+ input logic [4:0] rf_raddr_b_i,
+ input logic [4:0] rf_waddr_wb_i,
+ input logic rf_we_wb_i,
+ input logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc_i,
+ input logic [RegFileDataWidth-1:0] rf_rdata_a_ecc_i,
+ input logic [RegFileDataWidth-1:0] rf_rdata_b_ecc_i,
+
+ input reg_cap_t rf_wcap_wb_i,
+ input reg_cap_t rf_rcap_a_i,
+ input reg_cap_t rf_rcap_b_i,
+ input logic [31:0] rf_reg_rdy_i,
+ input logic rf_trsv_en_i,
+ input logic [4:0] rf_trsv_addr_i,
+ input logic [6:0] rf_trsv_par_i,
+ input logic [4:0] rf_trvk_addr_i,
+ input logic rf_trvk_en_i,
+ input logic rf_trvk_clrtag_i,
+ input logic [6:0] rf_trvk_par_i,
+ input logic tsmap_cs_i,
+ input logic [15:0] tsmap_addr_i,
+ input logic [31:0] tsmap_rdata_i,
+ input logic [6:0] tsmap_rdata_intg_i,
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ input logic [MMRegDoutW-1:0] mmreg_coreout_i,
+
+ input logic [IC_NUM_WAYS-1:0] ic_tag_req_i,
+ input logic ic_tag_write_i,
+ input logic [IC_INDEX_W-1:0] ic_tag_addr_i,
+ input logic [TagSizeECC-1:0] ic_tag_wdata_i,
+ input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS],
+ input logic [IC_NUM_WAYS-1:0] ic_data_req_i,
+ input logic ic_data_write_i,
+ input logic [IC_INDEX_W-1:0] ic_data_addr_i,
+ input logic [LineSizeECC-1:0] ic_data_wdata_i,
+ input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS],
+ input logic ic_scr_key_valid_i,
+
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic irq_nm_i,
+ input logic irq_pending_i,
+
+ input logic debug_req_i,
+ input crash_dump_t crash_dump_i,
+ input logic double_fault_seen_i,
+
+ input fetch_enable_t fetch_enable_i,
+ output logic alert_minor_o,
+ output logic alert_major_internal_o,
+ output logic alert_major_bus_o,
+ input logic icache_inval_i,
+ input logic core_busy_i,
+ input logic test_en_i,
+ input logic scan_rst_ni
+);
+
+ localparam int unsigned LockstepOffsetW = $clog2(LockstepOffset);
+ // Core outputs are delayed for an extra cycle due to shadow output registers
+ localparam int unsigned OutputsOffset = LockstepOffset + 1;
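+  // i.e. the main-core outputs are held for OutputsOffset entries in core_outputs_q so that
+  // core_outputs_q[0] lines up with shadow_outputs_q at the comparator.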
+
+ //////////////////////
+ // Reset generation //
+ //////////////////////
+
+ // Upon reset, the comparison is stopped and the shadow core is reset, both immediately. A
+ // counter is started. After LockstepOffset clock cycles:
+ // - The counter is stopped.
+ // - The reset of the shadow core is synchronously released.
+ // The comparison is started in the following clock cycle.
+
+ logic [LockstepOffsetW-1:0] rst_shadow_cnt_d, rst_shadow_cnt_q, rst_shadow_cnt_incr;
+ // Internally generated resets cause IMPERFECTSCH warnings
+ /* verilator lint_off IMPERFECTSCH */
+ logic rst_shadow_set_d, rst_shadow_set_q;
+ logic rst_shadow_n, enable_cmp_q;
+ /* verilator lint_on IMPERFECTSCH */
+
+ assign rst_shadow_cnt_incr = rst_shadow_cnt_q + LockstepOffsetW'(1);
+
+ assign rst_shadow_set_d = (rst_shadow_cnt_q == LockstepOffsetW'(LockstepOffset - 1));
+ assign rst_shadow_cnt_d = rst_shadow_set_d ? rst_shadow_cnt_q : rst_shadow_cnt_incr;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rst_shadow_cnt_q <= '0;
+ enable_cmp_q <= '0;
+ end else begin
+ rst_shadow_cnt_q <= rst_shadow_cnt_d;
+ enable_cmp_q <= rst_shadow_set_q;
+ end
+ end
+
+ // The primitives below are used to place size-only constraints in order to prevent
+ // synthesis optimizations and preserve anchor points for constraining backend tools.
+ prim_flop #(
+ .Width(1),
+ .ResetValue(1'b0)
+ ) u_prim_rst_shadow_set_flop (
+ .clk_i (clk_i),
+ .rst_ni(rst_ni),
+ .d_i (rst_shadow_set_d),
+ .q_o (rst_shadow_set_q)
+ );
+
+ prim_clock_mux2 #(
+ .NoFpgaBufG(1'b1)
+ ) u_prim_rst_shadow_n_mux2 (
+ .clk0_i(rst_shadow_set_q),
+ .clk1_i(scan_rst_ni),
+ .sel_i (test_en_i),
+ .clk_o (rst_shadow_n)
+ );
+
+ //////////////////
+ // Input delays //
+ //////////////////
+
+ typedef struct packed {
+ logic instr_gnt;
+ logic instr_rvalid;
+ logic [31:0] instr_rdata;
+ logic instr_err;
+ logic data_gnt;
+ logic data_rvalid;
+ logic [DataWidth-1:0] data_rdata;
+ logic data_err;
+ logic [RegFileDataWidth-1:0] rf_rdata_a_ecc;
+ logic [RegFileDataWidth-1:0] rf_rdata_b_ecc;
+ logic irq_software;
+ logic irq_timer;
+ logic irq_external;
+ logic [14:0] irq_fast;
+ logic irq_nm;
+ logic debug_req;
+ fetch_enable_t fetch_enable;
+ logic ic_scr_key_valid;
+ logic cheri_pmode;
+ logic cheri_tsafe_en;
+ reg_cap_t rf_rcap_a;
+ reg_cap_t rf_rcap_b;
+ logic [31:0] rf_reg_rdy;
+ logic [31:0] tsmap_rdata;
+ logic [MMRegDinW-1:0] mmreg_corein;
+ } delayed_inputs_t;
+
+ delayed_inputs_t [LockstepOffset-1:0] shadow_inputs_q;
+ delayed_inputs_t shadow_inputs_in;
+ logic [6:0] instr_rdata_intg_q, data_rdata_intg_q;
+ logic [6:0] tsmap_rdata_intg_q;
+  // Arrays with unpacked dimensions must be dealt with separately (they cannot be members of
+  // the packed struct above)
+ logic [TagSizeECC-1:0] shadow_tag_rdata_q [IC_NUM_WAYS][LockstepOffset];
+ logic [LineSizeECC-1:0] shadow_data_rdata_q [IC_NUM_WAYS][LockstepOffset];
+
+ // Assign the inputs to the delay structure
+ assign shadow_inputs_in.instr_gnt = instr_gnt_i;
+ assign shadow_inputs_in.instr_rvalid = instr_rvalid_i;
+ assign shadow_inputs_in.instr_rdata = instr_rdata_i;
+ assign shadow_inputs_in.instr_err = instr_err_i;
+ assign shadow_inputs_in.data_gnt = data_gnt_i;
+ assign shadow_inputs_in.data_rvalid = data_rvalid_i;
+ assign shadow_inputs_in.data_rdata = data_rdata_i;
+ assign shadow_inputs_in.data_err = data_err_i;
+ assign shadow_inputs_in.rf_rdata_a_ecc = rf_rdata_a_ecc_i;
+ assign shadow_inputs_in.rf_rdata_b_ecc = rf_rdata_b_ecc_i;
+ assign shadow_inputs_in.irq_software = irq_software_i;
+ assign shadow_inputs_in.irq_timer = irq_timer_i;
+ assign shadow_inputs_in.irq_external = irq_external_i;
+ assign shadow_inputs_in.irq_fast = irq_fast_i;
+ assign shadow_inputs_in.irq_nm = irq_nm_i;
+ assign shadow_inputs_in.debug_req = debug_req_i;
+ assign shadow_inputs_in.fetch_enable = fetch_enable_i;
+ assign shadow_inputs_in.ic_scr_key_valid = ic_scr_key_valid_i;
+ assign shadow_inputs_in.cheri_pmode = cheri_pmode_i;
+ assign shadow_inputs_in.cheri_tsafe_en = cheri_tsafe_en_i;
+ assign shadow_inputs_in.rf_rcap_a = rf_rcap_a_i;
+ assign shadow_inputs_in.rf_rcap_b = rf_rcap_b_i;
+ assign shadow_inputs_in.rf_reg_rdy = rf_reg_rdy_i;
+ assign shadow_inputs_in.tsmap_rdata = tsmap_rdata_i;
+ assign shadow_inputs_in.mmreg_corein = mmreg_corein_i;
+
+ // Delay the inputs
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ instr_rdata_intg_q <= '0;
+ data_rdata_intg_q <= '0;
+ tsmap_rdata_intg_q <= '0;
+ for (int unsigned i = 0; i < LockstepOffset; i++) begin
+ shadow_inputs_q[i] <= delayed_inputs_t'('0);
+ shadow_tag_rdata_q[i] <= '{default: 0};
+ shadow_data_rdata_q[i] <= '{default: 0};
+ end
+ end else begin
+ instr_rdata_intg_q <= instr_rdata_intg_i;
+ data_rdata_intg_q <= data_rdata_intg_i;
+ tsmap_rdata_intg_q <= tsmap_rdata_intg_i;
+ for (int unsigned i = 0; i < LockstepOffset - 1; i++) begin
+ shadow_inputs_q[i] <= shadow_inputs_q[i+1];
+ shadow_tag_rdata_q[i] <= shadow_tag_rdata_q[i+1];
+ shadow_data_rdata_q[i] <= shadow_data_rdata_q[i+1];
+ end
+ shadow_inputs_q[LockstepOffset-1] <= shadow_inputs_in;
+ shadow_tag_rdata_q[LockstepOffset-1] <= ic_tag_rdata_i;
+ shadow_data_rdata_q[LockstepOffset-1] <= ic_data_rdata_i;
+ end
+ end
+
+ ////////////////////////////
+ // Bus integrity checking //
+ ////////////////////////////
+
+ logic bus_intg_err;
+ logic [1:0] instr_intg_err, data_intg_err, data_intg_err_tmp;
+ logic [31:0] unused_wdata;
+ logic [1:0] data_we_q;
+ logic [31:0] rdata_tmp;
+
+ always @(posedge clk_i, negedge rst_ni) begin
+ if (~rst_ni) begin
+ data_we_q <= 2'b00;
+ end else begin
+ if (data_gnt_i) data_we_q[1] <= data_we_i;
+ data_we_q[0] <= data_we_q[1]; // align with shadow_inputs_q[LockstepOffset-1]
+ end
+ end
+
+ // Checks on incoming data
+ prim_secded_inv_39_32_dec u_instr_intg_dec (
+ .data_i ({instr_rdata_intg_q, shadow_inputs_q[LockstepOffset-1].instr_rdata}),
+ .data_o (),
+ .syndrome_o (),
+ .err_o (instr_intg_err)
+ );
+
+ if (CHERIoTEn) begin
+ assign rdata_tmp = shadow_inputs_q[LockstepOffset-1].data_rdata[31:0] ^
+ {31'h0, shadow_inputs_q[LockstepOffset-1].data_rdata[32]};
+ end else begin
+ assign rdata_tmp = shadow_inputs_q[LockstepOffset-1].data_rdata[31:0];
+ end
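+
+  // Note: in the CHERIoT configuration the 33rd data bit (the capability tag) is folded into
+  // bit 0 with an XOR (rdata_tmp above) before the 39/32 SECDED decode below and the matching
+  // encode (u_data_gen), so the 32-bit integrity code also covers the tag bit.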
+
+ prim_secded_inv_39_32_dec u_data_intg_dec (
+ .data_i ({data_rdata_intg_q, rdata_tmp}),
+ .data_o (),
+ .syndrome_o (),
+ .err_o (data_intg_err_tmp)
+ );
+
+ // only check read data (data_rvalid includes both reads and writes)
+ assign data_intg_err = data_we_q[0] ? 2'h0 : data_intg_err_tmp;
+
+ assign bus_intg_err = (shadow_inputs_q[LockstepOffset-1].instr_rvalid & |instr_intg_err) |
+ (shadow_inputs_q[LockstepOffset-1].data_rvalid & |data_intg_err);
+
+ // Generate integrity bits
+ if (CHERIoTEn) begin
+ prim_secded_inv_39_32_enc u_data_gen (
+ .data_i (data_wdata_i[31:0]^{31'h0, data_wdata_i[32]}),
+ .data_o ({data_wdata_intg_o, unused_wdata})
+ );
+ end else begin
+ prim_secded_inv_39_32_enc u_data_gen (
+ .data_i (data_wdata_i[31:0]),
+ .data_o ({data_wdata_intg_o, unused_wdata})
+ );
+ end
+
+
+ ////////////////////////////////////////
+ // TSMAP interface integrity checking //
+ ////////////////////////////////////////
+
+ logic tsmap_intg_err;
+ logic [1:0] tsmap_intg_err_tmp;
+ logic [1:0] tsmap_cs_q;
+
+ if (CHERIoTEn && CheriPPLBC) begin
+ always @(posedge clk_i, negedge rst_ni) begin
+ if (~rst_ni) begin
+ tsmap_cs_q <= 2'b00;
+ end else begin
+ tsmap_cs_q <= {tsmap_cs_i, tsmap_cs_q[1]}; // align with shadow_inputs_q[LockstepOffset-1]
+ end
+ end
+
+ // Checks on incoming data
+ prim_secded_inv_39_32_dec u_tsmap_intg_dec (
+ .data_i ({tsmap_rdata_intg_q, shadow_inputs_q[LockstepOffset-1].tsmap_rdata}),
+ .data_o (),
+ .syndrome_o (),
+ .err_o (tsmap_intg_err_tmp)
+ );
+
+ assign tsmap_intg_err = tsmap_cs_q[0] & tsmap_intg_err_tmp;
+
+ end else begin
+ assign tsmap_intg_err = 1'b0;
+ end
+
+ ///////////////////
+ // Output delays //
+ ///////////////////
+
+ typedef struct packed {
+ logic instr_req;
+ logic [31:0] instr_addr;
+ logic data_req;
+ logic data_we;
+ logic [3:0] data_be;
+ logic [31:0] data_addr;
+ logic [DataWidth-1:0] data_wdata;
+ logic data_is_cap;
+ logic dummy_instr_id;
+ logic [4:0] rf_raddr_a;
+ logic [4:0] rf_raddr_b;
+ logic [4:0] rf_waddr_wb;
+ logic rf_we_wb;
+ logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;
+ logic [IC_NUM_WAYS-1:0] ic_tag_req;
+ logic ic_tag_write;
+ logic [IC_INDEX_W-1:0] ic_tag_addr;
+ logic [TagSizeECC-1:0] ic_tag_wdata;
+ logic [IC_NUM_WAYS-1:0] ic_data_req;
+ logic ic_data_write;
+ logic [IC_INDEX_W-1:0] ic_data_addr;
+ logic [LineSizeECC-1:0] ic_data_wdata;
+ logic irq_pending;
+ crash_dump_t crash_dump;
+ logic double_fault_seen;
+ logic icache_inval;
+ logic core_busy;
+ reg_cap_t rf_wcap_wb;
+ logic rf_trsv_en;
+ logic [4:0] rf_trsv_addr;
+ logic [6:0] rf_trsv_par;
+ logic [4:0] rf_trvk_addr;
+ logic rf_trvk_en;
+ logic rf_trvk_clrtag;
+ logic [6:0] rf_trvk_par;
+ logic tsmap_cs;
+ logic [15:0] tsmap_addr;
+ logic [MMRegDoutW-1:0] mmreg_coreout;
+ } delayed_outputs_t;
+
+ delayed_outputs_t [OutputsOffset-1:0] core_outputs_q;
+ delayed_outputs_t core_outputs_in;
+ delayed_outputs_t shadow_outputs_d, shadow_outputs_q;
+
+ // Assign core outputs to the structure
+ assign core_outputs_in.instr_req = instr_req_i;
+ assign core_outputs_in.instr_addr = instr_addr_i;
+ assign core_outputs_in.data_req = data_req_i;
+ assign core_outputs_in.data_we = data_we_i;
+ assign core_outputs_in.data_be = data_be_i;
+ assign core_outputs_in.data_addr = data_addr_i;
+ assign core_outputs_in.data_wdata = data_wdata_i;
+ assign core_outputs_in.data_is_cap = data_is_cap_i;
+ assign core_outputs_in.dummy_instr_id = dummy_instr_id_i;
+ assign core_outputs_in.rf_raddr_a = rf_raddr_a_i;
+ assign core_outputs_in.rf_raddr_b = rf_raddr_b_i;
+ assign core_outputs_in.rf_waddr_wb = rf_waddr_wb_i;
+ assign core_outputs_in.rf_we_wb = rf_we_wb_i;
+ assign core_outputs_in.rf_wdata_wb_ecc = rf_wdata_wb_ecc_i;
+ assign core_outputs_in.ic_tag_req = ic_tag_req_i;
+ assign core_outputs_in.ic_tag_write = ic_tag_write_i;
+ assign core_outputs_in.ic_tag_addr = ic_tag_addr_i;
+ assign core_outputs_in.ic_tag_wdata = ic_tag_wdata_i;
+ assign core_outputs_in.ic_data_req = ic_data_req_i;
+ assign core_outputs_in.ic_data_write = ic_data_write_i;
+ assign core_outputs_in.ic_data_addr = ic_data_addr_i;
+ assign core_outputs_in.ic_data_wdata = ic_data_wdata_i;
+ assign core_outputs_in.irq_pending = irq_pending_i;
+ assign core_outputs_in.crash_dump = crash_dump_i;
+ assign core_outputs_in.double_fault_seen = double_fault_seen_i;
+ assign core_outputs_in.icache_inval = icache_inval_i;
+ assign core_outputs_in.core_busy = core_busy_i;
+ assign core_outputs_in.rf_wcap_wb = rf_wcap_wb_i;
+ assign core_outputs_in.rf_trsv_en = rf_trsv_en_i;
+ assign core_outputs_in.rf_trsv_addr = rf_trsv_addr_i;
+ assign core_outputs_in.rf_trsv_par = rf_trsv_par_i;
+ assign core_outputs_in.rf_trvk_addr = rf_trvk_addr_i;
+ assign core_outputs_in.rf_trvk_en = rf_trvk_en_i;
+ assign core_outputs_in.rf_trvk_clrtag = rf_trvk_clrtag_i;
+ assign core_outputs_in.rf_trvk_par = rf_trvk_par_i;
+ assign core_outputs_in.tsmap_cs = tsmap_cs_i;
+ assign core_outputs_in.tsmap_addr = tsmap_addr_i;
+ assign core_outputs_in.mmreg_coreout = mmreg_coreout_i;
+
+ // Delay the outputs
+ always_ff @(posedge clk_i) begin
+ for (int unsigned i = 0; i < OutputsOffset - 1; i++) begin
+ core_outputs_q[i] <= core_outputs_q[i+1];
+ end
+ core_outputs_q[OutputsOffset-1] <= core_outputs_in;
+ end
+
+ ///////////////////////////////
+ // Shadow core instantiation //
+ ///////////////////////////////
+
+ logic shadow_alert_minor, shadow_alert_major;
+
+ cheriot_core #(
+ .PMPEnable ( PMPEnable ),
+ .PMPGranularity ( PMPGranularity ),
+ .PMPNumRegions ( PMPNumRegions ),
+ .MHPMCounterNum ( MHPMCounterNum ),
+ .MHPMCounterWidth ( MHPMCounterWidth ),
+ .RV32E ( RV32E ),
+ .RV32M ( RV32M ),
+ .RV32B ( RV32B ),
+ .BranchTargetALU ( BranchTargetALU ),
+ .ICache ( ICache ),
+ .ICacheECC ( ICacheECC ),
+ .BusSizeECC ( BusSizeECC ),
+ .TagSizeECC ( TagSizeECC ),
+ .LineSizeECC ( LineSizeECC ),
+ .BranchPredictor ( BranchPredictor ),
+ .DbgTriggerEn ( DbgTriggerEn ),
+ .DbgHwBreakNum ( DbgHwBreakNum ),
+ .WritebackStage ( WritebackStage ),
+ .ResetAll ( ResetAll ),
+ .RndCnstLfsrSeed ( RndCnstLfsrSeed ),
+ .RndCnstLfsrPerm ( RndCnstLfsrPerm ),
+ .SecureIbex ( SecureIbex ),
+ .DummyInstructions ( DummyInstructions ),
+ .RegFileECC ( RegFileECC ),
+ .RegFileDataWidth ( RegFileDataWidth ),
+ .DmHaltAddr ( DmHaltAddr ),
+ .DmExceptionAddr ( DmExceptionAddr ),
+ .CHERIoTEn ( CHERIoTEn),
+ .DataWidth ( DataWidth),
+ .HeapBase ( HeapBase ),
+ .TSMapBase ( TSMapBase ),
+ .TSMapSize ( TSMapSize),
+ .MemCapFmt ( MemCapFmt ),
+ .CheriPPLBC ( CheriPPLBC),
+ .CheriSBND2 ( CheriSBND2),
+ .CheriTBRE ( CheriTBRE)
+ ) u_shadow_core (
+ .clk_i (clk_i),
+ .rst_ni (rst_shadow_n),
+
+ .hart_id_i (hart_id_i),
+ .boot_addr_i (boot_addr_i),
+
+ .cheri_pmode_i (shadow_inputs_q[0].cheri_pmode),
+ .cheri_tsafe_en_i (shadow_inputs_q[0].cheri_tsafe_en),
+
+ .instr_req_o (shadow_outputs_d.instr_req),
+ .instr_gnt_i (shadow_inputs_q[0].instr_gnt),
+ .instr_rvalid_i (shadow_inputs_q[0].instr_rvalid),
+ .instr_addr_o (shadow_outputs_d.instr_addr),
+ .instr_rdata_i (shadow_inputs_q[0].instr_rdata),
+ .instr_err_i (shadow_inputs_q[0].instr_err),
+
+ .data_req_o (shadow_outputs_d.data_req),
+ .data_gnt_i (shadow_inputs_q[0].data_gnt),
+ .data_rvalid_i (shadow_inputs_q[0].data_rvalid),
+ .data_we_o (shadow_outputs_d.data_we),
+ .data_be_o (shadow_outputs_d.data_be),
+ .data_addr_o (shadow_outputs_d.data_addr),
+ .data_wdata_o (shadow_outputs_d.data_wdata),
+ .data_is_cap_o (shadow_outputs_d.data_is_cap),
+ .data_rdata_i (shadow_inputs_q[0].data_rdata),
+ .data_err_i (shadow_inputs_q[0].data_err),
+
+ .dummy_instr_id_o (shadow_outputs_d.dummy_instr_id),
+ .rf_raddr_a_o (shadow_outputs_d.rf_raddr_a),
+ .rf_raddr_b_o (shadow_outputs_d.rf_raddr_b),
+ .rf_waddr_wb_o (shadow_outputs_d.rf_waddr_wb),
+ .rf_we_wb_o (shadow_outputs_d.rf_we_wb),
+ .rf_wdata_wb_ecc_o (shadow_outputs_d.rf_wdata_wb_ecc),
+ .rf_rdata_a_ecc_i (shadow_inputs_q[0].rf_rdata_a_ecc),
+ .rf_rdata_b_ecc_i (shadow_inputs_q[0].rf_rdata_b_ecc),
+ .rf_wcap_wb_o (shadow_outputs_d.rf_wcap_wb),
+ .rf_rcap_a_i (shadow_inputs_q[0].rf_rcap_a),
+ .rf_rcap_b_i (shadow_inputs_q[0].rf_rcap_b),
+ .rf_reg_rdy_i (shadow_inputs_q[0].rf_reg_rdy),
+ .rf_trsv_en_o (shadow_outputs_d.rf_trsv_en),
+ .rf_trsv_addr_o (shadow_outputs_d.rf_trsv_addr),
+ .rf_trsv_par_o (shadow_outputs_d.rf_trsv_par),
+ .rf_trvk_addr_o (shadow_outputs_d.rf_trvk_addr),
+ .rf_trvk_en_o (shadow_outputs_d.rf_trvk_en),
+ .rf_trvk_clrtag_o (shadow_outputs_d.rf_trvk_clrtag),
+ .rf_trvk_par_o (shadow_outputs_d.rf_trvk_par),
+ .tsmap_cs_o (shadow_outputs_d.tsmap_cs),
+ .tsmap_addr_o (shadow_outputs_d.tsmap_addr),
+ .tsmap_rdata_i (shadow_inputs_q[0].tsmap_rdata),
+ .mmreg_corein_i (shadow_inputs_q[0].mmreg_corein),
+ .mmreg_coreout_o (shadow_outputs_d.mmreg_coreout),
+
+ .ic_tag_req_o (shadow_outputs_d.ic_tag_req),
+ .ic_tag_write_o (shadow_outputs_d.ic_tag_write),
+ .ic_tag_addr_o (shadow_outputs_d.ic_tag_addr),
+ .ic_tag_wdata_o (shadow_outputs_d.ic_tag_wdata),
+ .ic_tag_rdata_i (shadow_tag_rdata_q[0]),
+ .ic_data_req_o (shadow_outputs_d.ic_data_req),
+ .ic_data_write_o (shadow_outputs_d.ic_data_write),
+ .ic_data_addr_o (shadow_outputs_d.ic_data_addr),
+ .ic_data_wdata_o (shadow_outputs_d.ic_data_wdata),
+ .ic_data_rdata_i (shadow_data_rdata_q[0]),
+ .ic_scr_key_valid_i (shadow_inputs_q[0].ic_scr_key_valid),
+
+ .irq_software_i (shadow_inputs_q[0].irq_software),
+ .irq_timer_i (shadow_inputs_q[0].irq_timer),
+ .irq_external_i (shadow_inputs_q[0].irq_external),
+ .irq_fast_i (shadow_inputs_q[0].irq_fast),
+ .irq_nm_i (shadow_inputs_q[0].irq_nm),
+ .irq_pending_o (shadow_outputs_d.irq_pending),
+
+ .debug_req_i (shadow_inputs_q[0].debug_req),
+ .crash_dump_o (shadow_outputs_d.crash_dump),
+ .double_fault_seen_o (shadow_outputs_d.double_fault_seen),
+
+`ifdef RVFI
+ .rvfi_valid (),
+ .rvfi_order (),
+ .rvfi_insn (),
+ .rvfi_trap (),
+ .rvfi_halt (),
+ .rvfi_intr (),
+ .rvfi_mode (),
+ .rvfi_ixl (),
+ .rvfi_rs1_addr (),
+ .rvfi_rs2_addr (),
+ .rvfi_rs3_addr (),
+ .rvfi_rs1_rdata (),
+ .rvfi_rs2_rdata (),
+ .rvfi_rs3_rdata (),
+ .rvfi_rd_addr (),
+ .rvfi_rd_wdata (),
+ .rvfi_pc_rdata (),
+ .rvfi_pc_wdata (),
+ .rvfi_mem_addr (),
+ .rvfi_mem_rmask (),
+ .rvfi_mem_wmask (),
+ .rvfi_mem_rdata (),
+ .rvfi_mem_wdata (),
+ .rvfi_ext_mip (),
+ .rvfi_ext_nmi (),
+ .rvfi_ext_debug_req (),
+ .rvfi_ext_mcycle (),
+ .rvfi_mem_wcap (),
+ .rvfi_mem_rcap (),
+ .rvfi_mem_is_cap (),
+ .rvfi_rd_wcap (),
+ .rvfi_rs2_rcap (),
+ .rvfi_rs1_rcap (),
+`endif
+
+ .fetch_enable_i (shadow_inputs_q[0].fetch_enable),
+ .alert_minor_o (shadow_alert_minor),
+ .alert_major_o (shadow_alert_major),
+ .icache_inval_o (shadow_outputs_d.icache_inval),
+ .core_busy_o (shadow_outputs_d.core_busy)
+ );
+
+ // Register the shadow core outputs
+ always_ff @(posedge clk_i) begin
+ shadow_outputs_q <= shadow_outputs_d;
+ end
+
+ /////////////////////////
+ // Compare the outputs //
+ /////////////////////////
+
+ logic outputs_mismatch;
+
+ assign outputs_mismatch = enable_cmp_q & (shadow_outputs_q != core_outputs_q[0]);
+ assign alert_major_internal_o = outputs_mismatch | shadow_alert_major;
+ assign alert_major_bus_o = bus_intg_err | tsmap_intg_err;
+ assign alert_minor_o = shadow_alert_minor;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv
new file mode 100644
index 0000000..522bb6b
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv
@@ -0,0 +1,531 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`define OP_L 15:0
+`define OP_H 31:16
+
+/**
+ * Fast Multiplier and Division
+ *
+ * 16x16 kernel multiplier and Long Division
+ */
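+//
+// The 32x32 product is decomposed into 16x16 partial products:
+//   A*B = (AH*BH << 32) + (AH*BL << 16) + (AL*BH << 16) + (AL*BL)
+// where AH/AL and BH/BL are selected with the `OP_H / `OP_L macros above.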
+
+`include "prim_assert.sv"
+
+module cheriot_multdiv_fast #(
+ parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast
+ ) (
+ input logic clk_i,
+ input logic rst_ni,
+ input logic mult_en_i, // dynamic enable signal, for FSM control
+ input logic div_en_i, // dynamic enable signal, for FSM control
+ input logic mult_sel_i, // static decoder output, for data muxes
+ input logic div_sel_i, // static decoder output, for data muxes
+ input cheriot_pkg::md_op_e operator_i,
+ input logic [1:0] signed_mode_i,
+ input logic [31:0] op_a_i,
+ input logic [31:0] op_b_i,
+ input logic [33:0] alu_adder_ext_i,
+ input logic [31:0] alu_adder_i,
+ input logic equal_to_zero_i,
+ input logic data_ind_timing_i,
+
+ output logic [32:0] alu_operand_a_o,
+ output logic [32:0] alu_operand_b_o,
+
+ input logic [33:0] imd_val_q_i[2],
+ output logic [33:0] imd_val_d_o[2],
+ output logic [1:0] imd_val_we_o,
+
+ input logic multdiv_ready_id_i,
+
+ output logic [31:0] multdiv_result_o,
+ output logic valid_o
+);
+
+ import cheriot_pkg::*;
+
+ // Both multiplier variants
+ logic signed [34:0] mac_res_signed;
+ logic [34:0] mac_res_ext;
+ logic [33:0] accum;
+ logic sign_a, sign_b;
+ logic mult_valid;
+ logic signed_mult;
+
+ // Results that become intermediate value depending on whether mul or div is being calculated
+ logic [33:0] mac_res_d, op_remainder_d;
+ // Raw output of MAC calculation
+ logic [33:0] mac_res;
+
+ // Divider signals
+ logic div_sign_a, div_sign_b;
+ logic is_greater_equal;
+ logic div_change_sign, rem_change_sign;
+ logic [31:0] one_shift;
+ logic [31:0] op_denominator_q;
+ logic [31:0] op_numerator_q;
+ logic [31:0] op_quotient_q;
+ logic [31:0] op_denominator_d;
+ logic [31:0] op_numerator_d;
+ logic [31:0] op_quotient_d;
+ logic [31:0] next_remainder;
+ logic [32:0] next_quotient;
+ logic [31:0] res_adder_h;
+ logic div_valid;
+ logic [ 4:0] div_counter_q, div_counter_d;
+ logic multdiv_en;
+ logic mult_hold;
+ logic div_hold;
+ logic div_by_zero_d, div_by_zero_q;
+
+ logic mult_en_internal;
+ logic div_en_internal;
+
+ typedef enum logic [2:0] {
+ MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+ } md_fsm_e;
+ md_fsm_e md_state_q, md_state_d;
+
+ logic unused_mult_sel_i;
+ assign unused_mult_sel_i = mult_sel_i;
+
+ assign mult_en_internal = mult_en_i & ~mult_hold;
+ assign div_en_internal = div_en_i & ~div_hold;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ div_counter_q <= '0;
+ md_state_q <= MD_IDLE;
+ op_numerator_q <= '0;
+ op_quotient_q <= '0;
+ div_by_zero_q <= '0;
+ end else if (div_en_internal) begin
+ div_counter_q <= div_counter_d;
+ op_numerator_q <= op_numerator_d;
+ op_quotient_q <= op_quotient_d;
+ md_state_q <= md_state_d;
+ div_by_zero_q <= div_by_zero_d;
+ end
+ end
+
+ `ASSERT_KNOWN(DivEnKnown, div_en_internal)
+ `ASSERT_KNOWN(MultEnKnown, mult_en_internal)
+ `ASSERT_KNOWN(MultDivEnKnown, multdiv_en)
+
+ assign multdiv_en = mult_en_internal | div_en_internal;
+
+ // Intermediate value register shared with ALU
+ assign imd_val_d_o[0] = div_sel_i ? op_remainder_d : mac_res_d;
+ assign imd_val_we_o[0] = multdiv_en;
+
+ assign imd_val_d_o[1] = {2'b0, op_denominator_d};
+ assign imd_val_we_o[1] = div_en_internal;
+ assign op_denominator_q = imd_val_q_i[1][31:0];
+ logic [1:0] unused_imd_val;
+ assign unused_imd_val = imd_val_q_i[1][33:32];
+ logic unused_mac_res_ext;
+ assign unused_mac_res_ext = mac_res_ext[34];
+
+ assign signed_mult = (signed_mode_i != 2'b00);
+ assign multdiv_result_o = div_sel_i ? imd_val_q_i[0][31:0] : mac_res_d[31:0];
+
+ // The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a
+ // single cycle and MULH instructions in two cycles.
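+  // MUL needs only AL*BL, AL*BH and AH*BL (state MULL); MULH takes a second pass (state MULH)
+  // to add AH*BH and the accumulated upper bits.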
+ if (RV32M == RV32MSingleCycle) begin : gen_mult_single_cycle
+
+ typedef enum logic {
+ MULL, MULH
+ } mult_fsm_e;
+ mult_fsm_e mult_state_q, mult_state_d;
+
+ logic signed [33:0] mult1_res, mult2_res, mult3_res;
+ logic [33:0] mult1_res_uns;
+ logic [33:32] unused_mult1_res_uns;
+ logic [15:0] mult1_op_a, mult1_op_b;
+ logic [15:0] mult2_op_a, mult2_op_b;
+ logic [15:0] mult3_op_a, mult3_op_b;
+ logic mult1_sign_a, mult1_sign_b;
+ logic mult2_sign_a, mult2_sign_b;
+ logic mult3_sign_a, mult3_sign_b;
+ logic [33:0] summand1, summand2, summand3;
+
+ assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b});
+ assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b});
+ assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b});
+
+ assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3);
+
+ assign mult1_res_uns = $unsigned(mult1_res);
+ assign mac_res_ext = $unsigned(mac_res_signed);
+ assign mac_res = mac_res_ext[33:0];
+
+ assign sign_a = signed_mode_i[0] & op_a_i[31];
+ assign sign_b = signed_mode_i[1] & op_b_i[31];
+
+ // The first two multipliers are only used in state 1 (MULL). We can assign them statically.
+ // al*bl
+ assign mult1_sign_a = 1'b0;
+ assign mult1_sign_b = 1'b0;
+ assign mult1_op_a = op_a_i[`OP_L];
+ assign mult1_op_b = op_b_i[`OP_L];
+
+ // al*bh
+ assign mult2_sign_a = 1'b0;
+ assign mult2_sign_b = sign_b;
+ assign mult2_op_a = op_a_i[`OP_L];
+ assign mult2_op_b = op_b_i[`OP_H];
+
+ // used in MULH
+ assign accum[17:0] = imd_val_q_i[0][33:16];
+ assign accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}};
+
+ always_comb begin
+ // Default values == MULL
+
+ // ah*bl
+ mult3_sign_a = sign_a;
+ mult3_sign_b = 1'b0;
+ mult3_op_a = op_a_i[`OP_H];
+ mult3_op_b = op_b_i[`OP_L];
+
+ summand1 = {18'h0, mult1_res_uns[`OP_H]};
+ summand2 = $unsigned(mult2_res);
+ summand3 = $unsigned(mult3_res);
+
+ // mac_res = A*B[47:16], mult1_res = A*B[15:0]
+ mac_res_d = {2'b0, mac_res[`OP_L], mult1_res_uns[`OP_L]};
+ mult_valid = mult_en_i;
+ mult_state_d = MULL;
+
+ mult_hold = 1'b0;
+
+ unique case (mult_state_q)
+
+ MULL: begin
+ if (operator_i != MD_OP_MULL) begin
+ mac_res_d = mac_res;
+ mult_valid = 1'b0;
+ mult_state_d = MULH;
+ end else begin
+ mult_hold = ~multdiv_ready_id_i;
+ end
+ end
+
+ MULH: begin
+ // ah*bh
+ mult3_sign_a = sign_a;
+ mult3_sign_b = sign_b;
+ mult3_op_a = op_a_i[`OP_H];
+ mult3_op_b = op_b_i[`OP_H];
+ mac_res_d = mac_res;
+
+ summand1 = '0;
+ summand2 = accum;
+ summand3 = $unsigned(mult3_res);
+
+ mult_state_d = MULL;
+ mult_valid = 1'b1;
+
+ mult_hold = ~multdiv_ready_id_i;
+ end
+
+ default: begin
+ mult_state_d = MULL;
+ end
+
+ endcase // mult_state_q
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mult_state_q <= MULL;
+ end else begin
+ if (mult_en_internal) begin
+ mult_state_q <= mult_state_d;
+ end
+ end
+ end
+
+ assign unused_mult1_res_uns = mult1_res_uns[33:32];
+
+    // States must be known/valid.
+ `ASSERT_KNOWN(IbexMultStateKnown, mult_state_q)
+
+ // The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles
+ // and MULH instructions in 4 cycles.
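+  // MUL walks the states ALBL -> ALBH -> AHBL; MULH additionally visits AHBH, accumulating
+  // each 16x16 partial product into the shared intermediate value register imd_val_q_i[0].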
+ end else begin : gen_mult_fast
+ logic [15:0] mult_op_a;
+ logic [15:0] mult_op_b;
+
+ typedef enum logic [1:0] {
+ ALBL, ALBH, AHBL, AHBH
+ } mult_fsm_e;
+ mult_fsm_e mult_state_q, mult_state_d;
+
+ // The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since:
+    // 1. The 2 MSBs of the multiplicands are always equal, and
+ // 2. The 16 MSBs of the addend (accum[33:18]) are always equal.
+ // Thus, it is safe to ignore mac_res_ext[34].
+ assign mac_res_signed =
+ $signed({sign_a, mult_op_a}) * $signed({sign_b, mult_op_b}) + $signed(accum);
+ assign mac_res_ext = $unsigned(mac_res_signed);
+ assign mac_res = mac_res_ext[33:0];
+
+ always_comb begin
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = 1'b0;
+ sign_b = 1'b0;
+ accum = imd_val_q_i[0];
+ mac_res_d = mac_res;
+ mult_state_d = mult_state_q;
+ mult_valid = 1'b0;
+ mult_hold = 1'b0;
+
+ unique case (mult_state_q)
+
+ ALBL: begin
+ // al*bl
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = 1'b0;
+ sign_b = 1'b0;
+ accum = '0;
+ mac_res_d = mac_res;
+ mult_state_d = ALBH;
+ end
+
+ ALBH: begin
+ // al*bh<<16
+ mult_op_a = op_a_i[`OP_L];
+ mult_op_b = op_b_i[`OP_H];
+ sign_a = 1'b0;
+ sign_b = signed_mode_i[1] & op_b_i[31];
+          // the AL*BL result (in imd_val_q_i[0]) is always unsigned with no carry
+ accum = {18'b0, imd_val_q_i[0][31:16]};
+ if (operator_i == MD_OP_MULL) begin
+ mac_res_d = {2'b0, mac_res[`OP_L], imd_val_q_i[0][`OP_L]};
+ end else begin
+ // MD_OP_MULH
+ mac_res_d = mac_res;
+ end
+ mult_state_d = AHBL;
+ end
+
+ AHBL: begin
+ // ah*bl<<16
+ mult_op_a = op_a_i[`OP_H];
+ mult_op_b = op_b_i[`OP_L];
+ sign_a = signed_mode_i[0] & op_a_i[31];
+ sign_b = 1'b0;
+ if (operator_i == MD_OP_MULL) begin
+ accum = {18'b0, imd_val_q_i[0][31:16]};
+ mac_res_d = {2'b0, mac_res[15:0], imd_val_q_i[0][15:0]};
+ mult_valid = 1'b1;
+
+ // Note no state transition will occur if mult_hold is set
+ mult_state_d = ALBL;
+ mult_hold = ~multdiv_ready_id_i;
+ end else begin
+ accum = imd_val_q_i[0];
+ mac_res_d = mac_res;
+ mult_state_d = AHBH;
+ end
+ end
+
+ AHBH: begin
+ // only MD_OP_MULH here
+ // ah*bh
+ mult_op_a = op_a_i[`OP_H];
+ mult_op_b = op_b_i[`OP_H];
+ sign_a = signed_mode_i[0] & op_a_i[31];
+ sign_b = signed_mode_i[1] & op_b_i[31];
+ accum[17: 0] = imd_val_q_i[0][33:16];
+ accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}};
+          // the AH*BL result is unsigned only if signed_mode_i == 2'b00
+ mac_res_d = mac_res;
+ mult_valid = 1'b1;
+
+ // Note no state transition will occur if mult_hold is set
+ mult_state_d = ALBL;
+ mult_hold = ~multdiv_ready_id_i;
+ end
+ default: begin
+ mult_state_d = ALBL;
+ end
+ endcase // mult_state_q
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mult_state_q <= ALBL;
+ end else begin
+ if (mult_en_internal) begin
+ mult_state_q <= mult_state_d;
+ end
+ end
+ end
+
+  // States must be known/valid.
+ `ASSERT_KNOWN(IbexMultStateKnown, mult_state_q)
+
+ end // gen_mult_fast
+
+ // Divider
+ assign res_adder_h = alu_adder_ext_i[32:1];
+ logic [1:0] unused_alu_adder_ext;
+ assign unused_alu_adder_ext = {alu_adder_ext_i[33],alu_adder_ext_i[0]};
+
+ assign next_remainder = is_greater_equal ? res_adder_h[31:0] : imd_val_q_i[0][31:0];
+ assign next_quotient = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} :
+ {1'b0, op_quotient_q};
+
+ assign one_shift = {31'b0, 1'b1} << div_counter_q;
+
+  // The adder in the ALU computes alu_operand_a_o + alu_operand_b_o which means
+  // Remainder - Divisor. If Remainder - Divisor >= 0, is_greater_equal is set to 1,
+  // the next Remainder is Remainder - Divisor (contained in res_adder_h) and the
+  // Quotient bit at position div_counter_q is set to 1.
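+  // The division itself runs in MD_COMP/MD_LAST over 32 iterations, after MD_ABS_A/MD_ABS_B
+  // have taken the operands' absolute values; MD_CHANGE_SIGN then restores the sign of the
+  // quotient (or remainder) where required.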
+ always_comb begin
+ if ((imd_val_q_i[0][31] ^ op_denominator_q[31]) == 1'b0) begin
+ is_greater_equal = (res_adder_h[31] == 1'b0);
+ end else begin
+ is_greater_equal = imd_val_q_i[0][31];
+ end
+ end
+
+ assign div_sign_a = op_a_i[31] & signed_mode_i[0];
+ assign div_sign_b = op_b_i[31] & signed_mode_i[1];
+ assign div_change_sign = (div_sign_a ^ div_sign_b) & ~div_by_zero_q;
+ assign rem_change_sign = div_sign_a;
+
+
+ always_comb begin
+ div_counter_d = div_counter_q - 5'h1;
+ op_remainder_d = imd_val_q_i[0];
+ op_quotient_d = op_quotient_q;
+ md_state_d = md_state_q;
+ op_numerator_d = op_numerator_q;
+ op_denominator_d = op_denominator_q;
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_b_i, 1'b1};
+ div_valid = 1'b0;
+ div_hold = 1'b0;
+ div_by_zero_d = div_by_zero_q;
+
+ unique case (md_state_q)
+ MD_IDLE: begin
+ if (operator_i == MD_OP_DIV) begin
+ // Check if the Denominator is 0
+ // quotient for division by 0 is specified to be -1
+ // Note with data-independent time option, the full divide operation will proceed as
+ // normal and will naturally return -1
+ op_remainder_d = '1;
+ md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+ // Record that this is a div by zero to stop the sign change at the end of the
+ // division (in data_ind_timing mode).
+ div_by_zero_d = equal_to_zero_i;
+ end else begin
+ // Check if the Denominator is 0
+ // remainder for division by 0 is specified to be the numerator (operand a)
+ // Note with data-independent time option, the full divide operation will proceed as
+ // normal and will naturally return operand a
+ op_remainder_d = {2'b0, op_a_i};
+ md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+ end
+ // 0 - B = 0 iff B == 0
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_b_i, 1'b1};
+ div_counter_d = 5'd31;
+ end
+
+ MD_ABS_A: begin
+ // quotient
+ op_quotient_d = '0;
+ // A abs value
+ op_numerator_d = div_sign_a ? alu_adder_i : op_a_i;
+ md_state_d = MD_ABS_B;
+ div_counter_d = 5'd31;
+ // ABS(A) = 0 - A
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_a_i, 1'b1};
+ end
+
+ MD_ABS_B: begin
+ // remainder
+ op_remainder_d = { 33'h0, op_numerator_q[31]};
+ // B abs value
+ op_denominator_d = div_sign_b ? alu_adder_i : op_b_i;
+ md_state_d = MD_COMP;
+ div_counter_d = 5'd31;
+ // ABS(B) = 0 - B
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_b_i, 1'b1};
+ end
+
+ MD_COMP: begin
+ op_remainder_d = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]};
+ op_quotient_d = next_quotient[31:0];
+ md_state_d = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP;
+ // Division
+ alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+        alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator (two's complement)
+ end
+
+ MD_LAST: begin
+ if (operator_i == MD_OP_DIV) begin
+          // this time we save the quotient in op_remainder_d (i.e. imd_val_q_i[0]) since
+          // we no longer need the remainder
+ op_remainder_d = {1'b0, next_quotient};
+ end else begin
+          // this time we do not save the quotient since only the remainder is needed
+ op_remainder_d = {2'b0, next_remainder[31:0]};
+ end
+ // Division
+ alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder
+        alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator (two's complement)
+
+ md_state_d = MD_CHANGE_SIGN;
+ end
+
+ MD_CHANGE_SIGN: begin
+ md_state_d = MD_FINISH;
+ if (operator_i == MD_OP_DIV) begin
+ op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+ end else begin
+ op_remainder_d = (rem_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0];
+ end
+ // ABS(Quotient) = 0 - Quotient (or Remainder)
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~imd_val_q_i[0][31:0], 1'b1};
+ end
+
+ MD_FINISH: begin
+ // Hold result until ID stage is ready to accept it
+ // Note no state transition will occur if div_hold is set
+ md_state_d = MD_IDLE;
+ div_hold = ~multdiv_ready_id_i;
+ div_valid = 1'b1;
+ end
+
+ default: begin
+ md_state_d = MD_IDLE;
+ end
+ endcase // md_state_q
+ end
+
+ assign valid_o = mult_valid | div_valid;
+
+  // States must be known/valid.
+ `ASSERT(IbexMultDivStateValid, md_state_q inside {
+ MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH})
+
+`ifdef FORMAL
+ `ifdef YOSYS
+ `include "formal_tb_frag.svh"
+ `endif
+`endif
+
+endmodule // cheriot_multdiv_fast
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv
new file mode 100644
index 0000000..8fbc929
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv
@@ -0,0 +1,374 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Slow Multiplier and Division
+ *
+ * Baugh-Wooley multiplier and Long Division
+ */
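+//
+// Both operations are performed serially using the shared ALU adder: multiplication as a
+// shift-and-add of Baugh-Wooley partial products, division as a conditional-subtract
+// (restoring-style) long division, with MD_ABS_A/MD_ABS_B handling operand signs and
+// MD_CHANGE_SIGN restoring the result sign.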
+
+`include "prim_assert.sv"
+
+module cheriot_multdiv_slow
+(
+ input logic clk_i,
+ input logic rst_ni,
+ input logic mult_en_i, // dynamic enable signal, for FSM control
+ input logic div_en_i, // dynamic enable signal, for FSM control
+ input logic mult_sel_i, // static decoder output, for data muxes
+ input logic div_sel_i, // static decoder output, for data muxes
+ input cheriot_pkg::md_op_e operator_i,
+ input logic [1:0] signed_mode_i,
+ input logic [31:0] op_a_i,
+ input logic [31:0] op_b_i,
+ input logic [33:0] alu_adder_ext_i,
+ input logic [31:0] alu_adder_i,
+ input logic equal_to_zero_i,
+ input logic data_ind_timing_i,
+
+ output logic [32:0] alu_operand_a_o,
+ output logic [32:0] alu_operand_b_o,
+
+ input logic [33:0] imd_val_q_i[2],
+ output logic [33:0] imd_val_d_o[2],
+ output logic [1:0] imd_val_we_o,
+
+ input logic multdiv_ready_id_i,
+
+ output logic [31:0] multdiv_result_o,
+
+ output logic valid_o
+);
+
+ import cheriot_pkg::*;
+
+ typedef enum logic [2:0] {
+ MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+ } md_fsm_e;
+ md_fsm_e md_state_q, md_state_d;
+
+ logic [32:0] accum_window_q, accum_window_d;
+ logic unused_imd_val0;
+ logic [ 1:0] unused_imd_val1;
+
+ logic [32:0] res_adder_l;
+ logic [32:0] res_adder_h;
+
+ logic [ 4:0] multdiv_count_q, multdiv_count_d;
+ logic [32:0] op_b_shift_q, op_b_shift_d;
+ logic [32:0] op_a_shift_q, op_a_shift_d;
+ logic [32:0] op_a_ext, op_b_ext;
+ logic [32:0] one_shift;
+ logic [32:0] op_a_bw_pp, op_a_bw_last_pp;
+ logic [31:0] b_0;
+ logic sign_a, sign_b;
+ logic [32:0] next_quotient;
+ logic [31:0] next_remainder;
+ logic [31:0] op_numerator_q, op_numerator_d;
+ logic is_greater_equal;
+ logic div_change_sign, rem_change_sign;
+ logic div_by_zero_d, div_by_zero_q;
+ logic multdiv_hold;
+ logic multdiv_en;
+
+ // (accum_window_q + op_a_shift_q)
+ assign res_adder_l = alu_adder_ext_i[32:0];
+ // (accum_window_q + op_a_shift_q)>>1
+ assign res_adder_h = alu_adder_ext_i[33:1];
+
+ /////////////////////
+ // ALU Operand MUX //
+ /////////////////////
+
+ // Intermediate value register shared with ALU
+ assign imd_val_d_o[0] = {1'b0,accum_window_d};
+ assign imd_val_we_o[0] = ~multdiv_hold;
+ assign accum_window_q = imd_val_q_i[0][32:0];
+ assign unused_imd_val0 = imd_val_q_i[0][33];
+
+ assign imd_val_d_o[1] = {2'b00, op_numerator_d};
+ assign imd_val_we_o[1] = multdiv_en;
+ assign op_numerator_q = imd_val_q_i[1][31:0];
+ assign unused_imd_val1 = imd_val_q_i[1][33:32];
+
+ always_comb begin
+ alu_operand_a_o = accum_window_q;
+
+ unique case (operator_i)
+
+ MD_OP_MULL: begin
+ alu_operand_b_o = op_a_bw_pp;
+ end
+
+ MD_OP_MULH: begin
+ alu_operand_b_o = (md_state_q == MD_LAST) ? op_a_bw_last_pp : op_a_bw_pp;
+ end
+
+ MD_OP_DIV,
+ MD_OP_REM: begin
+ unique case (md_state_q)
+ MD_IDLE: begin
+ // 0 - B = 0 iff B == 0
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_b_i, 1'b1};
+ end
+ MD_ABS_A: begin
+ // ABS(A) = 0 - A
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_a_i, 1'b1};
+ end
+ MD_ABS_B: begin
+ // ABS(B) = 0 - B
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~op_b_i, 1'b1};
+ end
+ MD_CHANGE_SIGN: begin
+            // ABS(Quotient) = 0 - Quotient (or Remainder)
+ alu_operand_a_o = {32'h0 , 1'b1};
+ alu_operand_b_o = {~accum_window_q[31:0], 1'b1};
+ end
+ default: begin
+ // Division
+ alu_operand_a_o = {accum_window_q[31:0], 1'b1}; // it contains the remainder
+            alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1}; // -denominator (two's complement)
+ end
+ endcase
+ end
+ default: begin
+ alu_operand_a_o = accum_window_q;
+ alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1};
+ end
+ endcase
+ end
+
+ // Multiplier partial product calculation
+ assign b_0 = {32{op_b_shift_q[0]}};
+ assign op_a_bw_pp = { ~(op_a_shift_q[32] & op_b_shift_q[0]), (op_a_shift_q[31:0] & b_0) };
+ assign op_a_bw_last_pp = { (op_a_shift_q[32] & op_b_shift_q[0]), ~(op_a_shift_q[31:0] & b_0) };
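+  // This follows the standard Baugh-Wooley scheme for signed multiplication: the sign-bit
+  // partial product term is complemented in every intermediate row (op_a_bw_pp), and the final
+  // row instead complements the remaining terms (op_a_bw_last_pp), so the signed product can
+  // be accumulated with unsigned additions.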
+
+ // Sign extend the input operands
+ assign sign_a = op_a_i[31] & signed_mode_i[0];
+ assign sign_b = op_b_i[31] & signed_mode_i[1];
+
+ assign op_a_ext = {sign_a, op_a_i};
+ assign op_b_ext = {sign_b, op_b_i};
+
+ // Divider calculations
+
+ // The adder in the ALU computes Remainder - Divisor. If Remainder - Divisor >= 0,
+ // is_greater_equal is true, the next Remainder is the subtraction result and the Quotient
+ // multdiv_count_q-th bit is set to 1.
+ assign is_greater_equal = (accum_window_q[31] == op_b_shift_q[31]) ?
+ ~res_adder_h[31] : accum_window_q[31];
+
+ assign one_shift = {32'b0, 1'b1} << multdiv_count_q;
+
+ assign next_remainder = is_greater_equal ? res_adder_h[31:0] : accum_window_q[31:0];
+ assign next_quotient = is_greater_equal ? op_a_shift_q | one_shift : op_a_shift_q;
+
+ assign div_change_sign = (sign_a ^ sign_b) & ~div_by_zero_q;
+ assign rem_change_sign = sign_a;
+
+ always_comb begin
+ multdiv_count_d = multdiv_count_q;
+ accum_window_d = accum_window_q;
+ op_b_shift_d = op_b_shift_q;
+ op_a_shift_d = op_a_shift_q;
+ op_numerator_d = op_numerator_q;
+ md_state_d = md_state_q;
+ multdiv_hold = 1'b0;
+ div_by_zero_d = div_by_zero_q;
+ if (mult_sel_i || div_sel_i) begin
+ unique case (md_state_q)
+ MD_IDLE: begin
+ unique case (operator_i)
+ MD_OP_MULL: begin
+ op_a_shift_d = op_a_ext << 1;
+ accum_window_d = { ~(op_a_ext[32] & op_b_i[0]),
+ op_a_ext[31:0] & {32{op_b_i[0]}} };
+ op_b_shift_d = op_b_ext >> 1;
+ // Proceed with multiplication by 0/1 in data-independent time mode
+ md_state_d = (!data_ind_timing_i && ((op_b_ext >> 1) == 0)) ? MD_LAST : MD_COMP;
+ end
+ MD_OP_MULH: begin
+ op_a_shift_d = op_a_ext;
+ accum_window_d = { 1'b1, ~(op_a_ext[32] & op_b_i[0]),
+ op_a_ext[31:1] & {31{op_b_i[0]}} };
+ op_b_shift_d = op_b_ext >> 1;
+ md_state_d = MD_COMP;
+ end
+ MD_OP_DIV: begin
+ // Check if the denominator is 0
+ // quotient for division by 0 is specified to be -1
+ // Note with data-independent time option, the full divide operation will proceed as
+ // normal and will naturally return -1
+ accum_window_d = {33{1'b1}};
+ md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+ // Record that this is a div by zero to stop the sign change at the end of the
+ // division (in data_ind_timing mode).
+ div_by_zero_d = equal_to_zero_i;
+ end
+ MD_OP_REM: begin
+ // Check if the denominator is 0
+ // remainder for division by 0 is specified to be the numerator (operand a)
+ // Note with data-independent time option, the full divide operation will proceed as
+ // normal and will naturally return operand a
+ accum_window_d = op_a_ext;
+ md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A;
+ end
+ default:;
+ endcase
+ multdiv_count_d = 5'd31;
+ end
+
+ MD_ABS_A: begin
+ // quotient
+ op_a_shift_d = '0;
+ // A abs value
+ op_numerator_d = sign_a ? alu_adder_i : op_a_i;
+ md_state_d = MD_ABS_B;
+ end
+
+ MD_ABS_B: begin
+ // remainder
+ accum_window_d = {32'h0, op_numerator_q[31]};
+ // B abs value
+ op_b_shift_d = sign_b ? {1'b0, alu_adder_i} : {1'b0, op_b_i};
+ md_state_d = MD_COMP;
+ end
+
+ MD_COMP: begin
+ multdiv_count_d = multdiv_count_q - 5'h1;
+ unique case (operator_i)
+ MD_OP_MULL: begin
+ accum_window_d = res_adder_l;
+ op_a_shift_d = op_a_shift_q << 1;
+ op_b_shift_d = op_b_shift_q >> 1;
+ // Multiplication is complete once op_b is zero, unless in data_ind_timing mode where
+ // the maximum possible shift-add operations will be completed regardless of op_b
+ md_state_d = ((!data_ind_timing_i && (op_b_shift_d == 0)) ||
+ (multdiv_count_q == 5'd1)) ? MD_LAST : MD_COMP;
+ end
+ MD_OP_MULH: begin
+ accum_window_d = res_adder_h;
+ op_a_shift_d = op_a_shift_q;
+ op_b_shift_d = op_b_shift_q >> 1;
+ md_state_d = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+ end
+ MD_OP_DIV,
+ MD_OP_REM: begin
+ accum_window_d = {next_remainder[31:0], op_numerator_q[multdiv_count_d]};
+ op_a_shift_d = next_quotient;
+ md_state_d = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP;
+ end
+ default: ;
+ endcase
+ end
+
+ MD_LAST: begin
+ unique case (operator_i)
+ MD_OP_MULL: begin
+ accum_window_d = res_adder_l;
+
+ // Note no state transition will occur if multdiv_hold is set
+ md_state_d = MD_IDLE;
+ multdiv_hold = ~multdiv_ready_id_i;
+ end
+ MD_OP_MULH: begin
+ accum_window_d = res_adder_l;
+
+ // Note no state transition will occur if multdiv_hold is set
+ md_state_d = MD_IDLE;
+ multdiv_hold = ~multdiv_ready_id_i;
+ end
+ MD_OP_DIV: begin
+ // this time we save the quotient in accum_window_q since we no longer need
+ // the remainder
+ accum_window_d = next_quotient;
+ md_state_d = MD_CHANGE_SIGN;
+ end
+ MD_OP_REM: begin
+ // this time we do not save the quotient since we only need the remainder
+ accum_window_d = {1'b0, next_remainder[31:0]};
+ md_state_d = MD_CHANGE_SIGN;
+ end
+ default: ;
+ endcase
+ end
+
+ MD_CHANGE_SIGN: begin
+ md_state_d = MD_FINISH;
+ unique case (operator_i)
+ MD_OP_DIV:
+ accum_window_d = div_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+ MD_OP_REM:
+ accum_window_d = rem_change_sign ? {1'b0,alu_adder_i} : accum_window_q;
+ default: ;
+ endcase
+ end
+
+ MD_FINISH: begin
+ // Note no state transition will occur if multdiv_hold is set
+ md_state_d = MD_IDLE;
+ multdiv_hold = ~multdiv_ready_id_i;
+ end
+
+ default: begin
+ md_state_d = MD_IDLE;
+ end
+ endcase // md_state_q
+ end // (mult_sel_i || div_sel_i)
+ end
+
+ //////////////////////////////////////////
+ // Multiplier / Divider state registers //
+ //////////////////////////////////////////
+
+ assign multdiv_en = (mult_en_i | div_en_i) & ~multdiv_hold;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ multdiv_count_q <= 5'h0;
+ op_b_shift_q <= 33'h0;
+ op_a_shift_q <= 33'h0;
+ md_state_q <= MD_IDLE;
+ div_by_zero_q <= 1'b0;
+ end else if (multdiv_en) begin
+ multdiv_count_q <= multdiv_count_d;
+ op_b_shift_q <= op_b_shift_d;
+ op_a_shift_q <= op_a_shift_d;
+ md_state_q <= md_state_d;
+ div_by_zero_q <= div_by_zero_d;
+ end
+ end
+
+ /////////////
+ // Outputs //
+ /////////////
+
+ assign valid_o = (md_state_q == MD_FINISH) |
+ (md_state_q == MD_LAST &
+ (operator_i == MD_OP_MULL |
+ operator_i == MD_OP_MULH));
+
+ assign multdiv_result_o = div_en_i ? accum_window_q[31:0] : res_adder_l[31:0];
+
+ ////////////////
+ // Assertions //
+ ////////////////
+
+ // State must be valid.
+ `ASSERT(IbexMultDivStateValid, md_state_q inside {
+ MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
+ }, clk_i, !rst_ni)
+
+`ifdef FORMAL
+ `ifdef YOSYS
+ `include "formal_tb_frag.svh"
+ `endif
+`endif
+
+endmodule
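
Illustrative aside (not part of the RTL above): the MD_COMP state implements bit-serial restoring division. `is_greater_equal` checks whether the shifted remainder in `accum_window_q` can absorb one more subtraction of the divisor, `next_remainder`/`next_quotient` apply that step, and the FSM repeats it for 32 cycles; a division by zero naturally yields a quotient of -1 and leaves the numerator as the remainder, as the MD_IDLE comments state. A minimal behavioural sketch of the same unsigned loop, with hypothetical module and signal names:

module cheriot_div_model_sketch;
  // Behavioural model of the 32-step restoring division carried out in MD_COMP.
  function automatic logic [63:0] divrem(input logic [31:0] num, input logic [31:0] den);
    logic [32:0] rem;
    logic [31:0] quo;
    rem = '0;
    quo = '0;
    for (int i = 31; i >= 0; i--) begin
      rem = {rem[31:0], num[i]};      // accum_window: shift in the next numerator bit
      if (rem >= {1'b0, den}) begin   // is_greater_equal
        rem    = rem - {1'b0, den};   // next_remainder
        quo[i] = 1'b1;                // next_quotient (op_a_shift | one_shift)
      end
    end
    return {rem[31:0], quo};          // {remainder, quotient}
  endfunction

  initial begin
    logic [63:0] r;
    r = divrem(32'd100, 32'd7);
    $display("q=%0d r=%0d", r[31:0], r[63:32]);             // expect q=14 r=2
    r = divrem(32'd100, 32'd0);
    $display("q=%0d r=%0d", $signed(r[31:0]), r[63:32]);    // expect q=-1 r=100
  end
endmodule
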
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv
new file mode 100644
index 0000000..d40fd94
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv
@@ -0,0 +1,676 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2017 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Package with constants used by Ibex
+ */
+package cheriot_pkg;
+
+ ////////////////
+ // IO Structs //
+ ////////////////
+
+ typedef struct packed {
+ logic [31:0] current_pc;
+ logic [31:0] next_pc;
+ logic [31:0] last_data_addr;
+ logic [31:0] exception_addr;
+ } crash_dump_t;
+
+ typedef struct packed {
+ logic dummy_instr_id;
+ logic [4:0] raddr_a;
+ logic [4:0] waddr_a;
+ logic we_a;
+ logic [4:0] raddr_b;
+ } core2rf_t;
+
+ /////////////////////
+ // Parameter Enums //
+ /////////////////////
+
+ typedef enum integer {
+ RegFileFF = 0,
+ RegFileFPGA = 1,
+ RegFileLatch = 2
+ } regfile_e;
+
+ typedef enum integer {
+ RV32MNone = 0,
+ RV32MSlow = 1,
+ RV32MFast = 2,
+ RV32MSingleCycle = 3
+ } rv32m_e;
+
+ typedef enum integer {
+ RV32BNone = 0,
+ RV32BBalanced = 1,
+ RV32BOTEarlGrey = 2,
+ RV32BFull = 3
+ } rv32b_e;
+
+ /////////////
+ // Opcodes //
+ /////////////
+
+ typedef enum logic [6:0] {
+ OPCODE_LOAD = 7'h03,
+ OPCODE_MISC_MEM = 7'h0f,
+ OPCODE_OP_IMM = 7'h13,
+ OPCODE_AUIPC = 7'h17,
+ OPCODE_STORE = 7'h23,
+ OPCODE_OP = 7'h33,
+ OPCODE_LUI = 7'h37,
+ OPCODE_BRANCH = 7'h63,
+ OPCODE_JALR = 7'h67,
+ OPCODE_JAL = 7'h6f,
+ OPCODE_SYSTEM = 7'h73,
+ OPCODE_CHERI = 7'h5b,
+ OPCODE_AUICGP = 7'h7b
+ } opcode_e;
+
+
+ ////////////////////
+ // ALU operations //
+ ////////////////////
+
+ typedef enum logic [6:0] {
+ // Arithmetics
+ ALU_ADD,
+ ALU_SUB,
+
+ // Logics
+ ALU_XOR,
+ ALU_OR,
+ ALU_AND,
+ // RV32B
+ ALU_XNOR,
+ ALU_ORN,
+ ALU_ANDN,
+
+ // Shifts
+ ALU_SRA,
+ ALU_SRL,
+ ALU_SLL,
+ // RV32B
+ ALU_SRO,
+ ALU_SLO,
+ ALU_ROR,
+ ALU_ROL,
+ ALU_GREV,
+ ALU_GORC,
+ ALU_SHFL,
+ ALU_UNSHFL,
+ ALU_XPERM_N,
+ ALU_XPERM_B,
+ ALU_XPERM_H,
+
+ // Address Calculations
+ // RV32B
+ ALU_SH1ADD,
+ ALU_SH2ADD,
+ ALU_SH3ADD,
+
+ // Comparisons
+ ALU_LT,
+ ALU_LTU,
+ ALU_GE,
+ ALU_GEU,
+ ALU_EQ,
+ ALU_NE,
+ // RV32B
+ ALU_MIN,
+ ALU_MINU,
+ ALU_MAX,
+ ALU_MAXU,
+
+ // Pack
+ // RV32B
+ ALU_PACK,
+ ALU_PACKU,
+ ALU_PACKH,
+
+ // Sign-Extend
+ // RV32B
+ ALU_SEXTB,
+ ALU_SEXTH,
+
+ // Bitcounting
+ // RV32B
+ ALU_CLZ,
+ ALU_CTZ,
+ ALU_CPOP,
+
+ // Set lower than
+ ALU_SLT,
+ ALU_SLTU,
+
+ // Ternary Bitmanip Operations
+ // RV32B
+ ALU_CMOV,
+ ALU_CMIX,
+ ALU_FSL,
+ ALU_FSR,
+
+ // Single-Bit Operations
+ // RV32B
+ ALU_BSET,
+ ALU_BCLR,
+ ALU_BINV,
+ ALU_BEXT,
+
+ // Bit Compress / Decompress
+ // RV32B
+ ALU_BCOMPRESS,
+ ALU_BDECOMPRESS,
+
+ // Bit Field Place
+ // RV32B
+ ALU_BFP,
+
+ // Carry-less Multiply
+ // RV32B
+ ALU_CLMUL,
+ ALU_CLMULR,
+ ALU_CLMULH,
+
+ // Cyclic Redundancy Check
+ ALU_CRC32_B,
+ ALU_CRC32C_B,
+ ALU_CRC32_H,
+ ALU_CRC32C_H,
+ ALU_CRC32_W,
+ ALU_CRC32C_W
+ } alu_op_e;
+
+ typedef enum logic [1:0] {
+ // Multiplier/divider
+ MD_OP_MULL,
+ MD_OP_MULH,
+ MD_OP_DIV,
+ MD_OP_REM
+ } md_op_e;
+
+
+ //////////////////////////////////
+ // Control and status registers //
+ //////////////////////////////////
+
+ // CSR operations
+ typedef enum logic [1:0] {
+ CSR_OP_READ,
+ CSR_OP_WRITE,
+ CSR_OP_SET,
+ CSR_OP_CLEAR
+ } csr_op_e;
+
+ // Privileged mode
+ typedef enum logic[1:0] {
+ PRIV_LVL_M = 2'b11,
+ PRIV_LVL_H = 2'b10,
+ PRIV_LVL_S = 2'b01,
+ PRIV_LVL_U = 2'b00
+ } priv_lvl_e;
+
+ // Constants for the dcsr.xdebugver fields
+ typedef enum logic[3:0] {
+ XDEBUGVER_NO = 4'd0, // no external debug support
+ XDEBUGVER_STD = 4'd4, // external debug according to RISC-V debug spec
+ XDEBUGVER_NONSTD = 4'd15 // debug not conforming to RISC-V debug spec
+ } x_debug_ver_e;
+
+ //////////////
+ // WB stage //
+ //////////////
+
+ // Type of instruction present in writeback stage
+ typedef enum logic[1:0] {
+ WB_INSTR_LOAD, // Instruction is awaiting load data
+ WB_INSTR_STORE, // Instruction is awaiting store response
+ WB_INSTR_OTHER // Instruction doesn't fit into above categories
+ } wb_instr_type_e;
+
+ //////////////
+ // ID stage //
+ //////////////
+
+ // Operand a selection
+ typedef enum logic[1:0] {
+ OP_A_REG_A,
+ OP_A_FWD,
+ OP_A_CURRPC,
+ OP_A_IMM
+ } op_a_sel_e;
+
+ // Immediate a selection
+ typedef enum logic {
+ IMM_A_Z,
+ IMM_A_ZERO
+ } imm_a_sel_e;
+
+ // Operand b selection
+ typedef enum logic {
+ OP_B_REG_B,
+ OP_B_IMM
+ } op_b_sel_e;
+
+ // Immediate b selection
+ typedef enum logic [2:0] {
+ IMM_B_I,
+ IMM_B_S,
+ IMM_B_B,
+ IMM_B_U,
+ IMM_B_J,
+ IMM_B_INCR_PC,
+ IMM_B_INCR_ADDR
+ } imm_b_sel_e;
+
+ // Regfile write data selection
+ typedef enum logic {
+ RF_WD_EX,
+ RF_WD_CSR
+ } rf_wd_sel_e;
+
+
+ //////////////
+ // IF stage //
+ //////////////
+
+ // PC mux selection
+ typedef enum logic [2:0] {
+ PC_BOOT,
+ PC_JUMP,
+ PC_EXC,
+ PC_ERET,
+ PC_DRET,
+ PC_BP
+ } pc_sel_e;
+
+ // Exception PC mux selection
+ typedef enum logic [1:0] {
+ EXC_PC_EXC,
+ EXC_PC_IRQ,
+ EXC_PC_DBD,
+ EXC_PC_DBG_EXC // Exception while in debug mode
+ } exc_pc_sel_e;
+
+ // Interrupt requests
+ typedef struct packed {
+ logic irq_software;
+ logic irq_timer;
+ logic irq_external;
+ logic [14:0] irq_fast; // 15 fast interrupts,
+ // one interrupt is reserved for NMI (not visible through mip/mie)
+ } irqs_t;
+
+ // Exception cause
+ typedef enum logic [5:0] {
+ EXC_CAUSE_IRQ_SOFTWARE_M = {1'b1, 5'd03},
+ EXC_CAUSE_IRQ_TIMER_M = {1'b1, 5'd07},
+ EXC_CAUSE_IRQ_EXTERNAL_M = {1'b1, 5'd11},
+ // EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16},
+ // EXC_CAUSE_IRQ_FAST_14 = {1'b1, 5'd30},
+ EXC_CAUSE_IRQ_NM = {1'b1, 5'd31}, // == EXC_CAUSE_IRQ_FAST_15
+ EXC_CAUSE_INSN_ADDR_MISA = {1'b0, 5'd00},
+ EXC_CAUSE_INSTR_ACCESS_FAULT = {1'b0, 5'd01},
+ EXC_CAUSE_ILLEGAL_INSN = {1'b0, 5'd02},
+ EXC_CAUSE_BREAKPOINT = {1'b0, 5'd03},
+ EXC_CAUSE_LOAD_ADDR_MISALIGN = {1'b0, 5'd04},
+ EXC_CAUSE_LOAD_ACCESS_FAULT = {1'b0, 5'd05},
+ EXC_CAUSE_STORE_ADDR_MISALIGN = {1'b0, 5'd06},
+ EXC_CAUSE_STORE_ACCESS_FAULT = {1'b0, 5'd07},
+ EXC_CAUSE_ECALL_UMODE = {1'b0, 5'd08},
+ EXC_CAUSE_ECALL_MMODE = {1'b0, 5'd11},
+ EXC_CAUSE_CHERI_FAULT = {1'b0, 5'd28}
+ } exc_cause_e;
+
+ // Debug cause
+ typedef enum logic [2:0] {
+ DBG_CAUSE_NONE = 3'h0,
+ DBG_CAUSE_EBREAK = 3'h1,
+ DBG_CAUSE_TRIGGER = 3'h2,
+ DBG_CAUSE_HALTREQ = 3'h3,
+ DBG_CAUSE_STEP = 3'h4
+ } dbg_cause_e;
+
+ // ICache constants
+ parameter int unsigned ADDR_W = 32;
+ parameter int unsigned BUS_SIZE = 32;
+ parameter int unsigned BUS_BYTES = BUS_SIZE/8;
+ parameter int unsigned BUS_W = $clog2(BUS_BYTES);
+ parameter int unsigned IC_SIZE_BYTES = 4096;
+ parameter int unsigned IC_NUM_WAYS = 2;
+ parameter int unsigned IC_LINE_SIZE = 64;
+ parameter int unsigned IC_LINE_BYTES = IC_LINE_SIZE/8;
+ parameter int unsigned IC_LINE_W = $clog2(IC_LINE_BYTES);
+ parameter int unsigned IC_NUM_LINES = IC_SIZE_BYTES / IC_NUM_WAYS / IC_LINE_BYTES;
+ parameter int unsigned IC_LINE_BEATS = IC_LINE_BYTES / BUS_BYTES;
+ parameter int unsigned IC_LINE_BEATS_W = $clog2(IC_LINE_BEATS);
+ parameter int unsigned IC_INDEX_W = $clog2(IC_NUM_LINES);
+ parameter int unsigned IC_INDEX_HI = IC_INDEX_W + IC_LINE_W - 1;
+ parameter int unsigned IC_TAG_SIZE = ADDR_W - IC_INDEX_W - IC_LINE_W + 1; // 1 valid bit
+ parameter int unsigned IC_OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords
+ // ICache Scrambling Parameters
+ parameter int unsigned SCRAMBLE_KEY_W = 128;
+ parameter int unsigned SCRAMBLE_NONCE_W = 64;
+
+ // PMP constants
+ parameter int unsigned PMP_MAX_REGIONS = 16;
+ parameter int unsigned PMP_CFG_W = 8;
+
+ // PMP access type
+ parameter int unsigned PMP_I = 0;
+ parameter int unsigned PMP_I2 = 1;
+ parameter int unsigned PMP_D = 2;
+
+ typedef enum logic [1:0] {
+ PMP_ACC_EXEC = 2'b00,
+ PMP_ACC_WRITE = 2'b01,
+ PMP_ACC_READ = 2'b10
+ } pmp_req_e;
+
+ // PMP cfg structures
+ typedef enum logic [1:0] {
+ PMP_MODE_OFF = 2'b00,
+ PMP_MODE_TOR = 2'b01,
+ PMP_MODE_NA4 = 2'b10,
+ PMP_MODE_NAPOT = 2'b11
+ } pmp_cfg_mode_e;
+
+ typedef struct packed {
+ logic lock;
+ pmp_cfg_mode_e mode;
+ logic exec;
+ logic write;
+ logic read;
+ } pmp_cfg_t;
+
+ // Machine Security Configuration (ePMP)
+ typedef struct packed {
+ logic rlb; // Rule Locking Bypass
+ logic mmwp; // Machine Mode Whitelist Policy
+ logic mml; // Machine Mode Lockdown
+ } pmp_mseccfg_t;
+
+ // CSRs
+ typedef enum logic[11:0] {
+ // Machine information
+ CSR_MVENDORID = 12'hF11,
+ CSR_MARCHID = 12'hF12,
+ CSR_MIMPID = 12'hF13,
+ CSR_MHARTID = 12'hF14,
+
+ // Machine trap setup
+ CSR_MSTATUS = 12'h300,
+ CSR_MISA = 12'h301,
+ CSR_MIE = 12'h304,
+ CSR_MTVEC = 12'h305,
+ CSR_MCOUNTEREN= 12'h306,
+
+ // Machine trap handling
+ CSR_MSCRATCH = 12'h340,
+ CSR_MEPC = 12'h341,
+ CSR_MCAUSE = 12'h342,
+ CSR_MTVAL = 12'h343,
+ CSR_MIP = 12'h344,
+
+ // Physical memory protection
+ CSR_PMPCFG0 = 12'h3A0,
+ CSR_PMPCFG1 = 12'h3A1,
+ CSR_PMPCFG2 = 12'h3A2,
+ CSR_PMPCFG3 = 12'h3A3,
+ CSR_PMPADDR0 = 12'h3B0,
+ CSR_PMPADDR1 = 12'h3B1,
+ CSR_PMPADDR2 = 12'h3B2,
+ CSR_PMPADDR3 = 12'h3B3,
+ CSR_PMPADDR4 = 12'h3B4,
+ CSR_PMPADDR5 = 12'h3B5,
+ CSR_PMPADDR6 = 12'h3B6,
+ CSR_PMPADDR7 = 12'h3B7,
+ CSR_PMPADDR8 = 12'h3B8,
+ CSR_PMPADDR9 = 12'h3B9,
+ CSR_PMPADDR10 = 12'h3BA,
+ CSR_PMPADDR11 = 12'h3BB,
+ CSR_PMPADDR12 = 12'h3BC,
+ CSR_PMPADDR13 = 12'h3BD,
+ CSR_PMPADDR14 = 12'h3BE,
+ CSR_PMPADDR15 = 12'h3BF,
+
+ // ePMP control
+ CSR_MSECCFG = 12'h747,
+ CSR_MSECCFGH = 12'h757,
+
+ // Debug trigger
+ CSR_TSELECT = 12'h7A0,
+ CSR_TDATA1 = 12'h7A1,
+ CSR_TDATA2 = 12'h7A2,
+ CSR_TDATA3 = 12'h7A3,
+ CSR_MCONTEXT = 12'h7A8,
+ CSR_SCONTEXT = 12'h7AA,
+
+ // Debug/trace
+ CSR_DCSR = 12'h7b0,
+ CSR_DPC = 12'h7b1,
+
+ // Debug
+ CSR_DSCRATCH0 = 12'h7b2, // optional
+ CSR_DSCRATCH1 = 12'h7b3, // optional
+
+ // Machine Counter/Timers
+ CSR_MCOUNTINHIBIT = 12'h320,
+ CSR_MHPMEVENT3 = 12'h323,
+ CSR_MHPMEVENT4 = 12'h324,
+ CSR_MHPMEVENT5 = 12'h325,
+ CSR_MHPMEVENT6 = 12'h326,
+ CSR_MHPMEVENT7 = 12'h327,
+ CSR_MHPMEVENT8 = 12'h328,
+ CSR_MHPMEVENT9 = 12'h329,
+ CSR_MHPMEVENT10 = 12'h32A,
+ CSR_MHPMEVENT11 = 12'h32B,
+ CSR_MHPMEVENT12 = 12'h32C,
+ CSR_MHPMEVENT13 = 12'h32D,
+ CSR_MHPMEVENT14 = 12'h32E,
+ CSR_MHPMEVENT15 = 12'h32F,
+ CSR_MHPMEVENT16 = 12'h330,
+ CSR_MHPMEVENT17 = 12'h331,
+ CSR_MHPMEVENT18 = 12'h332,
+ CSR_MHPMEVENT19 = 12'h333,
+ CSR_MHPMEVENT20 = 12'h334,
+ CSR_MHPMEVENT21 = 12'h335,
+ CSR_MHPMEVENT22 = 12'h336,
+ CSR_MHPMEVENT23 = 12'h337,
+ CSR_MHPMEVENT24 = 12'h338,
+ CSR_MHPMEVENT25 = 12'h339,
+ CSR_MHPMEVENT26 = 12'h33A,
+ CSR_MHPMEVENT27 = 12'h33B,
+ CSR_MHPMEVENT28 = 12'h33C,
+ CSR_MHPMEVENT29 = 12'h33D,
+ CSR_MHPMEVENT30 = 12'h33E,
+ CSR_MHPMEVENT31 = 12'h33F,
+ CSR_MCYCLE = 12'hB00,
+ CSR_MINSTRET = 12'hB02,
+ CSR_MHPMCOUNTER3 = 12'hB03,
+ CSR_MHPMCOUNTER4 = 12'hB04,
+ CSR_MHPMCOUNTER5 = 12'hB05,
+ CSR_MHPMCOUNTER6 = 12'hB06,
+ CSR_MHPMCOUNTER7 = 12'hB07,
+ CSR_MHPMCOUNTER8 = 12'hB08,
+ CSR_MHPMCOUNTER9 = 12'hB09,
+ CSR_MHPMCOUNTER10 = 12'hB0A,
+ CSR_MHPMCOUNTER11 = 12'hB0B,
+ CSR_MHPMCOUNTER12 = 12'hB0C,
+ CSR_MHPMCOUNTER13 = 12'hB0D,
+ CSR_MHPMCOUNTER14 = 12'hB0E,
+ CSR_MHPMCOUNTER15 = 12'hB0F,
+ CSR_MHPMCOUNTER16 = 12'hB10,
+ CSR_MHPMCOUNTER17 = 12'hB11,
+ CSR_MHPMCOUNTER18 = 12'hB12,
+ CSR_MHPMCOUNTER19 = 12'hB13,
+ CSR_MHPMCOUNTER20 = 12'hB14,
+ CSR_MHPMCOUNTER21 = 12'hB15,
+ CSR_MHPMCOUNTER22 = 12'hB16,
+ CSR_MHPMCOUNTER23 = 12'hB17,
+ CSR_MHPMCOUNTER24 = 12'hB18,
+ CSR_MHPMCOUNTER25 = 12'hB19,
+ CSR_MHPMCOUNTER26 = 12'hB1A,
+ CSR_MHPMCOUNTER27 = 12'hB1B,
+ CSR_MHPMCOUNTER28 = 12'hB1C,
+ CSR_MHPMCOUNTER29 = 12'hB1D,
+ CSR_MHPMCOUNTER30 = 12'hB1E,
+ CSR_MHPMCOUNTER31 = 12'hB1F,
+ CSR_MCYCLEH = 12'hB80,
+ CSR_MINSTRETH = 12'hB82,
+ CSR_MHPMCOUNTER3H = 12'hB83,
+ CSR_MHPMCOUNTER4H = 12'hB84,
+ CSR_MHPMCOUNTER5H = 12'hB85,
+ CSR_MHPMCOUNTER6H = 12'hB86,
+ CSR_MHPMCOUNTER7H = 12'hB87,
+ CSR_MHPMCOUNTER8H = 12'hB88,
+ CSR_MHPMCOUNTER9H = 12'hB89,
+ CSR_MHPMCOUNTER10H = 12'hB8A,
+ CSR_MHPMCOUNTER11H = 12'hB8B,
+ CSR_MHPMCOUNTER12H = 12'hB8C,
+ CSR_MHPMCOUNTER13H = 12'hB8D,
+ CSR_MHPMCOUNTER14H = 12'hB8E,
+ CSR_MHPMCOUNTER15H = 12'hB8F,
+ CSR_MHPMCOUNTER16H = 12'hB90,
+ CSR_MHPMCOUNTER17H = 12'hB91,
+ CSR_MHPMCOUNTER18H = 12'hB92,
+ CSR_MHPMCOUNTER19H = 12'hB93,
+ CSR_MHPMCOUNTER20H = 12'hB94,
+ CSR_MHPMCOUNTER21H = 12'hB95,
+ CSR_MHPMCOUNTER22H = 12'hB96,
+ CSR_MHPMCOUNTER23H = 12'hB97,
+ CSR_MHPMCOUNTER24H = 12'hB98,
+ CSR_MHPMCOUNTER25H = 12'hB99,
+ CSR_MHPMCOUNTER26H = 12'hB9A,
+ CSR_MHPMCOUNTER27H = 12'hB9B,
+ CSR_MHPMCOUNTER28H = 12'hB9C,
+ CSR_MHPMCOUNTER29H = 12'hB9D,
+ CSR_MHPMCOUNTER30H = 12'hB9E,
+ CSR_MHPMCOUNTER31H = 12'hB9F,
+ CSR_MSHWM = 12'hBC1,
+ CSR_MSHWMB = 12'hBC2,
+ CSR_CDBG_CTRL = 12'hBC4,
+ CSR_CPUCTRL = 12'h7C0,
+ CSR_SECURESEED = 12'h7C1
+ } csr_num_e;
+
+ // CSR pmp-related offsets
+ parameter logic [11:0] CSR_OFF_PMP_CFG = 12'h3A0; // pmp_cfg @ 12'h3a0 - 12'h3a3
+ parameter logic [11:0] CSR_OFF_PMP_ADDR = 12'h3B0; // pmp_addr @ 12'h3b0 - 12'h3bf
+
+ // CSR status bits
+ parameter int unsigned CSR_MSTATUS_MIE_BIT = 3;
+ parameter int unsigned CSR_MSTATUS_MPIE_BIT = 7;
+ parameter int unsigned CSR_MSTATUS_MPP_BIT_LOW = 11;
+ parameter int unsigned CSR_MSTATUS_MPP_BIT_HIGH = 12;
+ parameter int unsigned CSR_MSTATUS_MPRV_BIT = 17;
+ parameter int unsigned CSR_MSTATUS_TW_BIT = 21;
+
+ // CSR machine ISA
+ parameter logic [1:0] CSR_MISA_MXL = 2'd1; // M-XLEN: XLEN in M-Mode for RV32
+
+ // CSR interrupt pending/enable bits
+ parameter int unsigned CSR_MSIX_BIT = 3;
+ parameter int unsigned CSR_MTIX_BIT = 7;
+ parameter int unsigned CSR_MEIX_BIT = 11;
+ parameter int unsigned CSR_MFIX_BIT_LOW = 16;
+ parameter int unsigned CSR_MFIX_BIT_HIGH = 30;
+
+ // CSR Machine Security Configuration bits
+ parameter int unsigned CSR_MSECCFG_MML_BIT = 0;
+ parameter int unsigned CSR_MSECCFG_MMWP_BIT = 1;
+ parameter int unsigned CSR_MSECCFG_RLB_BIT = 2;
+
+ // Vendor ID
+ // No JEDEC ID has been allocated to lowRISC so the value is 0 to indicate the field is not
+ // implemented
+ localparam logic [31:0] CSR_MVENDORID_VALUE = 32'b0;
+ localparam logic [31:0] CSR_MVENDORID_CHERI_VALUE = 32'h255;
+
+ // Architecture ID
+ // Top bit is unset to indicate an open source project. The lower bits are an ID allocated by the
+ // RISC-V Foundation. Note this is allocated specifically to Ibex; should significant changes be
+ // made, a different architecture ID should be supplied.
+ localparam logic [31:0] CSR_MARCHID_VALUE = {1'b0, 31'd22};
+ localparam logic [31:0] CSR_MARCHID_CHERI_VALUE = 32'hce1;
+
+
+ // Implementation ID
+ // 0 indicates this field is not implemented. Ibex implementors may wish to indicate an RTL/netlist
+ // version here using their own unique encoding (e.g. 32 bits of the git hash of the implemented
+ // commit).
+ localparam logic [31:0] CSR_MIMPID_VALUE = 32'b0;
+
+ // These LFSR parameters have been generated with
+ // $ opentitan/util/design/gen-lfsr-seed.py --width 32 --seed 2480124384 --prefix ""
+ parameter int LfsrWidth = 32;
+ typedef logic [LfsrWidth-1:0] lfsr_seed_t;
+ typedef logic [LfsrWidth-1:0][$clog2(LfsrWidth)-1:0] lfsr_perm_t;
+ parameter lfsr_seed_t RndCnstLfsrSeedDefault = 32'hac533bf4;
+ parameter lfsr_perm_t RndCnstLfsrPermDefault = {
+ 160'h1e35ecba467fd1b12e958152c04fa43878a8daed
+ };
+ parameter logic [SCRAMBLE_KEY_W-1:0] RndCnstIbexKeyDefault =
+ 128'h14e8cecae3040d5e12286bb3cc113298;
+ parameter logic [SCRAMBLE_NONCE_W-1:0] RndCnstIbexNonceDefault =
+ 64'hf79780bc735f3843;
+
+ // Fetch enable. Multi-bit signal used for security hardening. For a non-secure implementation all
+ // bits other than the bottom bit are ignored.
+ typedef logic [3:0] fetch_enable_t;
+
+ // Note that if adjusting these parameters it is assumed the bottom bit is set for On and unset
+ // for Off. This allows the use of FetchEnableOn/FetchEnableOff to work for both secure and
+ // non-secure Ibex. If this assumption is broken the RTL that uses the fetch_enable signal within
+ // `cheriot_core` may need adjusting.
+ parameter fetch_enable_t FetchEnableOn = 4'b1001;
+ parameter fetch_enable_t FetchEnableOff = 4'b0110;
+
+ typedef logic [3:0] ibex_mubi_t;
+
+ // Note that if adjusting these parameters it is assumed the bottom bit is set for On and unset
+ // for Off. This allows the use of IbexMuBiOn/IbexMuBiOff to work for both secure and non-secure
+ // Ibex. If this assumption is broken the RTL that uses ibex_mubi_t types such as the fetch_enable
+ // and core_busy signals within `cheriot_core` may need adjusting.
+ parameter ibex_mubi_t IbexMuBiOn = 4'b0101;
+ parameter ibex_mubi_t IbexMuBiOff = 4'b1010;
+
+ //////////////
+ // ID stage //
+ //////////////
+
+ typedef enum logic [3:0] {
+ RESET,
+ BOOT_SET,
+ WAIT_SLEEP,
+ SLEEP,
+ FIRST_FETCH,
+ DECODE,
+ FLUSH,
+ IRQ_TAKEN,
+ DBG_TAKEN_IF,
+ DBG_TAKEN_ID
+ } ctrl_fsm_e;
+
+ //////////////
+ // LSU //
+ //////////////
+
+ typedef enum logic [3:0] {
+ IDLE, WAIT_GNT_MIS, WAIT_RVALID_MIS, WAIT_GNT,
+ WAIT_RVALID_MIS_GNTS_DONE,
+ CTX_WAIT_GNT1, CTX_WAIT_GNT2, CTX_WAIT_RESP
+ } ls_fsm_e;
+
+ typedef enum logic [2:0] {CRX_IDLE, CRX_WAIT_RESP1, CRX_WAIT_RESP2} cap_rx_fsm_t;
+
+
+endpackage
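
Illustrative aside (not part of the package above): as the comments note, `fetch_enable_t` and `ibex_mubi_t` are multi-bit signals whose bottom bit carries the On/Off meaning for non-secure configurations. A minimal sketch, with a hypothetical module name, of how an integrator might reduce the multi-bit value to a single enable bit:

module fetch_enable_usage_sketch;
  import cheriot_pkg::*;

  fetch_enable_t fe;
  logic fetch_en;

  // Non-secure integrations only look at the bottom bit (set for On, clear for Off).
  assign fetch_en = fe[0];

  initial begin
    fe = FetchEnableOn;  #1 $display("On  -> fetch_en=%b", fetch_en);  // 1
    fe = FetchEnableOff; #1 $display("Off -> fetch_en=%b", fetch_en);  // 0
  end
endmodule
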
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv b/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv
new file mode 100644
index 0000000..6363e70
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv
@@ -0,0 +1,184 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+module cheriot_pmp #(
+ // Granularity of NAPOT access,
+ // 0 = No restriction, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte, etc.
+ parameter int unsigned PMPGranularity = 0,
+ // Number of access channels (e.g. i-side + d-side)
+ parameter int unsigned PMPNumChan = 2,
+ // Number of implemented regions
+ parameter int unsigned PMPNumRegions = 4
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ // Interface to CSRs
+ input cheriot_pkg::pmp_cfg_t csr_pmp_cfg_i [PMPNumRegions],
+ input logic [33:0] csr_pmp_addr_i [PMPNumRegions],
+ input cheriot_pkg::pmp_mseccfg_t csr_pmp_mseccfg_i,
+
+ input cheriot_pkg::priv_lvl_e priv_mode_i [PMPNumChan],
+ // Access checking channels
+ input logic [33:0] pmp_req_addr_i [PMPNumChan],
+ input cheriot_pkg::pmp_req_e pmp_req_type_i [PMPNumChan],
+ output logic pmp_req_err_o [PMPNumChan]
+
+);
+
+ import cheriot_pkg::*;
+
+ // Access Checking Signals
+ logic [33:0] region_start_addr [PMPNumRegions];
+ logic [33:PMPGranularity+2] region_addr_mask [PMPNumRegions];
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_gt;
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_lt;
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_eq;
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_all;
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_basic_perm_check;
+ logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_mml_perm_check;
+ logic [PMPNumChan-1:0] access_fault;
+
+
+ // ---------------
+ // Access checking
+ // ---------------
+
+ for (genvar r = 0; r < PMPNumRegions; r++) begin : g_addr_exp
+ // Start address for TOR matching
+ if (r == 0) begin : g_entry0
+ assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? 34'h000000000 :
+ csr_pmp_addr_i[r];
+ end else begin : g_oth
+ assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? csr_pmp_addr_i[r-1] :
+ csr_pmp_addr_i[r];
+ end
+ // Address mask for NA matching
+ for (genvar b = PMPGranularity + 2; b < 34; b++) begin : g_bitmask
+ if (b == 2) begin : g_bit0
+ // Always mask bit 2 for NAPOT
+ assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT);
+ end else begin : g_others
+ // We will mask this bit if it is within the programmed granule
+ // i.e. addr = yyyy 0111
+ // ^
+ // | This bit pos is the top of the mask, all lower bits set
+ // thus mask = 1111 0000
+ if (PMPGranularity == 0) begin : g_region_addr_mask_zero_granularity
+ assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
+ ~&csr_pmp_addr_i[r][b-1:2];
+ end else begin : g_region_addr_mask_other_granularity
+ assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
+ ~&csr_pmp_addr_i[r][b-1:PMPGranularity+1];
+ end
+ end
+ end
+ end
+
+ for (genvar c = 0; c < PMPNumChan; c++) begin : g_access_check
+ for (genvar r = 0; r < PMPNumRegions; r++) begin : g_regions
+ // Comparators are sized according to granularity
+ assign region_match_eq[c][r] = (pmp_req_addr_i[c][33:PMPGranularity+2] &
+ region_addr_mask[r]) ==
+ (region_start_addr[r][33:PMPGranularity+2] &
+ region_addr_mask[r]);
+ assign region_match_gt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] >
+ region_start_addr[r][33:PMPGranularity+2];
+ assign region_match_lt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] <
+ csr_pmp_addr_i[r][33:PMPGranularity+2];
+
+ always_comb begin
+ region_match_all[c][r] = 1'b0;
+ unique case (csr_pmp_cfg_i[r].mode)
+ PMP_MODE_OFF: region_match_all[c][r] = 1'b0;
+ PMP_MODE_NA4: region_match_all[c][r] = region_match_eq[c][r];
+ PMP_MODE_NAPOT: region_match_all[c][r] = region_match_eq[c][r];
+ PMP_MODE_TOR: begin
+ region_match_all[c][r] = (region_match_eq[c][r] | region_match_gt[c][r]) &
+ region_match_lt[c][r];
+ end
+ default: region_match_all[c][r] = 1'b0;
+ endcase
+ end
+
+ // Check specific required permissions
+ assign region_basic_perm_check[c][r] =
+ ((pmp_req_type_i[c] == PMP_ACC_EXEC) & csr_pmp_cfg_i[r].exec) |
+ ((pmp_req_type_i[c] == PMP_ACC_WRITE) & csr_pmp_cfg_i[r].write) |
+ ((pmp_req_type_i[c] == PMP_ACC_READ) & csr_pmp_cfg_i[r].read);
+
+
+ // Compute permission checks that apply when MSECCFG.MML is set.
+ always_comb begin
+ region_mml_perm_check[c][r] = 1'b0;
+
+ if (!csr_pmp_cfg_i[r].read && csr_pmp_cfg_i[r].write) begin
+ // Special-case shared regions where R = 0, W = 1
+ unique case ({csr_pmp_cfg_i[r].lock, csr_pmp_cfg_i[r].exec})
+ // Read/write in M, read only in S/U
+ 2'b00: region_mml_perm_check[c][r] =
+ (pmp_req_type_i[c] == PMP_ACC_READ) |
+ ((pmp_req_type_i[c] == PMP_ACC_WRITE) & (priv_mode_i[c] == PRIV_LVL_M));
+ // Read/write in M/S/U
+ 2'b01: region_mml_perm_check[c][r] =
+ (pmp_req_type_i[c] == PMP_ACC_READ) | (pmp_req_type_i[c] == PMP_ACC_WRITE);
+ // Execute only on M/S/U
+ 2'b10: region_mml_perm_check[c][r] = (pmp_req_type_i[c] == PMP_ACC_EXEC);
+ // Read/execute in M, execute only on S/U
+ 2'b11: region_mml_perm_check[c][r] =
+ (pmp_req_type_i[c] == PMP_ACC_EXEC) |
+ ((pmp_req_type_i[c] == PMP_ACC_READ) & (priv_mode_i[c] == PRIV_LVL_M));
+ default: ;
+ endcase
+ end else begin
+ if (csr_pmp_cfg_i[r].read & csr_pmp_cfg_i[r].write & csr_pmp_cfg_i[r].exec
+ & csr_pmp_cfg_i[r].lock) begin
+ // Special-case shared read only region when R = 1, W = 1, X = 1, L = 1
+ region_mml_perm_check[c][r] = pmp_req_type_i[c] == PMP_ACC_READ;
+ end else begin
+ // Otherwise use basic permission check. Permission is always denied if in S/U mode and
+ // L is set or if in M mode and L is unset.
+ region_mml_perm_check[c][r] =
+ priv_mode_i[c] == PRIV_LVL_M ? csr_pmp_cfg_i[r].lock & region_basic_perm_check[c][r] :
+ ~csr_pmp_cfg_i[r].lock & region_basic_perm_check[c][r];
+ end
+ end
+ end
+ end
+
+ // Access fault determination / prioritization
+ always_comb begin
+ // When MSECCFG.MMWP is set the default is always deny; otherwise the default is allow for
+ // M-mode and deny for other modes
+ access_fault[c] = csr_pmp_mseccfg_i.mmwp | (priv_mode_i[c] != PRIV_LVL_M);
+
+ // PMP entries are statically prioritized, from 0 to N-1
+ // The lowest-numbered PMP entry which matches an address determines accessibility
+ for (int r = PMPNumRegions - 1; r >= 0; r--) begin
+ if (region_match_all[c][r]) begin
+ if (csr_pmp_mseccfg_i.mml) begin
+ // When MSECCFG.MML is set use MML specific permission check
+ access_fault[c] = ~region_mml_perm_check[c][r];
+ end else begin
+ // Otherwise use original PMP behaviour
+ access_fault[c] = (priv_mode_i[c] == PRIV_LVL_M) ?
+ // For M-mode, any region which matches with the L-bit clear, or with sufficient
+ // access permissions will be allowed
+ (csr_pmp_cfg_i[r].lock & ~region_basic_perm_check[c][r]) :
+ // For other modes, the lock bit doesn't matter
+ ~region_basic_perm_check[c][r];
+ end
+ end
+ end
+ end
+
+ assign pmp_req_err_o[c] = access_fault[c];
+ end
+
+ // RLB, rule locking bypass, is only relevant to cheriot_cs_registers which controls writes to the
+ // PMP CSRs. Tie to unused signal here to prevent lint warnings.
+ logic unused_csr_pmp_mseccfg_rlb;
+ assign unused_csr_pmp_mseccfg_rlb = csr_pmp_mseccfg_i.rlb;
+endmodule
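
Illustrative aside (not part of the module above): the g_bitmask generate block derives the NAPOT comparison mask from the trailing ones of a pmpaddr value, so k trailing ones describe a naturally aligned region of 2^(k+3) bytes. A small self-checking sketch of that encoding, with a hypothetical module name:

module pmp_napot_size_sketch;
  initial begin
    // pmpaddr values are word-granule (byte address >> 2); count the trailing ones.
    logic [31:0] pmpaddr;
    int unsigned k;
    pmpaddr = 32'h0000_0fff;                                  // 12 trailing ones
    k = 0;
    while (k < 32 && pmpaddr[k]) k++;
    $display("NAPOT region size = %0d bytes", 32'd8 << k);    // 32768 (32 KiB)
  end
endmodule
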
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh b/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh
new file mode 100644
index 0000000..cda701b
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh
@@ -0,0 +1,53 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Default reset values for PMP CSRs. Where the number of regions
+// (PMPNumRegions) is less than 16 the reset values for the higher numbered
+// regions are ignored.
+//
+// See the Ibex Reference Guide (Custom Reset Values under Physical Memory
+// Protection) for more information.
+
+localparam pmp_cfg_t pmp_cfg_rst[16] = '{
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 0
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 1
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 2
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 3
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 4
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 5
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 6
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 7
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 8
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 9
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 10
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 11
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 12
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 13
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}, // region 14
+ '{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0} // region 15
+};
+
+ // Addresses are given in byte granularity for readability. A minimum of two
+// bits will be stripped off the bottom (PMPGranularity == 0) with more stripped
+// off at coarser granularities.
+localparam [33:0] pmp_addr_rst[16] = '{
+ 34'h0, // region 0
+ 34'h0, // region 1
+ 34'h0, // region 2
+ 34'h0, // region 3
+ 34'h0, // region 4
+ 34'h0, // region 5
+ 34'h0, // region 6
+ 34'h0, // region 7
+ 34'h0, // region 8
+ 34'h0, // region 9
+ 34'h0, // region 10
+ 34'h0, // region 11
+ 34'h0, // region 12
+ 34'h0, // region 13
+ 34'h0, // region 14
+ 34'h0 // region 15
+};
+
+localparam pmp_mseccfg_t pmp_mseccfg_rst = '{rlb : 1'b0, mmwp: 1'b0, mml: 1'b0};
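
Illustrative aside (not part of the header above): the reset defaults can be replaced with custom values as described in the Ibex Reference Guide. A hypothetical sketch of what a region 0 override could look like, wrapped in a package so the pmp_cfg_t type resolves; the package name and values are illustrative only:

package pmp_reset_override_sketch_pkg;
  import cheriot_pkg::*;

  // Region 0 out of reset: a locked, read/execute NAPOT region covering the whole
  // 34-bit physical address space (all mask bits set).
  localparam pmp_cfg_t    CustomRegion0Cfg  =
      '{lock: 1'b1, mode: PMP_MODE_NAPOT, exec: 1'b1, write: 1'b0, read: 1'b1};
  localparam logic [33:0] CustomRegion0Addr = {34{1'b1}};
endpackage
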
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv b/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv
new file mode 100644
index 0000000..00de519
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv
@@ -0,0 +1,281 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Prefetcher Buffer for 32 bit memory interface
+ *
+ * Prefetch Buffer that caches instructions. This cuts overly long critical
+ * paths to the instruction cache.
+ */
+module cheriot_prefetch_buffer #(
+ parameter bit ResetAll = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic req_i,
+
+ input logic branch_i,
+ input logic branch_mispredict_i,
+ input logic [31:0] mispredict_addr_i,
+ input logic [31:0] addr_i,
+
+
+ input logic ready_i,
+ output logic valid_o,
+ output logic [31:0] rdata_o,
+ output logic [31:0] addr_o,
+ output logic err_o,
+ output logic err_plus2_o,
+
+ input logic cheri_force_uc_i,
+
+ // goes to instruction memory / instruction cache
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic instr_err_i,
+ input logic instr_rvalid_i,
+
+ // Prefetch Buffer Status
+ output logic busy_o
+);
+
+ localparam int unsigned NUM_REQS = 2;
+
+ logic valid_new_req, valid_req;
+ logic valid_req_d, valid_req_q;
+ logic discard_req_d, discard_req_q;
+ logic [NUM_REQS-1:0] rdata_outstanding_n, rdata_outstanding_s, rdata_outstanding_q;
+ logic [NUM_REQS-1:0] branch_discard_n, branch_discard_s, branch_discard_q;
+ logic [NUM_REQS-1:0] rdata_outstanding_rev;
+
+ logic [31:0] stored_addr_d, stored_addr_q;
+ logic stored_addr_en;
+ logic [31:0] fetch_addr_d, fetch_addr_q;
+ logic fetch_addr_en;
+ logic [31:0] instr_addr, instr_addr_w_aligned;
+
+ logic fifo_valid;
+ logic [31:0] fifo_addr;
+ logic fifo_ready;
+ logic fifo_clear;
+ logic [NUM_REQS-1:0] fifo_busy;
+
+ logic valid_raw;
+
+ logic branch_or_mispredict;
+
+ ////////////////////////////
+ // Prefetch buffer status //
+ ////////////////////////////
+
+ assign busy_o = (|rdata_outstanding_q) | instr_req_o;
+
+ assign branch_or_mispredict = branch_i | branch_mispredict_i;
+
+ //////////////////////////////////////////////
+ // Fetch fifo - consumes addresses and data //
+ //////////////////////////////////////////////
+
+ // A branch will invalidate any previously fetched instructions.
+ // Note that the FENCE.I instruction relies on this flushing behaviour on branch. If it is
+ // altered the FENCE.I implementation may require changes.
+ assign fifo_clear = branch_or_mispredict;
+
+ // Reversed version of rdata_outstanding_q which can be overlaid with fifo fill state
+ for (genvar i = 0; i < NUM_REQS; i++) begin : gen_rd_rev
+ assign rdata_outstanding_rev[i] = rdata_outstanding_q[NUM_REQS-1-i];
+ end
+
+ // The fifo is ready to accept a new request if it is not full - including space reserved for
+ // requests already outstanding.
+ // Overlay the fifo fill state with the outstanding requests to see if there is space.
+ assign fifo_ready = ~&(fifo_busy | rdata_outstanding_rev);
+
+ cheriot_fetch_fifo #(
+ .NUM_REQS (NUM_REQS),
+ .ResetAll (ResetAll)
+ ) fifo_i (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+
+ .clear_i ( fifo_clear ),
+ .busy_o ( fifo_busy ),
+
+ .in_valid_i ( fifo_valid ),
+ .in_addr_i ( fifo_addr ),
+ .in_rdata_i ( instr_rdata_i ),
+ .in_err_i ( instr_err_i ),
+ .cheri_force_uc_i ( cheri_force_uc_i ),
+
+ .out_valid_o ( valid_raw ),
+ .out_ready_i ( ready_i ),
+ .out_rdata_o ( rdata_o ),
+ .out_addr_o ( addr_o ),
+ .out_err_o ( err_o ),
+ .out_err_plus2_o ( err_plus2_o )
+ );
+
+ //////////////
+ // Requests //
+ //////////////
+
+ // Make a new request any time there is space in the FIFO, and space in the request queue
+ assign valid_new_req = req_i & (fifo_ready | branch_or_mispredict) &
+ ~rdata_outstanding_q[NUM_REQS-1];
+
+ assign valid_req = valid_req_q | valid_new_req;
+
+ // Hold the request stable for requests that didn't get granted
+ assign valid_req_d = valid_req & ~instr_gnt_i;
+
+ // Record whether an outstanding bus request is cancelled by a branch
+ assign discard_req_d = valid_req_q & (branch_or_mispredict | discard_req_q);
+
+ ////////////////
+ // Fetch addr //
+ ////////////////
+
+ // Two addresses are tracked in the prefetch buffer:
+ // 1. stored_addr_q - This is the address issued on the bus. It stays stable until
+ // the request is granted.
+ // 2. fetch_addr_q - This is our next address to fetch from. It is updated on branches to
+ // capture the new address, and then for each new request issued.
+ // A third address is tracked in the fetch FIFO itself:
+ // 3. instr_addr_q - This is the address at the head of the FIFO, effectively our oldest fetched
+ // address. This address is updated on branches, and does its own increment
+ // each time the FIFO is popped.
+
+ // 1. stored_addr_q
+
+ // Only update stored_addr_q for new ungranted requests
+ assign stored_addr_en = valid_new_req & ~valid_req_q & ~instr_gnt_i;
+
+ // Store whatever address was issued on the bus
+ assign stored_addr_d = instr_addr;
+
+ // CPU resets with a branch, so no need to reset these addresses
+ if (ResetAll) begin : g_stored_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ stored_addr_q <= '0;
+ end else if (stored_addr_en) begin
+ stored_addr_q <= stored_addr_d;
+ end
+ end
+ end else begin : g_stored_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (stored_addr_en) begin
+ stored_addr_q <= stored_addr_d;
+ end
+ end
+ end
+ // 2. fetch_addr_q
+
+ // Update on a branch or as soon as a request is issued
+ assign fetch_addr_en = branch_or_mispredict | (valid_new_req & ~valid_req_q);
+
+ assign fetch_addr_d = (branch_i ? addr_i :
+ branch_mispredict_i ? {mispredict_addr_i[31:2], 2'b00} :
+ {fetch_addr_q[31:2], 2'b00}) +
+ // Current address + 4
+ {{29{1'b0}},(valid_new_req & ~valid_req_q),2'b00};
+
+ if (ResetAll) begin : g_fetch_addr_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ fetch_addr_q <= '0;
+ end else if (fetch_addr_en) begin
+ fetch_addr_q <= fetch_addr_d;
+ end
+ end
+ end else begin : g_fetch_addr_nr
+ always_ff @(posedge clk_i) begin
+ if (fetch_addr_en) begin
+ fetch_addr_q <= fetch_addr_d;
+ end
+ end
+ end
+
+ // Address mux
+ assign instr_addr = valid_req_q ? stored_addr_q :
+ branch_i ? addr_i :
+ branch_mispredict_i ? mispredict_addr_i :
+ fetch_addr_q;
+
+ assign instr_addr_w_aligned = {instr_addr[31:2], 2'b00};
+
+ ///////////////////////////////
+ // Request outstanding queue //
+ ///////////////////////////////
+
+ for (genvar i = 0; i < NUM_REQS; i++) begin : g_outstanding_reqs
+ // Request 0 (always the oldest outstanding request)
+ if (i == 0) begin : g_req0
+ // A request becomes outstanding once granted, and is cleared once the rvalid is received.
+ // Outstanding requests shift down the queue towards entry 0.
+ assign rdata_outstanding_n[i] = (valid_req & instr_gnt_i) |
+ rdata_outstanding_q[i];
+ // If a branch is received at any point while a request is outstanding, it must be tracked
+ // to ensure we discard the data once received
+ assign branch_discard_n[i] = (valid_req & instr_gnt_i & discard_req_d) |
+ (branch_or_mispredict & rdata_outstanding_q[i]) |
+ branch_discard_q[i];
+
+ end else begin : g_reqtop
+ // Entries > 0 consider the FIFO fill state to calculate their next state (by checking
+ // whether the previous entry is valid)
+
+ assign rdata_outstanding_n[i] = (valid_req & instr_gnt_i &
+ rdata_outstanding_q[i-1]) |
+ rdata_outstanding_q[i];
+ assign branch_discard_n[i] = (valid_req & instr_gnt_i & discard_req_d &
+ rdata_outstanding_q[i-1]) |
+ (branch_or_mispredict & rdata_outstanding_q[i]) |
+ branch_discard_q[i];
+ end
+ end
+
+ // Shift the entries down on each instr_rvalid_i
+ assign rdata_outstanding_s = instr_rvalid_i ? {1'b0,rdata_outstanding_n[NUM_REQS-1:1]} :
+ rdata_outstanding_n;
+ assign branch_discard_s = instr_rvalid_i ? {1'b0,branch_discard_n[NUM_REQS-1:1]} :
+ branch_discard_n;
+
+ // Push a new entry to the FIFO once complete (and not cancelled by a branch)
+ assign fifo_valid = instr_rvalid_i & ~branch_discard_q[0];
+
+ assign fifo_addr = branch_i ? addr_i : mispredict_addr_i;
+
+ ///////////////
+ // Registers //
+ ///////////////
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ valid_req_q <= 1'b0;
+ discard_req_q <= 1'b0;
+ rdata_outstanding_q <= 'b0;
+ branch_discard_q <= 'b0;
+ end else begin
+ valid_req_q <= valid_req_d;
+ discard_req_q <= discard_req_d;
+ rdata_outstanding_q <= rdata_outstanding_s;
+ branch_discard_q <= branch_discard_s;
+ end
+ end
+
+ /////////////
+ // Outputs //
+ /////////////
+
+ assign instr_req_o = valid_req;
+ assign instr_addr_o = instr_addr_w_aligned;
+
+ assign valid_o = valid_raw & ~branch_mispredict_i;
+
+endmodule
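
Illustrative aside (not part of the module above): a hypothetical smoke-test sketch that drives the prefetch buffer from a zero-wait-state memory model, assuming the cheriot_fetch_fifo from this commit is compiled alongside. Fetching is started with a branch, matching the "CPU resets with a branch" comment above; the testbench and memory model names are illustrative only.

module prefetch_buffer_smoke_sketch;
  logic clk = 1'b0;
  logic rst_n = 1'b0;
  always #5 clk = ~clk;

  logic        req = 1'b0;
  logic        branch = 1'b0;
  logic [31:0] boot_addr = 32'h8000_0000;
  logic        valid, err, err_plus2, busy;
  logic [31:0] rdata, fetch_addr;
  logic        instr_req, instr_gnt;
  logic        instr_rvalid = 1'b0;
  logic [31:0] instr_addr, instr_rdata;

  // Zero-wait-state memory model: grant immediately, return data one cycle later.
  assign instr_gnt = instr_req;
  always_ff @(posedge clk) begin
    instr_rvalid <= instr_req;
    instr_rdata  <= {instr_addr[31:2], 2'b11};  // dummy uncompressed "instruction"
  end

  cheriot_prefetch_buffer dut (
    .clk_i               (clk),
    .rst_ni              (rst_n),
    .req_i               (req),
    .branch_i            (branch),
    .branch_mispredict_i (1'b0),
    .mispredict_addr_i   (32'h0),
    .addr_i              (boot_addr),
    .ready_i             (1'b1),
    .valid_o             (valid),
    .rdata_o             (rdata),
    .addr_o              (fetch_addr),
    .err_o               (err),
    .err_plus2_o         (err_plus2),
    .cheri_force_uc_i    (1'b0),
    .instr_req_o         (instr_req),
    .instr_gnt_i         (instr_gnt),
    .instr_addr_o        (instr_addr),
    .instr_rdata_i       (instr_rdata),
    .instr_err_i         (1'b0),
    .instr_rvalid_i      (instr_rvalid),
    .busy_o              (busy)
  );

  initial begin
    @(negedge clk) rst_n = 1'b1;
    @(negedge clk) begin req = 1'b1; branch = 1'b1; end   // redirect fetch to boot_addr
    @(negedge clk) branch = 1'b0;
    repeat (10) @(negedge clk) if (valid) $display("fetched %h from %h", rdata, fetch_addr);
    $finish;
  end
endmodule
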
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv
new file mode 100644
index 0000000..1da818e
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv
@@ -0,0 +1,103 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * RISC-V register file
+ *
+ * Register file with 31 (or 15 for RV32E) 32-bit wide registers. Register 0 is fixed to 0.
+ * This register file is based on flip flops. Use this register file when
+ * targeting FPGA synthesis or Verilator simulation.
+ */
+module cheriot_register_file_ff #(
+ parameter bit RV32E = 0,
+ parameter int unsigned DataWidth = 32,
+ parameter bit DummyInstructions = 0,
+ parameter logic [DataWidth-1:0] WordZeroVal = '0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i,
+ input logic dummy_instr_id_i,
+
+ //Read port R1
+ input logic [4:0] raddr_a_i,
+ output logic [DataWidth-1:0] rdata_a_o,
+
+ //Read port R2
+ input logic [4:0] raddr_b_i,
+ output logic [DataWidth-1:0] rdata_b_o,
+
+
+ // Write port W1
+ input logic [4:0] waddr_a_i,
+ input logic [DataWidth-1:0] wdata_a_i,
+ input logic we_a_i
+
+);
+
+ localparam int unsigned ADDR_WIDTH = RV32E ? 4 : 5;
+ localparam int unsigned NUM_WORDS = 2**ADDR_WIDTH;
+
+ logic [NUM_WORDS-1:0][DataWidth-1:0] rf_reg;
+ logic [NUM_WORDS-1:1][DataWidth-1:0] rf_reg_q;
+ logic [NUM_WORDS-1:1] we_a_dec;
+
+ always_comb begin : we_a_decoder
+ for (int unsigned i = 1; i < NUM_WORDS; i++) begin
+ we_a_dec[i] = (waddr_a_i == 5'(i)) ? we_a_i : 1'b0;
+ end
+ end
+
+ // No flops for R0 as it's hard-wired to 0
+ for (genvar i = 1; i < NUM_WORDS; i++) begin : g_rf_flops
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rf_reg_q[i] <= WordZeroVal;
+ end else if (we_a_dec[i]) begin
+ rf_reg_q[i] <= wdata_a_i;
+ end
+ end
+ end
+
+ // With dummy instructions enabled, R0 behaves as a real register but will always return 0 for
+ // real instructions.
+ if (DummyInstructions) begin : g_dummy_r0
+ logic we_r0_dummy;
+ logic [DataWidth-1:0] rf_r0_q;
+
+ // Write enable for dummy R0 register (waddr_a_i will always be 0 for dummy instructions)
+ assign we_r0_dummy = we_a_i & dummy_instr_id_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rf_r0_q <= WordZeroVal;
+ end else if (we_r0_dummy) begin
+ rf_r0_q <= wdata_a_i;
+ end
+ end
+
+ // Output the dummy data for dummy instructions, otherwise R0 reads as zero
+ assign rf_reg[0] = dummy_instr_id_i ? rf_r0_q : WordZeroVal;
+
+ end else begin : g_normal_r0
+ logic unused_dummy_instr_id;
+ assign unused_dummy_instr_id = dummy_instr_id_i;
+
+ // R0 is nil
+ assign rf_reg[0] = WordZeroVal;
+ end
+
+ assign rf_reg[NUM_WORDS-1:1] = rf_reg_q[NUM_WORDS-1:1];
+
+ assign rdata_a_o = rf_reg[raddr_a_i];
+ assign rdata_b_o = rf_reg[raddr_b_i];
+
+ // Signal not used in FF register file
+ logic unused_test_en;
+ assign unused_test_en = test_en_i;
+
+endmodule
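
Illustrative aside (not part of the module above): a hypothetical write-then-read sketch for the flip-flop register file, showing a write landing on the clock edge while register 0 keeps reading as zero. The testbench name and values are illustrative only.

module register_file_ff_rw_sketch;
  logic clk = 1'b0;
  logic rst_n = 1'b0;
  always #5 clk = ~clk;

  logic [4:0]  raddr_a = 5'd5, raddr_b = 5'd0, waddr = 5'd5;
  logic [31:0] rdata_a, rdata_b;
  logic [31:0] wdata = 32'hdead_beef;
  logic        we = 1'b0;

  cheriot_register_file_ff #(.DataWidth(32)) dut (
    .clk_i           (clk),
    .rst_ni          (rst_n),
    .test_en_i       (1'b0),
    .dummy_instr_id_i(1'b0),
    .raddr_a_i       (raddr_a),
    .rdata_a_o       (rdata_a),
    .raddr_b_i       (raddr_b),
    .rdata_b_o       (rdata_b),
    .waddr_a_i       (waddr),
    .wdata_a_i       (wdata),
    .we_a_i          (we)
  );

  initial begin
    @(negedge clk) rst_n = 1'b1;
    @(negedge clk) we = 1'b1;                              // write x5 on the next rising edge
    @(negedge clk) we = 1'b0;
    #1 $display("x5 = %h, x0 = %h", rdata_a, rdata_b);     // expect deadbeef and 00000000
    $finish;
  end
endmodule
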
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv
new file mode 100644
index 0000000..2c00bc6
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv
@@ -0,0 +1,83 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * RISC-V register file
+ *
+ * Register file with 31 (or 15 for RV32E) 32-bit wide registers. Register 0 is fixed to 0.
+ *
+ * This register file is designed to make FPGA synthesis tools infer RAM primitives. For Xilinx
+ * FPGA architectures, it will produce RAM32M primitives. Other vendors have not yet been tested.
+ */
+module cheriot_register_file_fpga #(
+ parameter bit RV32E = 0,
+ parameter int unsigned DataWidth = 32,
+ parameter bit DummyInstructions = 0,
+ parameter logic [DataWidth-1:0] WordZeroVal = '0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i,
+ input logic dummy_instr_id_i,
+
+ //Read port R1
+ input logic [ 4:0] raddr_a_i,
+ output logic [DataWidth-1:0] rdata_a_o,
+ //Read port R2
+ input logic [ 4:0] raddr_b_i,
+ output logic [DataWidth-1:0] rdata_b_o,
+ // Write port W1
+ input logic [ 4:0] waddr_a_i,
+ input logic [DataWidth-1:0] wdata_a_i,
+ input logic we_a_i
+);
+
+ localparam int ADDR_WIDTH = RV32E ? 4 : 5;
+ localparam int NUM_WORDS = 2 ** ADDR_WIDTH;
+
+ logic [DataWidth-1:0] mem[NUM_WORDS];
+ logic we; // write enable if writing to any register other than R0
+
+ // async_read a
+ assign rdata_a_o = (raddr_a_i == '0) ? '0 : mem[raddr_a_i];
+
+ // async_read b
+ assign rdata_b_o = (raddr_b_i == '0) ? '0 : mem[raddr_b_i];
+
+ // we select
+ assign we = (waddr_a_i == '0) ? 1'b0 : we_a_i;
+
+ // Note that the SystemVerilog LRM requires variables on the LHS of assignments within
+ // "always_ff" to not be written to by any other process. However, initializing the inferred
+ // RAM32M primitives with non-zero values requires the "initial" procedure below. Therefore,
+ // we use "always" instead of the generally preferred "always_ff" for the synchronous write
+ // procedure.
+ always @(posedge clk_i) begin : sync_write
+ if (we == 1'b1) begin
+ mem[waddr_a_i] <= wdata_a_i;
+ end
+ end : sync_write
+
+ // Make sure we initialize the BRAM with the correct register reset value.
+ initial begin
+ for (int k = 0; k < NUM_WORDS; k++) begin
+ mem[k] = WordZeroVal;
+ end
+ end
+
+ // Reset not used in this register file version
+ logic unused_rst_ni;
+ assign unused_rst_ni = rst_ni;
+
+ // Dummy instruction changes not relevant for FPGA implementation
+ logic unused_dummy_instr;
+ assign unused_dummy_instr = dummy_instr_id_i;
+ // Test enable signal not used in FPGA implementation
+ logic unused_test_en;
+ assign unused_test_en = test_en_i;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv
new file mode 100644
index 0000000..d953b79
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv
@@ -0,0 +1,163 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * RISC-V register file
+ *
+ * Register file with 31 (or 15 for RV32E) 32-bit wide registers. Register 0 is fixed to 0.
+ * This register file is based on latches and is thus smaller than the flip-flop
+ * based RF. It requires a target technology-specific clock gating cell. Use this
+ * register file when targeting ASIC synthesis or event-based simulators.
+ */
+module cheriot_register_file_latch #(
+ parameter bit RV32E = 0,
+ parameter int unsigned DataWidth = 32,
+ parameter bit DummyInstructions = 0,
+ parameter logic [DataWidth-1:0] WordZeroVal = '0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i,
+ input logic dummy_instr_id_i,
+
+ //Read port R1
+ input logic [4:0] raddr_a_i,
+ output logic [DataWidth-1:0] rdata_a_o,
+
+ //Read port R2
+ input logic [4:0] raddr_b_i,
+ output logic [DataWidth-1:0] rdata_b_o,
+
+ // Write port W1
+ input logic [4:0] waddr_a_i,
+ input logic [DataWidth-1:0] wdata_a_i,
+ input logic we_a_i
+
+);
+
+ localparam int unsigned ADDR_WIDTH = RV32E ? 4 : 5;
+ localparam int unsigned NUM_WORDS = 2**ADDR_WIDTH;
+
+ logic [DataWidth-1:0] mem[NUM_WORDS];
+
+ logic [NUM_WORDS-1:1] waddr_onehot_a;
+
+ logic [NUM_WORDS-1:1] mem_clocks;
+ logic [DataWidth-1:0] wdata_a_q;
+
+ // internal addresses
+ logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;
+
+ assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
+ assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
+ assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];
+
+ logic clk_int;
+
+ //////////
+ // READ //
+ //////////
+ assign rdata_a_o = mem[raddr_a_int];
+ assign rdata_b_o = mem[raddr_b_int];
+
+ ///////////
+ // WRITE //
+ ///////////
+ // Global clock gating
+ prim_clock_gating cg_we_global (
+ .clk_i ( clk_i ),
+ .en_i ( we_a_i ),
+ .test_en_i ( test_en_i ),
+ .clk_o ( clk_int )
+ );
+
+ // Sample input data
+ // Use the gated clk_int here; when it is gated off nothing needs to be written anyway.
+ always_ff @(posedge clk_int or negedge rst_ni) begin : sample_wdata
+ if (!rst_ni) begin
+ wdata_a_q <= WordZeroVal;
+ end else begin
+ if (we_a_i) begin
+ wdata_a_q <= wdata_a_i;
+ end
+ end
+ end
+
+ // Write address decoding
+ always_comb begin : wad
+ for (int i = 1; i < NUM_WORDS; i++) begin : wad_word_iter
+ if (we_a_i && (waddr_a_int == 5'(i))) begin
+ waddr_onehot_a[i] = 1'b1;
+ end else begin
+ waddr_onehot_a[i] = 1'b0;
+ end
+ end
+ end
+
+ // Individual clock gating (if integrated clock-gating cells are available)
+ for (genvar x = 1; x < NUM_WORDS; x++) begin : gen_cg_word_iter
+ prim_clock_gating cg_i (
+ .clk_i ( clk_int ),
+ .en_i ( waddr_onehot_a[x] ),
+ .test_en_i ( test_en_i ),
+ .clk_o ( mem_clocks[x] )
+ );
+ end
+
+ // Actual write operation:
+ // Generate the sequential process for the NUM_WORDS words of the memory.
+ // The process is synchronized with the clocks mem_clocks[i], i = 1, ..., NUM_WORDS-1.
+ for (genvar i = 1; i < NUM_WORDS; i++) begin : g_rf_latches
+ always_latch begin
+ if (mem_clocks[i]) begin
+ mem[i] = wdata_a_q;
+ end
+ end
+ end
+
+ // With dummy instructions enabled, R0 behaves as a real register but will always return 0 for
+ // real instructions.
+ if (DummyInstructions) begin : g_dummy_r0
+ logic we_r0_dummy;
+ logic r0_clock;
+ logic [DataWidth-1:0] mem_r0;
+
+ // Write enable for dummy R0 register (waddr_a_i will always be 0 for dummy instructions)
+ assign we_r0_dummy = we_a_i & dummy_instr_id_i;
+
+ // R0 clock gate
+ prim_clock_gating cg_i (
+ .clk_i ( clk_int ),
+ .en_i ( we_r0_dummy ),
+ .test_en_i ( test_en_i ),
+ .clk_o ( r0_clock )
+ );
+
+ always_latch begin : latch_wdata
+ if (r0_clock) begin
+ mem_r0 = wdata_a_q;
+ end
+ end
+
+ // Output the dummy data for dummy instructions, otherwise R0 reads as zero
+ assign mem[0] = dummy_instr_id_i ? mem_r0 : WordZeroVal;
+
+ end else begin : g_normal_r0
+ logic unused_dummy_instr_id;
+ assign unused_dummy_instr_id = dummy_instr_id_i;
+
+ assign mem[0] = WordZeroVal;
+ end
+
+`ifdef VERILATOR
+ initial begin
+ $display("Latch-based register file not supported for Verilator simulation");
+ $fatal;
+ end
+`endif
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_top.sv b/hw/ip/cheriot-ibex/rtl/cheriot_top.sv
new file mode 100644
index 0000000..7dd2663
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_top.sv
@@ -0,0 +1,1191 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`ifdef RISCV_FORMAL
+ `define RVFI
+`endif
+
+`include "prim_assert.sv"
+
+/**
+ * Top level module of the ibex RISC-V core
+ */
+module cheriot_top import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter rv32m_e RV32M = RV32MFast,
+ parameter rv32b_e RV32B = RV32BNone,
+ parameter regfile_e RegFile = RegFileFF,
+ parameter bit BranchTargetALU = 1'b0,
+ parameter bit WritebackStage = 1'b0,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit SecureIbex = 1'b0,
+ parameter bit ICacheScramble = 1'b0,
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault,
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ // Default seed and nonce for scrambling
+ parameter logic [SCRAMBLE_KEY_W-1:0] RndCnstIbexKey = RndCnstIbexKeyDefault,
+ parameter logic [SCRAMBLE_NONCE_W-1:0] RndCnstIbexNonce = RndCnstIbexNonceDefault,
+ // CHERIoT parameters
+ parameter bit CHERIoTEn = 1'b1,
+ parameter int unsigned DataWidth = 33,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2002_f000, // 4kB default
+ parameter int unsigned TSMapSize = 1024, // 32-bit words
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i, // enable all clock gates for testing
+ input prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic [6:0] instr_rdata_intg_i,
+ input logic instr_err_i,
+
+ // Data memory interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [DataWidth-1:0] data_wdata_o,
+ output logic [6:0] data_wdata_intg_o,
+ input logic [DataWidth-1:0] data_rdata_i,
+ input logic [6:0] data_rdata_intg_i,
+ input logic data_err_i,
+
+ // TS map memory interface
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i,
+ input logic [6:0] tsmap_rdata_intg_i,
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+ input logic irq_nm_i, // non-maskable interrupt
+
+ // Scrambling Interface
+ input logic scramble_key_valid_i,
+ input logic [SCRAMBLE_KEY_W-1:0] scramble_key_i,
+ input logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_i,
+ output logic scramble_req_o,
+
+ // Debug Interface
+ input logic debug_req_i,
+ output crash_dump_t crash_dump_o,
+ output logic double_fault_seen_o,
+
+ // RISC-V Formal Interface
+ // Does not comply with the coding standards of _i/_o suffixes, but follows
+ // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+ output logic rvfi_valid,
+ output logic [63:0] rvfi_order,
+ output logic [31:0] rvfi_insn,
+ output logic rvfi_trap,
+ output logic rvfi_halt,
+ output logic rvfi_intr,
+ output logic [ 1:0] rvfi_mode,
+ output logic [ 1:0] rvfi_ixl,
+ output logic [ 4:0] rvfi_rs1_addr,
+ output logic [ 4:0] rvfi_rs2_addr,
+ output logic [ 4:0] rvfi_rs3_addr,
+ output logic [31:0] rvfi_rs1_rdata,
+ output logic [31:0] rvfi_rs2_rdata,
+ output logic [31:0] rvfi_rs3_rdata,
+ output reg_cap_t rvfi_rs1_rcap,
+ output reg_cap_t rvfi_rs2_rcap,
+ output reg_cap_t rvfi_rd_wcap,
+ output logic [ 4:0] rvfi_rd_addr,
+ output logic [31:0] rvfi_rd_wdata,
+ output logic [31:0] rvfi_pc_rdata,
+ output logic [31:0] rvfi_pc_wdata,
+ output logic [31:0] rvfi_mem_addr,
+ output logic [ 3:0] rvfi_mem_rmask,
+ output logic [ 3:0] rvfi_mem_wmask,
+ output logic [DataWidth-1:0] rvfi_mem_rdata,
+ output logic [DataWidth-1:0] rvfi_mem_wdata,
+ output logic rvfi_mem_is_cap,
+ output reg_cap_t rvfi_mem_rcap,
+ output reg_cap_t rvfi_mem_wcap,
+
+ output logic [31:0] rvfi_ext_mip,
+ output logic rvfi_ext_nmi,
+ output logic rvfi_ext_debug_req,
+ output logic [63:0] rvfi_ext_mcycle,
+`endif
+
+ // CPU Control Signals
+ input fetch_enable_t fetch_enable_i,
+ output logic alert_minor_o,
+ output logic alert_major_internal_o,
+ output logic alert_major_bus_o,
+ output logic core_sleep_o,
+
+ // DFT bypass controls
+ input logic scan_rst_ni
+);
+
+ localparam bit Lockstep = SecureIbex;
+ localparam bit ResetAll = Lockstep;
+ localparam bit DummyInstructions = SecureIbex;
+ localparam bit RegFileECC = SecureIbex;
+ localparam int unsigned RegFileDataWidth = RegFileECC ? 32 + 7 : 32;
+ // Icache parameters
+ localparam int unsigned BusSizeECC = ICacheECC ? (BUS_SIZE + 7) : BUS_SIZE;
+ localparam int unsigned LineSizeECC = BusSizeECC * IC_LINE_BEATS;
+ localparam int unsigned TagSizeECC = ICacheECC ? (IC_TAG_SIZE + 6) : IC_TAG_SIZE;
+ // Scrambling Parameter
+ localparam int unsigned NumAddrScrRounds = ICacheScramble ? 2 : 0;
+ localparam int unsigned NumDiffRounds = NumAddrScrRounds;
+
+ // Clock signals
+ logic clk;
+ logic core_busy_d, core_busy_q;
+ logic clock_en;
+ logic irq_pending;
+ // Core <-> Register file signals
+ logic dummy_instr_id;
+ logic [4:0] rf_raddr_a;
+ logic [4:0] rf_raddr_b;
+ logic [4:0] rf_waddr_wb;
+ logic rf_we_wb;
+ logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;
+ logic [RegFileDataWidth-1:0] rf_rdata_a_ecc, rf_rdata_a_ecc_buf;
+ logic [RegFileDataWidth-1:0] rf_rdata_b_ecc, rf_rdata_b_ecc_buf;
+ reg_cap_t rf_rcap_a, rf_rcap_b;
+ reg_cap_t rf_wcap;
+
+ // Core <-> RAMs signals
+ logic [IC_NUM_WAYS-1:0] ic_tag_req;
+ logic ic_tag_write;
+ logic [IC_INDEX_W-1:0] ic_tag_addr;
+ logic [TagSizeECC-1:0] ic_tag_wdata;
+ logic [TagSizeECC-1:0] ic_tag_rdata [IC_NUM_WAYS];
+ logic [IC_NUM_WAYS-1:0] ic_data_req;
+ logic ic_data_write;
+ logic [IC_INDEX_W-1:0] ic_data_addr;
+ logic [LineSizeECC-1:0] ic_data_wdata;
+ logic [LineSizeECC-1:0] ic_data_rdata [IC_NUM_WAYS];
+ // Alert signals
+ logic core_alert_major, core_alert_minor;
+ logic lockstep_alert_major_internal, lockstep_alert_major_bus;
+ logic lockstep_alert_minor;
+ // Scramble signals
+ logic icache_inval;
+ logic [SCRAMBLE_KEY_W-1:0] scramble_key_q;
+ logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_q;
+ logic scramble_key_valid_d, scramble_key_valid_q;
+ logic scramble_req_d, scramble_req_q;
+
+ fetch_enable_t fetch_enable_buf;
+
+ logic [31:0] rf_reg_rdy;
+ logic [4:0] rf_trvk_addr;
+ logic rf_trvk_en;
+ logic rf_trvk_clrtag;
+ logic [6:0] rf_trvk_par;
+ logic [4:0] rf_trsv_addr;
+ logic rf_trsv_en;
+ logic [6:0] rf_trsv_par;
+ logic rf_alert;
+
+ /////////////////////
+ // Main clock gate //
+ /////////////////////
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ core_busy_q <= 1'b0;
+ end else begin
+ core_busy_q <= core_busy_d;
+ end
+ end
+
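+  // The core clock is kept running while the core is busy or while any wake-up source (debug
+  // request, pending interrupt or NMI) is asserted; core_sleep_o is simply the inverted enable.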
+ assign clock_en = core_busy_q | debug_req_i | irq_pending | irq_nm_i;
+ assign core_sleep_o = ~clock_en;
+
+ prim_clock_gating core_clock_gate_i (
+ .clk_i (clk_i),
+ .en_i (clock_en),
+ .test_en_i(test_en_i),
+ .clk_o (clk)
+ );
+
+ ////////////////////////
+ // Core instantiation //
+ ////////////////////////
+
+ // Buffer security critical signals to prevent synthesis optimisation removing them
+ prim_buf #(.Width($bits(fetch_enable_t))) u_fetch_enable_buf (
+ .in_i (fetch_enable_i),
+ .out_o(fetch_enable_buf)
+ );
+
+ prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_a_ecc_buf (
+ .in_i (rf_rdata_a_ecc),
+ .out_o(rf_rdata_a_ecc_buf)
+ );
+
+ prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_b_ecc_buf (
+ .in_i (rf_rdata_b_ecc),
+ .out_o(rf_rdata_b_ecc_buf)
+ );
+
+ cheriot_core #(
+ .PMPEnable (PMPEnable),
+ .PMPGranularity (PMPGranularity),
+ .PMPNumRegions (PMPNumRegions),
+ .MHPMCounterNum (MHPMCounterNum),
+ .MHPMCounterWidth (MHPMCounterWidth),
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .BranchTargetALU (BranchTargetALU),
+ .ICache (ICache),
+ .ICacheECC (ICacheECC),
+ .BusSizeECC (BusSizeECC),
+ .TagSizeECC (TagSizeECC),
+ .LineSizeECC (LineSizeECC),
+ .BranchPredictor (BranchPredictor),
+ .DbgTriggerEn (DbgTriggerEn),
+ .DbgHwBreakNum (DbgHwBreakNum),
+ .WritebackStage (WritebackStage),
+ .ResetAll (ResetAll),
+ .RndCnstLfsrSeed (RndCnstLfsrSeed),
+ .RndCnstLfsrPerm (RndCnstLfsrPerm),
+ .SecureIbex (SecureIbex),
+ .DummyInstructions(DummyInstructions),
+ .RegFileECC (RegFileECC),
+ .RegFileDataWidth (RegFileDataWidth),
+ .DmHaltAddr (DmHaltAddr),
+ .DmExceptionAddr (DmExceptionAddr),
+ .CHERIoTEn (CHERIoTEn),
+ .DataWidth (DataWidth),
+ .HeapBase (HeapBase ),
+ .TSMapBase (TSMapBase ),
+ .TSMapSize (TSMapSize),
+ .MemCapFmt (MemCapFmt ),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2),
+ .CheriTBRE (CheriTBRE),
+ .CheriStkZ (CheriStkZ)
+ ) u_cheriot_core (
+ .clk_i(clk),
+ .rst_ni,
+
+ .hart_id_i,
+ .boot_addr_i,
+ .cheri_pmode_i,
+ .cheri_tsafe_en_i,
+
+ .instr_req_o,
+ .instr_gnt_i,
+ .instr_rvalid_i,
+ .instr_addr_o,
+ .instr_rdata_i,
+ .instr_err_i,
+
+ .data_req_o,
+ .data_is_cap_o,
+ .data_gnt_i,
+ .data_rvalid_i,
+ .data_we_o,
+ .data_be_o,
+ .data_addr_o,
+ .data_wdata_o,
+ .data_rdata_i,
+ .data_err_i,
+
+ .dummy_instr_id_o (dummy_instr_id),
+ .rf_raddr_a_o (rf_raddr_a),
+ .rf_raddr_b_o (rf_raddr_b),
+ .rf_waddr_wb_o (rf_waddr_wb),
+ .rf_we_wb_o (rf_we_wb),
+ .rf_wdata_wb_ecc_o(rf_wdata_wb_ecc),
+ .rf_rdata_a_ecc_i (rf_rdata_a_ecc_buf),
+ .rf_rdata_b_ecc_i (rf_rdata_b_ecc_buf),
+ .rf_wcap_wb_o (rf_wcap),
+ .rf_rcap_a_i (rf_rcap_a),
+ .rf_rcap_b_i (rf_rcap_b),
+ .rf_reg_rdy_i (rf_reg_rdy),
+ .rf_trsv_en_o (rf_trsv_en),
+ .rf_trsv_addr_o (rf_trsv_addr),
+ .rf_trsv_par_o (rf_trsv_par),
+ .rf_trvk_addr_o (rf_trvk_addr),
+ .rf_trvk_en_o (rf_trvk_en ),
+ .rf_trvk_clrtag_o (rf_trvk_clrtag),
+ .rf_trvk_par_o (rf_trvk_par),
+ .tsmap_cs_o,
+ .tsmap_addr_o,
+ .tsmap_rdata_i,
+ .mmreg_corein_i,
+ .mmreg_coreout_o,
+
+ .ic_tag_req_o (ic_tag_req),
+ .ic_tag_write_o (ic_tag_write),
+ .ic_tag_addr_o (ic_tag_addr),
+ .ic_tag_wdata_o (ic_tag_wdata),
+ .ic_tag_rdata_i (ic_tag_rdata),
+ .ic_data_req_o (ic_data_req),
+ .ic_data_write_o (ic_data_write),
+ .ic_data_addr_o (ic_data_addr),
+ .ic_data_wdata_o (ic_data_wdata),
+ .ic_data_rdata_i (ic_data_rdata),
+ .ic_scr_key_valid_i(scramble_key_valid_q),
+
+ .irq_software_i,
+ .irq_timer_i,
+ .irq_external_i,
+ .irq_fast_i,
+ .irq_nm_i,
+ .irq_pending_o(irq_pending),
+
+ .debug_req_i,
+ .crash_dump_o,
+ .double_fault_seen_o,
+
+`ifdef RVFI
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rdata,
+ .rvfi_rs2_rcap,
+ .rvfi_rs3_rdata,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_rd_wcap,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_mem_is_cap,
+ .rvfi_ext_mip,
+ .rvfi_ext_nmi,
+ .rvfi_ext_debug_req,
+ .rvfi_ext_mcycle,
+`endif
+
+ .fetch_enable_i(fetch_enable_buf),
+ .alert_minor_o (core_alert_minor),
+ .alert_major_o (core_alert_major),
+ .icache_inval_o(icache_inval),
+ .core_busy_o (core_busy_d)
+ );
+
+ /////////////////////////////////
+ // Register file Instantiation //
+ /////////////////////////////////
+ if (!CHERIoTEn) begin
+ assign rf_alert = 1'b0; // rf_alert only available in cheri_regfile
+ end
+
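+  // With CHERIoTEn set, the capability-aware cheri_regfile (which also tracks per-register
+  // capability and readiness state) is used; otherwise one of the standard Ibex register file
+  // implementations is selected and the capability-related ports are tied off to safe defaults.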
+ if (CHERIoTEn) begin : gen_regfile_cheriot
+
+ localparam int unsigned NRegs = RV32E? 16 : 32;
+ localparam int unsigned NCaps = 16;
+
+ cheri_regfile #(
+ .NREGS (NRegs),
+ .NCAPS (NCaps),
+ .RegFileECC(RegFileECC),
+ .DataWidth (RegFileDataWidth),
+ .CheriPPLBC(CheriPPLBC)
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni (rst_ni),
+ .par_rst_ni (rst_ni),
+ .raddr_a_i (rf_raddr_a),
+ .rdata_a_o (rf_rdata_a_ecc),
+ .rcap_a_o (rf_rcap_a),
+ .raddr_b_i (rf_raddr_b),
+ .rdata_b_o (rf_rdata_b_ecc),
+ .rcap_b_o (rf_rcap_b),
+ .waddr_a_i (rf_waddr_wb),
+ .wdata_a_i (rf_wdata_wb_ecc),
+ .wcap_a_i (rf_wcap),
+ .we_a_i (rf_we_wb),
+ .reg_rdy_o (rf_reg_rdy),
+ .trvk_addr_i (rf_trvk_addr),
+ .trvk_en_i (rf_trvk_en),
+ .trvk_clrtag_i (rf_trvk_clrtag),
+ .trvk_par_i (rf_trvk_par),
+ .trsv_addr_i (rf_trsv_addr),
+ .trsv_en_i (rf_trsv_en),
+ .trsv_par_i (rf_trsv_par),
+ .alert_o (rf_alert)
+ );
+
+ end else if (RegFile == RegFileFF) begin : gen_regfile_ff
+ cheriot_register_file_ff #(
+ .RV32E (RV32E),
+ .DataWidth (RegFileDataWidth),
+ .DummyInstructions(DummyInstructions),
+ .WordZeroVal (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni(rst_ni),
+
+ .test_en_i (test_en_i),
+ .dummy_instr_id_i(dummy_instr_id),
+
+ .raddr_a_i(rf_raddr_a),
+ .rdata_a_o(rf_rdata_a_ecc),
+ .raddr_b_i(rf_raddr_b),
+ .rdata_b_o(rf_rdata_b_ecc),
+ .waddr_a_i(rf_waddr_wb),
+ .wdata_a_i(rf_wdata_wb_ecc),
+ .we_a_i (rf_we_wb)
+ );
+
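+    // Without the CHERIoT register file there is no capability state: return the NULL capability
+    // on both read ports and report every register as always ready.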
+ assign rf_rcap_a = NULL_REG_CAP;
+ assign rf_rcap_b = NULL_REG_CAP;
+ assign rf_reg_rdy = {32{1'b1}};
+
+ end else if (RegFile == RegFileFPGA) begin : gen_regfile_fpga
+ cheriot_register_file_fpga #(
+ .RV32E (RV32E),
+ .DataWidth (RegFileDataWidth),
+ .DummyInstructions(DummyInstructions),
+ .WordZeroVal (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni(rst_ni),
+
+ .test_en_i (test_en_i),
+ .dummy_instr_id_i(dummy_instr_id),
+
+ .raddr_a_i(rf_raddr_a),
+ .rdata_a_o(rf_rdata_a_ecc),
+ .raddr_b_i(rf_raddr_b),
+ .rdata_b_o(rf_rdata_b_ecc),
+ .waddr_a_i(rf_waddr_wb),
+ .wdata_a_i(rf_wdata_wb_ecc),
+ .we_a_i (rf_we_wb)
+ );
+
+ assign rf_rcap_a = NULL_REG_CAP;
+ assign rf_rcap_b = NULL_REG_CAP;
+ assign rf_reg_rdy = {32{1'b1}};
+
+ end else if (RegFile == RegFileLatch) begin : gen_regfile_latch
+ cheriot_register_file_latch #(
+ .RV32E (RV32E),
+ .DataWidth (RegFileDataWidth),
+ .DummyInstructions(DummyInstructions),
+ .WordZeroVal (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni(rst_ni),
+
+ .test_en_i (test_en_i),
+ .dummy_instr_id_i(dummy_instr_id),
+
+ .raddr_a_i(rf_raddr_a),
+ .rdata_a_o(rf_rdata_a_ecc),
+ .raddr_b_i(rf_raddr_b),
+ .rdata_b_o(rf_rdata_b_ecc),
+ .waddr_a_i(rf_waddr_wb),
+ .wdata_a_i(rf_wdata_wb_ecc),
+ .we_a_i (rf_we_wb)
+ );
+
+ assign rf_rcap_a = NULL_REG_CAP;
+ assign rf_rcap_b = NULL_REG_CAP;
+ assign rf_reg_rdy = {32{1'b1}};
+
+ end
+
+ ///////////////////////////////
+ // Scrambling Infrastructure //
+ ///////////////////////////////
+
+ if (ICacheScramble) begin : gen_scramble
+
+    // Scramble key valid is set when OTP returns a new valid key and stays high
+    // until we request a new key.
+ assign scramble_key_valid_d = scramble_req_q ? scramble_key_valid_i :
+ icache_inval ? 1'b0 :
+ scramble_key_valid_q;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ scramble_key_q <= RndCnstIbexKey;
+ scramble_nonce_q <= RndCnstIbexNonce;
+ end else if (scramble_key_valid_i) begin
+ scramble_key_q <= scramble_key_i;
+ scramble_nonce_q <= scramble_nonce_i;
+ end
+ end
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ scramble_key_valid_q <= 1'b1;
+ scramble_req_q <= '0;
+ end else begin
+ scramble_key_valid_q <= scramble_key_valid_d;
+ scramble_req_q <= scramble_req_d;
+ end
+ end
+
+    // Scramble key request starts with the invalidate signal from the ICache and stays high
+    // until we get a valid key.
+ assign scramble_req_d = scramble_req_q ? ~scramble_key_valid_i : icache_inval;
+ assign scramble_req_o = scramble_req_q;
+
+ end else begin : gen_noscramble
+
+ logic unused_scramble_inputs = scramble_key_valid_i & (|scramble_key_i) & (|RndCnstIbexKey) &
+ (|scramble_nonce_i) & (|RndCnstIbexNonce) & scramble_req_q &
+ icache_inval & scramble_key_valid_d & scramble_req_d;
+
+ assign scramble_req_d = 1'b0;
+ assign scramble_req_q = 1'b0;
+ assign scramble_req_o = 1'b0;
+ assign scramble_key_q = '0;
+ assign scramble_nonce_q = '0;
+ assign scramble_key_valid_q = 1'b1;
+ assign scramble_key_valid_d = 1'b1;
+ end
+
+ ////////////////////////
+ // Rams Instantiation //
+ ////////////////////////
+
+ if (ICache) begin : gen_rams
+
+ for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_rams_inner
+
+ // Tag RAM instantiation
+ prim_ram_1p_scr #(
+ .Width (TagSizeECC),
+ .Depth (IC_NUM_LINES),
+ .DataBitsPerMask (TagSizeECC),
+ .EnableParity (0),
+ .DiffWidth (TagSizeECC),
+ .NumAddrScrRounds (NumAddrScrRounds),
+ .NumDiffRounds (NumDiffRounds)
+ ) tag_bank (
+ .clk_i,
+ .rst_ni,
+
+ .key_valid_i (scramble_key_valid_q),
+ .key_i (scramble_key_q),
+ .nonce_i (scramble_nonce_q),
+
+ .req_i (ic_tag_req[way]),
+
+ .gnt_o (),
+ .write_i (ic_tag_write),
+ .addr_i (ic_tag_addr),
+ .wdata_i (ic_tag_wdata),
+ .wmask_i ({TagSizeECC{1'b1}}),
+ .intg_error_i(1'b0),
+
+ .rdata_o (ic_tag_rdata[way]),
+ .rvalid_o (),
+ .raddr_o (),
+ .rerror_o (),
+ .cfg_i (ram_cfg_i)
+ );
+
+ // Data RAM instantiation
+ prim_ram_1p_scr #(
+ .Width (LineSizeECC),
+ .Depth (IC_NUM_LINES),
+ .DataBitsPerMask (LineSizeECC),
+ .ReplicateKeyStream (1),
+ .EnableParity (0),
+ .DiffWidth (LineSizeECC),
+ .NumAddrScrRounds (NumAddrScrRounds),
+ .NumDiffRounds (NumDiffRounds)
+ ) data_bank (
+ .clk_i,
+ .rst_ni,
+
+ .key_valid_i (scramble_key_valid_q),
+ .key_i (scramble_key_q),
+ .nonce_i (scramble_nonce_q),
+
+ .req_i (ic_data_req[way]),
+
+ .gnt_o (),
+ .write_i (ic_data_write),
+ .addr_i (ic_data_addr),
+ .wdata_i (ic_data_wdata),
+ .wmask_i ({LineSizeECC{1'b1}}),
+ .intg_error_i(1'b0),
+
+ .rdata_o (ic_data_rdata[way]),
+ .rvalid_o (),
+ .raddr_o (),
+ .rerror_o (),
+ .cfg_i (ram_cfg_i)
+ );
+ end
+
+ end else begin : gen_norams
+
+ prim_ram_1p_pkg::ram_1p_cfg_t unused_ram_cfg;
+ logic unused_ram_inputs;
+
+ assign unused_ram_cfg = ram_cfg_i;
+ assign unused_ram_inputs = (|ic_tag_req) & ic_tag_write & (|ic_tag_addr) & (|ic_tag_wdata) &
+ (|ic_data_req) & ic_data_write & (|ic_data_addr) & (|ic_data_wdata) &
+ (|scramble_key_q) & (|scramble_nonce_q) & scramble_key_valid_q &
+ scramble_key_valid_d & (|scramble_nonce_q) &
+ (|NumAddrScrRounds);
+
+ assign ic_tag_rdata = '{default:'b0};
+ assign ic_data_rdata = '{default:'b0};
+
+ end
+
+ // Redundant lockstep core implementation
+ if (Lockstep) begin : gen_lockstep
+ // Note: certain synthesis tools like DC are very smart at optimizing away redundant logic.
+ // Hence, we have to insert an optimization barrier at the IOs of the lockstep Ibex.
+ // This is achieved by manually buffering each bit using prim_buf.
+ // Our Xilinx and DC synthesis flows make sure that these buffers cannot be optimized away
+ // using keep attributes (Vivado) and size_only constraints (DC).
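+    // The code below packs every input and output of the main core into buf_in, routes it through
+    // a single wide prim_buf, and unpacks it into the *_local copies that drive the shadow core.
+    // cheriot_lockstep is then expected to replay the same transactions (with a small internal
+    // delay) and signal any divergence through its alert_major_* outputs.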
+ logic [37:0] rf_wcap_vec, rf_rcap_a_vec, rf_rcap_b_vec;
+
+ localparam int NumBufferBits = $bits({
+ hart_id_i,
+ boot_addr_i,
+ instr_req_o,
+ instr_gnt_i,
+ instr_rvalid_i,
+ instr_addr_o,
+ instr_rdata_i,
+ instr_rdata_intg_i,
+ instr_err_i,
+ data_req_o,
+ data_gnt_i,
+ data_rvalid_i,
+ data_we_o,
+ data_be_o,
+ data_addr_o,
+ data_wdata_o,
+ data_is_cap_o,
+ data_rdata_i,
+ data_rdata_intg_i,
+ data_err_i,
+ dummy_instr_id,
+ rf_raddr_a,
+ rf_raddr_b,
+ rf_waddr_wb,
+ rf_we_wb,
+ rf_wdata_wb_ecc,
+ rf_rdata_a_ecc,
+ rf_rdata_b_ecc,
+ ic_tag_req,
+ ic_tag_write,
+ ic_tag_addr,
+ ic_tag_wdata,
+ ic_data_req,
+ ic_data_write,
+ ic_data_addr,
+ ic_data_wdata,
+ scramble_key_valid_i,
+ irq_software_i,
+ irq_timer_i,
+ irq_external_i,
+ irq_fast_i,
+ irq_nm_i,
+ irq_pending,
+ debug_req_i,
+ crash_dump_o,
+ double_fault_seen_o,
+ fetch_enable_i,
+ icache_inval,
+ core_busy_d,
+ cheri_pmode_i,
+ cheri_tsafe_en_i,
+ rf_wcap_vec,
+ rf_rcap_a_vec,
+ rf_rcap_b_vec,
+ rf_reg_rdy,
+ rf_trsv_en,
+ rf_trsv_addr,
+ rf_trsv_par,
+ rf_trvk_addr,
+ rf_trvk_en,
+ rf_trvk_clrtag,
+ rf_trvk_par,
+ tsmap_cs_o,
+ tsmap_addr_o,
+ tsmap_rdata_i,
+ tsmap_rdata_intg_i,
+ mmreg_corein_i,
+ mmreg_coreout_o
+ });
+
+ logic [NumBufferBits-1:0] buf_in, buf_out;
+
+ logic [31:0] hart_id_local;
+ logic [31:0] boot_addr_local;
+
+ logic instr_req_local;
+ logic instr_gnt_local;
+ logic instr_rvalid_local;
+ logic [31:0] instr_addr_local;
+ logic [31:0] instr_rdata_local;
+ logic [6:0] instr_rdata_intg_local;
+ logic instr_err_local;
+
+ logic data_req_local;
+ logic data_gnt_local;
+ logic data_rvalid_local;
+ logic data_we_local;
+ logic [3:0] data_be_local;
+ logic [31:0] data_addr_local;
+ logic [DataWidth-1:0] data_wdata_local;
+ logic data_is_cap_local;
+ logic [6:0] data_wdata_intg_local;
+ logic [DataWidth-1:0] data_rdata_local;
+ logic [6:0] data_rdata_intg_local;
+ logic data_err_local;
+
+ logic dummy_instr_id_local;
+ logic [4:0] rf_raddr_a_local;
+ logic [4:0] rf_raddr_b_local;
+ logic [4:0] rf_waddr_wb_local;
+ logic rf_we_wb_local;
+ logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc_local;
+ logic [RegFileDataWidth-1:0] rf_rdata_a_ecc_local;
+ logic [RegFileDataWidth-1:0] rf_rdata_b_ecc_local;
+
+ logic cheri_pmode_local;
+ logic cheri_tsafe_en_local;
+ logic [37:0] rf_wcap_vec_local;
+ logic [37:0] rf_rcap_a_vec_local;
+ logic [37:0] rf_rcap_b_vec_local;
+ logic [31:0] rf_reg_rdy_local;
+ logic rf_trsv_en_local;
+ logic [4:0] rf_trsv_addr_local;
+ logic [6:0] rf_trsv_par_local;
+ logic [4:0] rf_trvk_addr_local;
+ logic rf_trvk_en_local;
+ logic rf_trvk_clrtag_local;
+ logic [6:0] rf_trvk_par_local;
+ logic tsmap_cs_local;
+ logic [15:0] tsmap_addr_local;
+ logic [31:0] tsmap_rdata_local;
+ logic [6:0] tsmap_rdata_intg_local;
+ logic [MMRegDinW-1:0] mmreg_corein_local;
+ logic [MMRegDoutW-1:0] mmreg_coreout_local;
+ reg_cap_t rf_wcap_local, rf_rcap_a_local, rf_rcap_b_local;
+
+ logic [IC_NUM_WAYS-1:0] ic_tag_req_local;
+ logic ic_tag_write_local;
+ logic [IC_INDEX_W-1:0] ic_tag_addr_local;
+ logic [TagSizeECC-1:0] ic_tag_wdata_local;
+ logic [IC_NUM_WAYS-1:0] ic_data_req_local;
+ logic ic_data_write_local;
+ logic [IC_INDEX_W-1:0] ic_data_addr_local;
+ logic [LineSizeECC-1:0] ic_data_wdata_local;
+ logic scramble_key_valid_local;
+
+ logic irq_software_local;
+ logic irq_timer_local;
+ logic irq_external_local;
+ logic [14:0] irq_fast_local;
+ logic irq_nm_local;
+ logic irq_pending_local;
+
+ logic debug_req_local;
+ crash_dump_t crash_dump_local;
+ logic double_fault_seen_local;
+ fetch_enable_t fetch_enable_local;
+
+ logic icache_inval_local;
+ logic core_busy_local;
+
+ assign buf_in = {
+ hart_id_i,
+ boot_addr_i,
+ instr_req_o,
+ instr_gnt_i,
+ instr_rvalid_i,
+ instr_addr_o,
+ instr_rdata_i,
+ instr_rdata_intg_i,
+ instr_err_i,
+ data_req_o,
+ data_gnt_i,
+ data_rvalid_i,
+ data_we_o,
+ data_be_o,
+ data_addr_o,
+ data_wdata_o,
+ data_is_cap_o,
+ data_rdata_i,
+ data_rdata_intg_i,
+ data_err_i,
+ dummy_instr_id,
+ rf_raddr_a,
+ rf_raddr_b,
+ rf_waddr_wb,
+ rf_we_wb,
+ rf_wdata_wb_ecc,
+ rf_rdata_a_ecc,
+ rf_rdata_b_ecc,
+ ic_tag_req,
+ ic_tag_write,
+ ic_tag_addr,
+ ic_tag_wdata,
+ ic_data_req,
+ ic_data_write,
+ ic_data_addr,
+ ic_data_wdata,
+ scramble_key_valid_q,
+ irq_software_i,
+ irq_timer_i,
+ irq_external_i,
+ irq_fast_i,
+ irq_nm_i,
+ irq_pending,
+ debug_req_i,
+ crash_dump_o,
+ double_fault_seen_o,
+ fetch_enable_i,
+ icache_inval,
+ core_busy_d,
+ cheri_pmode_i,
+ cheri_tsafe_en_i,
+ rf_wcap_vec,
+ rf_rcap_a_vec,
+ rf_rcap_b_vec,
+ rf_reg_rdy,
+ rf_trsv_en,
+ rf_trsv_addr,
+ rf_trsv_par,
+ rf_trvk_addr,
+ rf_trvk_en,
+ rf_trvk_clrtag,
+ rf_trvk_par,
+ tsmap_cs_o,
+ tsmap_addr_o,
+ tsmap_rdata_i,
+ tsmap_rdata_intg_i,
+ mmreg_corein_i,
+ mmreg_coreout_o
+ };
+
+ assign {
+ hart_id_local,
+ boot_addr_local,
+ instr_req_local,
+ instr_gnt_local,
+ instr_rvalid_local,
+ instr_addr_local,
+ instr_rdata_local,
+ instr_rdata_intg_local,
+ instr_err_local,
+ data_req_local,
+ data_gnt_local,
+ data_rvalid_local,
+ data_we_local,
+ data_be_local,
+ data_addr_local,
+ data_wdata_local,
+ data_is_cap_local,
+ data_rdata_local,
+ data_rdata_intg_local,
+ data_err_local,
+ dummy_instr_id_local,
+ rf_raddr_a_local,
+ rf_raddr_b_local,
+ rf_waddr_wb_local,
+ rf_we_wb_local,
+ rf_wdata_wb_ecc_local,
+ rf_rdata_a_ecc_local,
+ rf_rdata_b_ecc_local,
+ ic_tag_req_local,
+ ic_tag_write_local,
+ ic_tag_addr_local,
+ ic_tag_wdata_local,
+ ic_data_req_local,
+ ic_data_write_local,
+ ic_data_addr_local,
+ ic_data_wdata_local,
+ scramble_key_valid_local,
+ irq_software_local,
+ irq_timer_local,
+ irq_external_local,
+ irq_fast_local,
+ irq_nm_local,
+ irq_pending_local,
+ debug_req_local,
+ crash_dump_local,
+ double_fault_seen_local,
+ fetch_enable_local,
+ icache_inval_local,
+ core_busy_local,
+ cheri_pmode_local,
+ cheri_tsafe_en_local,
+ rf_wcap_vec_local,
+ rf_rcap_a_vec_local,
+ rf_rcap_b_vec_local,
+ rf_reg_rdy_local,
+ rf_trsv_en_local,
+ rf_trsv_addr_local,
+ rf_trsv_par_local,
+ rf_trvk_addr_local,
+ rf_trvk_en_local,
+ rf_trvk_clrtag_local,
+ rf_trvk_par_local,
+ tsmap_cs_local,
+ tsmap_addr_local,
+ tsmap_rdata_local,
+ tsmap_rdata_intg_local,
+ mmreg_corein_local,
+ mmreg_coreout_local
+ } = buf_out;
+
+ assign rf_wcap_vec = reg2vec(rf_wcap);
+ assign rf_rcap_a_vec = reg2vec(rf_rcap_a);
+ assign rf_rcap_b_vec = reg2vec(rf_rcap_b);
+ assign rf_wcap_local = vec2reg(rf_wcap_vec_local);
+ assign rf_rcap_a_local = vec2reg(rf_rcap_a_vec_local);
+ assign rf_rcap_b_local = vec2reg(rf_rcap_b_vec_local);
+
+ // Manually buffer all input signals.
+ prim_buf #(.Width(NumBufferBits)) u_signals_prim_buf (
+ .in_i(buf_in),
+ .out_o(buf_out)
+ );
+
+ logic [TagSizeECC-1:0] ic_tag_rdata_local [IC_NUM_WAYS];
+ logic [LineSizeECC-1:0] ic_data_rdata_local [IC_NUM_WAYS];
+ for (genvar k = 0; k < IC_NUM_WAYS; k++) begin : gen_ways
+ prim_buf #(.Width(TagSizeECC)) u_tag_prim_buf (
+ .in_i(ic_tag_rdata[k]),
+ .out_o(ic_tag_rdata_local[k])
+ );
+ prim_buf #(.Width(LineSizeECC)) u_data_prim_buf (
+ .in_i(ic_data_rdata[k]),
+ .out_o(ic_data_rdata_local[k])
+ );
+ end
+
+ logic lockstep_alert_minor_local, lockstep_alert_major_internal_local;
+ logic lockstep_alert_major_bus_local;
+
+ cheriot_lockstep #(
+ .PMPEnable (PMPEnable),
+ .PMPGranularity (PMPGranularity),
+ .PMPNumRegions (PMPNumRegions),
+ .MHPMCounterNum (MHPMCounterNum),
+ .MHPMCounterWidth (MHPMCounterWidth),
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32B),
+ .BranchTargetALU (BranchTargetALU),
+ .ICache (ICache),
+ .ICacheECC (ICacheECC),
+ .BusSizeECC (BusSizeECC),
+ .TagSizeECC (TagSizeECC),
+ .LineSizeECC (LineSizeECC),
+ .BranchPredictor (BranchPredictor),
+ .DbgTriggerEn (DbgTriggerEn),
+ .DbgHwBreakNum (DbgHwBreakNum),
+ .WritebackStage (WritebackStage),
+ .ResetAll (ResetAll),
+ .RndCnstLfsrSeed (RndCnstLfsrSeed),
+ .RndCnstLfsrPerm (RndCnstLfsrPerm),
+ .SecureIbex (SecureIbex),
+ .DummyInstructions(DummyInstructions),
+ .RegFileECC (RegFileECC),
+ .RegFileDataWidth (RegFileDataWidth),
+ .DmHaltAddr (DmHaltAddr),
+ .DmExceptionAddr (DmExceptionAddr),
+ .CHERIoTEn (CHERIoTEn),
+ .DataWidth (DataWidth),
+ .HeapBase (HeapBase ),
+ .TSMapBase (TSMapBase ),
+ .TSMapSize (TSMapSize),
+ .MemCapFmt (MemCapFmt ),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2),
+ .CheriTBRE (CheriTBRE)
+ ) u_cheriot_lockstep (
+ .clk_i (clk),
+ .rst_ni (rst_ni), // should use a different reset tree
+
+ .hart_id_i (hart_id_local),
+ .boot_addr_i (boot_addr_local),
+ .cheri_pmode_i (cheri_pmode_local),
+ .cheri_tsafe_en_i (cheri_tsafe_en_local),
+
+ .instr_req_i (instr_req_local),
+ .instr_gnt_i (instr_gnt_local),
+ .instr_rvalid_i (instr_rvalid_local),
+ .instr_addr_i (instr_addr_local),
+ .instr_rdata_i (instr_rdata_local),
+ .instr_rdata_intg_i (instr_rdata_intg_local),
+ .instr_err_i (instr_err_local),
+
+ .data_req_i (data_req_local),
+ .data_gnt_i (data_gnt_local),
+ .data_rvalid_i (data_rvalid_local),
+ .data_we_i (data_we_local),
+ .data_be_i (data_be_local),
+ .data_addr_i (data_addr_local),
+ .data_wdata_i (data_wdata_local),
+ .data_is_cap_i (data_is_cap_local),
+ .data_wdata_intg_o (data_wdata_intg_local),
+ .data_rdata_i (data_rdata_local),
+ .data_rdata_intg_i (data_rdata_intg_local),
+ .data_err_i (data_err_local),
+
+ .dummy_instr_id_i (dummy_instr_id_local),
+ .rf_raddr_a_i (rf_raddr_a_local),
+ .rf_raddr_b_i (rf_raddr_b_local),
+ .rf_waddr_wb_i (rf_waddr_wb_local),
+ .rf_we_wb_i (rf_we_wb_local),
+ .rf_wdata_wb_ecc_i (rf_wdata_wb_ecc_local),
+ .rf_rdata_a_ecc_i (rf_rdata_a_ecc_local),
+ .rf_rdata_b_ecc_i (rf_rdata_b_ecc_local),
+ .rf_wcap_wb_i (rf_wcap_local ),
+ .rf_rcap_a_i (rf_rcap_a_local ),
+ .rf_rcap_b_i (rf_rcap_b_local ),
+ .rf_reg_rdy_i (rf_reg_rdy_local ),
+ .rf_trsv_en_i (rf_trsv_en_local ),
+ .rf_trsv_addr_i (rf_trsv_addr_local ),
+ .rf_trsv_par_i (rf_trsv_par_local ),
+ .rf_trvk_addr_i (rf_trvk_addr_local ),
+ .rf_trvk_en_i (rf_trvk_en_local ),
+ .rf_trvk_clrtag_i (rf_trvk_clrtag_local ),
+ .rf_trvk_par_i (rf_trvk_par_local ),
+ .tsmap_cs_i (tsmap_cs_local ),
+ .tsmap_addr_i (tsmap_addr_local ),
+ .tsmap_rdata_i (tsmap_rdata_local ),
+ .tsmap_rdata_intg_i (tsmap_rdata_intg_local),
+ .mmreg_corein_i (mmreg_corein_local ),
+ .mmreg_coreout_i (mmreg_coreout_local ),
+
+ .ic_tag_req_i (ic_tag_req_local),
+ .ic_tag_write_i (ic_tag_write_local),
+ .ic_tag_addr_i (ic_tag_addr_local),
+ .ic_tag_wdata_i (ic_tag_wdata_local),
+ .ic_tag_rdata_i (ic_tag_rdata_local),
+ .ic_data_req_i (ic_data_req_local),
+ .ic_data_write_i (ic_data_write_local),
+ .ic_data_addr_i (ic_data_addr_local),
+ .ic_data_wdata_i (ic_data_wdata_local),
+ .ic_data_rdata_i (ic_data_rdata_local),
+ .ic_scr_key_valid_i (scramble_key_valid_local),
+
+ .irq_software_i (irq_software_local),
+ .irq_timer_i (irq_timer_local),
+ .irq_external_i (irq_external_local),
+ .irq_fast_i (irq_fast_local),
+ .irq_nm_i (irq_nm_local),
+ .irq_pending_i (irq_pending_local),
+
+ .debug_req_i (debug_req_local),
+ .crash_dump_i (crash_dump_local),
+ .double_fault_seen_i (double_fault_seen_local),
+
+ .fetch_enable_i (fetch_enable_local),
+ .alert_minor_o (lockstep_alert_minor_local),
+ .alert_major_internal_o (lockstep_alert_major_internal_local),
+ .alert_major_bus_o (lockstep_alert_major_bus_local),
+ .icache_inval_i (icache_inval_local),
+ .core_busy_i (core_busy_local),
+ .test_en_i (test_en_i),
+ .scan_rst_ni (scan_rst_ni)
+ );
+
+ // Manually buffer the output signals.
+ prim_buf #(.Width (7)) u_prim_buf_wdata_intg (
+ .in_i(data_wdata_intg_local),
+ .out_o(data_wdata_intg_o)
+ );
+
+ prim_buf u_prim_buf_alert_minor (
+ .in_i (lockstep_alert_minor_local),
+ .out_o(lockstep_alert_minor)
+ );
+
+ prim_buf u_prim_buf_alert_major_internal (
+ .in_i (lockstep_alert_major_internal_local),
+ .out_o(lockstep_alert_major_internal)
+ );
+
+ prim_buf u_prim_buf_alert_major_bus (
+ .in_i (lockstep_alert_major_bus_local),
+ .out_o(lockstep_alert_major_bus)
+ );
+
+ end else begin : gen_no_lockstep
+ assign lockstep_alert_major_internal = 1'b0;
+ assign lockstep_alert_major_bus = 1'b0;
+ assign lockstep_alert_minor = 1'b0;
+ assign data_wdata_intg_o = 'b0;
+ logic unused_scan, unused_intg;
+ assign unused_scan = scan_rst_ni;
+ assign unused_intg = |{instr_rdata_intg_i, data_rdata_intg_i};
+ end
+
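+  // Alert aggregation: internal major alerts combine the main core's alert, the lockstep mismatch
+  // alert and the register file alert, while bus-related major alerts come only from the lockstep
+  // module (which is also where the incoming bus integrity bits are checked).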
+ assign alert_major_internal_o = core_alert_major | lockstep_alert_major_internal | rf_alert;
+ assign alert_major_bus_o = lockstep_alert_major_bus;
+ assign alert_minor_o = core_alert_minor | lockstep_alert_minor;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv b/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv
new file mode 100644
index 0000000..aa74060
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv
@@ -0,0 +1,347 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Top level module of the ibex RISC-V core with tracing enabled
+ */
+
+module cheriot_top_tracing import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter bit PMPEnable = 1'b0,
+ parameter int unsigned PMPGranularity = 0,
+ parameter int unsigned PMPNumRegions = 4,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter rv32m_e RV32M = RV32MFast,
+ parameter rv32b_e RV32B = RV32BNone,
+ parameter regfile_e RegFile = RegFileFF,
+ parameter bit BranchTargetALU = 1'b1,
+ parameter bit WritebackStage = 1'b1,
+ parameter bit ICache = 1'b0,
+ parameter bit ICacheECC = 1'b0,
+ parameter bit BranchPredictor = 1'b0,
+ parameter bit DbgTriggerEn = 1'b0,
+ parameter int unsigned DbgHwBreakNum = 1,
+ parameter bit SecureIbex = 1'b0,
+ parameter bit ICacheScramble = 1'b0,
+ parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
+ parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault,
+ parameter bit HWTraceEn = 1'b0,
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ parameter bit CHERIoTEn = 1'b1,
+ parameter int unsigned DataWidth = 33,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2004_0000, // 4kB default
+ parameter int unsigned TSMapSize = 1024,
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i, // enable all clock gates for testing
+ input logic scan_rst_ni,
+ input prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic [6:0] instr_rdata_intg_i,
+ input logic instr_err_i,
+
+ // Data memory interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [DataWidth-1:0] data_wdata_o,
+ output logic [6:0] data_wdata_intg_o,
+ input logic [DataWidth-1:0] data_rdata_i,
+ input logic [6:0] data_rdata_intg_i,
+ input logic data_err_i,
+
+ // TS map memory interface
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i,
+ input logic [6:0] tsmap_rdata_intg_i,
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+  input  logic                         irq_nm_i,       // non-maskable interrupt
+
+ // Scrambling Interface
+ input logic scramble_key_valid_i,
+ input logic [SCRAMBLE_KEY_W-1:0] scramble_key_i,
+ input logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_i,
+ output logic scramble_req_o,
+
+ // Debug Interface
+ input logic debug_req_i,
+ output crash_dump_t crash_dump_o,
+ output logic double_fault_seen_o,
+
+ // CPU Control Signals
+ input fetch_enable_t fetch_enable_i,
+ output logic core_sleep_o
+);
+
+ // cheriot_tracer relies on the signals from the RISC-V Formal Interface
+ `ifndef RVFI
+  $fatal(1, "Fatal error: RVFI needs to be defined globally.");
+ `endif
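+  // RVFI is expected to be set as a global define by the simulation build flow (for example via a
+  // "+define+RVFI" compile option, exact syntax being tool-specific); it cannot be enabled locally
+  // here because the RVFI ports of cheriot_top are guarded by the same define.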
+
+ logic rvfi_valid;
+ logic [63:0] rvfi_order;
+ logic [31:0] rvfi_insn;
+ logic rvfi_trap;
+ logic rvfi_halt;
+ logic rvfi_intr;
+ logic [ 1:0] rvfi_mode;
+ logic [ 1:0] rvfi_ixl;
+ logic [ 4:0] rvfi_rs1_addr;
+ logic [ 4:0] rvfi_rs2_addr;
+ logic [ 4:0] rvfi_rs3_addr;
+ logic [31:0] rvfi_rs1_rdata;
+ reg_cap_t rvfi_rs1_rcap;
+ reg_cap_t rvfi_rs2_rcap;
+ logic [31:0] rvfi_rs2_rdata;
+ logic [31:0] rvfi_rs3_rdata;
+ logic [ 4:0] rvfi_rd_addr;
+ logic [31:0] rvfi_rd_wdata;
+ reg_cap_t rvfi_rd_wcap;
+ logic [31:0] rvfi_pc_rdata;
+ logic [31:0] rvfi_pc_wdata;
+ logic [31:0] rvfi_mem_addr;
+ logic [ 3:0] rvfi_mem_rmask;
+ logic [ 3:0] rvfi_mem_wmask;
+ logic [DataWidth-1:0] rvfi_mem_rdata;
+ logic [DataWidth-1:0] rvfi_mem_wdata;
+ logic rvfi_mem_is_cap;
+ reg_cap_t rvfi_mem_rcap;
+ reg_cap_t rvfi_mem_wcap;
+ logic [31:0] rvfi_mem2_addr;
+ logic rvfi_mem2_we;
+ logic [65:0] rvfi_mem2_rdata;
+ logic [65:0] rvfi_mem2_wdata;
+ logic [31:0] rvfi_ext_mip;
+ logic rvfi_ext_nmi;
+ logic rvfi_ext_debug_req;
+ logic [63:0] rvfi_ext_mcycle;
+
+ logic [31:0] unused_rvfi_ext_mip;
+ logic unused_rvfi_ext_nmi;
+ logic unused_rvfi_ext_debug_req;
+ logic [63:0] unused_rvfi_ext_mcycle;
+
+
+ // Tracer doesn't use these signals, though other modules may probe down into tracer to observe
+ // them.
+ assign unused_rvfi_ext_mip = rvfi_ext_mip;
+ assign unused_rvfi_ext_nmi = rvfi_ext_nmi;
+ assign unused_rvfi_ext_debug_req = rvfi_ext_debug_req;
+ assign unused_rvfi_ext_mcycle = rvfi_ext_mcycle;
+
+ cheriot_top #(
+ .PMPEnable ( PMPEnable ),
+ .PMPGranularity ( PMPGranularity ),
+ .PMPNumRegions ( PMPNumRegions ),
+ .MHPMCounterNum ( MHPMCounterNum ),
+ .MHPMCounterWidth ( MHPMCounterWidth ),
+ .RV32E ( RV32E ),
+ .RV32M ( RV32M ),
+ .RV32B ( RV32B ),
+ .RegFile ( RegFile ),
+ .BranchTargetALU ( BranchTargetALU ),
+ .ICache ( ICache ),
+ .ICacheECC ( ICacheECC ),
+ .BranchPredictor ( BranchPredictor ),
+ .DbgTriggerEn ( DbgTriggerEn ),
+ .DbgHwBreakNum ( DbgHwBreakNum ),
+ .WritebackStage ( WritebackStage ),
+ .SecureIbex ( SecureIbex ),
+ .ICacheScramble ( ICacheScramble ),
+ .RndCnstLfsrSeed ( RndCnstLfsrSeed ),
+ .RndCnstLfsrPerm ( RndCnstLfsrPerm ),
+ .DmHaltAddr (DmHaltAddr ),
+ .DmExceptionAddr (DmExceptionAddr ),
+ .CHERIoTEn (CHERIoTEn),
+ .DataWidth (DataWidth),
+ .HeapBase (HeapBase ),
+ .TSMapBase (TSMapBase ),
+ .TSMapSize (TSMapSize),
+ .MemCapFmt (MemCapFmt ),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2),
+ .CheriTBRE (CheriTBRE),
+ .CheriStkZ (CheriStkZ)
+ ) u_cheriot_top (
+ .clk_i,
+ .rst_ni,
+
+ .test_en_i,
+ .scan_rst_ni,
+ .ram_cfg_i,
+
+ .cheri_pmode_i,
+ .cheri_tsafe_en_i,
+ .hart_id_i,
+ .boot_addr_i,
+
+ .instr_req_o,
+ .instr_gnt_i,
+ .instr_rvalid_i,
+ .instr_addr_o,
+ .instr_rdata_i,
+ .instr_rdata_intg_i,
+ .instr_err_i,
+
+ .data_req_o,
+ .data_is_cap_o,
+ .data_gnt_i,
+ .data_rvalid_i,
+ .data_we_o,
+ .data_be_o,
+ .data_addr_o,
+ .data_wdata_o,
+ .data_wdata_intg_o,
+ .data_rdata_i,
+ .data_rdata_intg_i,
+ .data_err_i,
+
+ .tsmap_cs_o,
+ .tsmap_addr_o,
+ .tsmap_rdata_i,
+ .tsmap_rdata_intg_i,
+ .mmreg_corein_i,
+ .mmreg_coreout_o,
+
+ .irq_software_i,
+ .irq_timer_i,
+ .irq_external_i,
+ .irq_fast_i,
+ .irq_nm_i,
+
+ .scramble_key_valid_i,
+ .scramble_key_i,
+ .scramble_nonce_i,
+ .scramble_req_o,
+
+ .debug_req_i,
+ .crash_dump_o,
+ .double_fault_seen_o,
+
+`ifdef RVFI
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rdata,
+ .rvfi_rs2_rcap,
+ .rvfi_rs3_rdata,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_rd_wcap,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_mem_is_cap,
+ .rvfi_ext_mip,
+ .rvfi_ext_nmi,
+ .rvfi_ext_debug_req,
+ .rvfi_ext_mcycle,
+`endif
+ .fetch_enable_i,
+ .core_sleep_o,
+ .alert_major_bus_o(),
+ .alert_major_internal_o(),
+ .alert_minor_o()
+ );
+
+`ifdef RVFI
+ cheriot_tracer #(
+ .DataWidth (DataWidth)
+ ) u_cheriot_tracer (
+ .clk_i,
+ .rst_ni,
+
+ .cheri_pmode_i,
+ .cheri_tsafe_en_i,
+ .hart_id_i,
+
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs2_rdata,
+ .rvfi_rs3_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rcap,
+ .rvfi_rd_wcap,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_mem_is_cap
+ );
+`endif
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv b/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv
new file mode 100644
index 0000000..2f08ba9
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv
@@ -0,0 +1,1410 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Trace executed instructions in simulation
+ *
+ * This tracer takes execution information from the RISC-V Formal Interface (RVFI) and
+ * produces a text file with a human-readable trace.
+ *
+ * All traced instructions are written to a log file. By default, the log file is named
+ * trace_core_<HARTID>.log, with <HARTID> being the 8 digit hart ID of the core being traced.
+ *
+ * The file name base, defaulting to "trace_core", can be set using the "cheriot_tracer_file_base"
+ * plusarg passed to the simulation, e.g. "+cheriot_tracer_file_base=ibex_my_trace". The exact syntax
+ * of passing plusargs to a simulation depends on the simulator.
+ *
+ * The creation of the instruction trace is enabled by default but can be disabled for a simulation.
+ * This behaviour is controlled by the plusarg "cheriot_tracer_enable". Use "+cheriot_tracer_enable=0"
+ * to disable the tracer.
+ *
+ * The trace contains six columns, separated by tabs:
+ * - The simulation time
+ * - The clock cycle count since reset
+ * - The program counter (PC)
+ * - The instruction
+ * - The decoded instruction in the same format as objdump. Jumps and branches show the target
+ *   address. This column may be omitted if the instruction does not decode into a long form.
+ * - Accessed registers and memory locations, together with the read/written values.
+ *
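+ * As a purely illustrative example (made-up values), the trace line for an ADDI instruction could
+ * contain, in tab-separated columns:
+ *
+ *   20000        5    00000180    00600193    addi  x3,x0,6     x0:0x00000000 x3=0x00000006
+ *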
+ * Significant effort is spent to make the decoding produced by this tracer as similar as possible
+ * to the one produced by objdump. This simplifies the correlation between the static program
+ * information from the objdump-generated disassembly, and the runtime information from this tracer.
+ */
+
+module cheriot_tracer import cheri_pkg::*; # (
+ parameter int unsigned DataWidth = 32,
+ parameter bit CheriCapIT8 = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ input logic [31:0] hart_id_i,
+
+ // RVFI as described at https://github.com/SymbioticEDA/riscv-formal/blob/master/docs/rvfi.md
+  // The standard interface does not have _i/_o suffixes. For consistency with the standard, the
+  // signals in this module don't have the suffixes either.
+ input logic rvfi_valid,
+ input logic [63:0] rvfi_order,
+ input logic [31:0] rvfi_insn,
+ input logic rvfi_trap,
+ input logic rvfi_halt,
+ input logic rvfi_intr,
+ input logic [ 1:0] rvfi_mode,
+ input logic [ 1:0] rvfi_ixl,
+ input logic [ 4:0] rvfi_rs1_addr,
+ input logic [ 4:0] rvfi_rs2_addr,
+ input logic [ 4:0] rvfi_rs3_addr,
+ input logic [31:0] rvfi_rs1_rdata,
+ input reg_cap_t rvfi_rs1_rcap,
+ input logic [31:0] rvfi_rs2_rdata,
+ input reg_cap_t rvfi_rs2_rcap,
+ input logic [31:0] rvfi_rs3_rdata,
+ input logic [ 4:0] rvfi_rd_addr,
+ input logic [31:0] rvfi_rd_wdata,
+ input reg_cap_t rvfi_rd_wcap,
+ input logic [31:0] rvfi_pc_rdata,
+ input logic [31:0] rvfi_pc_wdata,
+ input logic [31:0] rvfi_mem_addr,
+ input logic [ 3:0] rvfi_mem_rmask,
+ input logic [ 3:0] rvfi_mem_wmask,
+ input logic [DataWidth-1:0] rvfi_mem_rdata,
+ input logic [DataWidth-1:0] rvfi_mem_wdata,
+ input logic rvfi_mem_is_cap,
+ input reg_cap_t rvfi_mem_rcap,
+ input reg_cap_t rvfi_mem_wcap
+);
+
+// synthesis translate_off
+
+ // These signals are part of RVFI, but not used in this module currently.
+ // Keep them as part of the interface to change the tracer more easily in the future. Assigning
+ // these signals to unused_* signals marks them explicitly as unused, an annotation picked up by
+ // linters, including Verilator lint.
+ logic [63:0] unused_rvfi_order = rvfi_order;
+ logic unused_rvfi_trap = rvfi_trap;
+ logic unused_rvfi_halt = rvfi_halt;
+ logic unused_rvfi_intr = rvfi_intr;
+ logic [ 1:0] unused_rvfi_mode = rvfi_mode;
+ logic [ 1:0] unused_rvfi_ixl = rvfi_ixl;
+
+ import cheriot_tracer_pkg::*;
+
+ int file_handle;
+ string file_name;
+
+ int unsigned cycle;
+ string decoded_str;
+ logic insn_is_compressed;
+ logic rvfi_mem_wdata_bit32;
+
+ // Data items accessed during this instruction
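+  // RS1/RS2/RS3/RD and MEM flag integer register and data memory accesses; CS1/CS2/CD and MEMC
+  // flag the corresponding CHERI capability register and capability memory accesses.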
+ localparam logic [9:0] RS1 = (1 << 0);
+ localparam logic [9:0] RS2 = (1 << 1);
+ localparam logic [9:0] RS3 = (1 << 2);
+ localparam logic [9:0] RD = (1 << 3);
+ localparam logic [9:0] MEM = (1 << 4);
+ localparam logic [9:0] CS1 = (1 << 5);
+ localparam logic [9:0] CS2 = (1 << 6);
+ localparam logic [9:0] CD = (1 << 7);
+ localparam logic [9:0] MEMC = (1 << 8);
+ localparam logic [9:0] MEM2 = (1 << 9);
+ logic [9:0] data_accessed;
+
+ logic trace_log_enable;
+ initial begin
+ if ($value$plusargs("cheriot_tracer_enable=%b", trace_log_enable)) begin
+ if (trace_log_enable == 1'b0) begin
+ $display("%m: Instruction trace disabled.");
+ end
+ end else begin
+ trace_log_enable = 1'b1;
+ end
+ end
+
+ function automatic void printbuffer_dumpline();
+ string rvfi_insn_str;
+ string disp_str;
+ logic [32:0] tmp33;
+
+ if (file_handle == 32'h0) begin
+ string file_name_base = "trace_core";
+ void'($value$plusargs("cheriot_tracer_file_base=%s", file_name_base));
+ $sformat(file_name, "%s_%h.log", file_name_base, hart_id_i);
+
+ $display("%m: Writing execution trace to %s", file_name);
+ file_handle = $fopen(file_name, "w");
+ $fwrite(file_handle,
+ "Time\tCycle\tPC\tInsn\tDecoded instruction\tRegister and memory contents\n");
+ end
+
+    // Write compressed instructions as four hex digits (16-bit word) and
+    // uncompressed ones as eight hex digits (32-bit word).
+ if (insn_is_compressed) begin
+ rvfi_insn_str = $sformatf(" %4h", rvfi_insn[15:0]);
+ end else begin
+ rvfi_insn_str = $sformatf("%8h", rvfi_insn);
+ end
+
+ if (rvfi_trap) disp_str = $sformatf("-->%s", decoded_str);
+ else if (rvfi_intr) disp_str = $sformatf("==>%s", decoded_str);
+ else disp_str = decoded_str;
+
+ $fwrite(file_handle, "%15t\t%d\t%h\t%s\t%s\t",
+ $time, cycle, rvfi_pc_rdata, rvfi_insn_str, disp_str);
+
+ if ((data_accessed & RS1) != 0) begin
+ $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs1_addr), rvfi_rs1_rdata);
+ end
+ if ((data_accessed & CS1) != 0) begin
+ tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rs1_rcap) : reg2memcap_fmt0(rvfi_rs1_rcap);
+ $fwrite(file_handle, " %s:0x%08x+0x%09x", reg_addr_to_str(rvfi_rs1_addr), rvfi_rs1_rdata, tmp33);
+ end
+ if ((data_accessed & RS2) != 0) begin
+ $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs2_addr), rvfi_rs2_rdata);
+ end
+ if ((data_accessed & CS2) != 0) begin
+ tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rs2_rcap) : reg2memcap_fmt0(rvfi_rs2_rcap);
+ $fwrite(file_handle, " %s:0x%08x+0x%09x", reg_addr_to_str(rvfi_rs2_addr), rvfi_rs2_rdata, tmp33);
+ end
+ if ((data_accessed & RS3) != 0) begin
+ $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs3_addr), rvfi_rs3_rdata);
+ end
+ if ((data_accessed & RD) != 0) begin
+ $fwrite(file_handle, " %s=0x%08x", reg_addr_to_str(rvfi_rd_addr), rvfi_rd_wdata);
+ end
+
+ if ((data_accessed & CD) != 0) begin
+ tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rd_wcap) : reg2memcap_fmt0(rvfi_rd_wcap);
+ $fwrite(file_handle, " %s=0x%08x+0x%09x", reg_addr_to_str(rvfi_rd_addr), rvfi_rd_wdata, tmp33);
+ end
+
+ if ((data_accessed & MEM) != 0) begin
+ $fwrite(file_handle, " PA:0x%08x", rvfi_mem_addr);
+
+ if (rvfi_mem_wmask == 4'b0001)
+ $fwrite(file_handle, " store:0x%1b??????%02x", rvfi_mem_wdata_bit32, rvfi_mem_wdata[7:0]);
+ else if (rvfi_mem_wmask == 4'b0011)
+ $fwrite(file_handle, " store:0x%1b????%04x", rvfi_mem_wdata_bit32, rvfi_mem_wdata[15:0]);
+ else if (rvfi_mem_wmask != 4'b0000)
+ $fwrite(file_handle, " store:0x%09x", rvfi_mem_wdata);
+
+ if (rvfi_mem_rmask != 4'b0000)
+ $fwrite(file_handle, " load:0x%08x", rvfi_mem_rdata);
+ end
+
+ if ((data_accessed & MEMC) != 0) begin
+ $fwrite(file_handle, " PA:0x%08x", rvfi_mem_addr);
+
+ if (rvfi_mem_wmask != 0) begin
+ tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_mem_wcap) : reg2memcap_fmt0(rvfi_mem_wcap);
+ $fwrite(file_handle, " store:0x%09x+0x%09x", rvfi_mem_wdata, tmp33);
+ end else begin
+ tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_mem_rcap) : reg2memcap_fmt0(rvfi_mem_rcap);
+ $fwrite(file_handle, " load:0x%09x+0x%09x", rvfi_mem_rdata, tmp33);
+ end
+ end
+
+ $fwrite(file_handle, "\n");
+ endfunction
+
+
+  // Format register address with an "x" prefix, space-padded on the left to a fixed width of 3 characters.
+ function automatic string reg_addr_to_str(input logic [4:0] addr);
+ if (addr < 10) begin
+ return $sformatf(" x%0d", addr);
+ end else begin
+ return $sformatf("x%0d", addr);
+ end
+ endfunction
+
+ // Get a SCR name for a CHERI SCR address.
+ function automatic string get_scr_name(input logic [4:0] scr_addr);
+ unique case (scr_addr)
+ 5'd27: return "ztopc";
+ 5'd28: return "mtcc";
+ 5'd29: return "mtdc";
+ 5'd30: return "mscratchc";
+ 5'd31: return "mepcc";
+ default: return $sformatf("scr%d", scr_addr);
+ endcase
+ endfunction
+
+ // Get a CSR name for a CSR address.
+ function automatic string get_csr_name(input logic [11:0] csr_addr);
+ unique case (csr_addr)
+ 12'd0: return "ustatus";
+ 12'd4: return "uie";
+ 12'd5: return "utvec";
+ 12'd64: return "uscratch";
+ 12'd65: return "uepc";
+ 12'd66: return "ucause";
+ 12'd67: return "utval";
+ 12'd68: return "uip";
+ 12'd1: return "fflags";
+ 12'd2: return "frm";
+ 12'd3: return "fcsr";
+ 12'd3072: return "cycle";
+ 12'd3073: return "time";
+ 12'd3074: return "instret";
+ 12'd3075: return "hpmcounter3";
+ 12'd3076: return "hpmcounter4";
+ 12'd3077: return "hpmcounter5";
+ 12'd3078: return "hpmcounter6";
+ 12'd3079: return "hpmcounter7";
+ 12'd3080: return "hpmcounter8";
+ 12'd3081: return "hpmcounter9";
+ 12'd3082: return "hpmcounter10";
+ 12'd3083: return "hpmcounter11";
+ 12'd3084: return "hpmcounter12";
+ 12'd3085: return "hpmcounter13";
+ 12'd3086: return "hpmcounter14";
+ 12'd3087: return "hpmcounter15";
+ 12'd3088: return "hpmcounter16";
+ 12'd3089: return "hpmcounter17";
+ 12'd3090: return "hpmcounter18";
+ 12'd3091: return "hpmcounter19";
+ 12'd3092: return "hpmcounter20";
+ 12'd3093: return "hpmcounter21";
+ 12'd3094: return "hpmcounter22";
+ 12'd3095: return "hpmcounter23";
+ 12'd3096: return "hpmcounter24";
+ 12'd3097: return "hpmcounter25";
+ 12'd3098: return "hpmcounter26";
+ 12'd3099: return "hpmcounter27";
+ 12'd3100: return "hpmcounter28";
+ 12'd3101: return "hpmcounter29";
+ 12'd3102: return "hpmcounter30";
+ 12'd3103: return "hpmcounter31";
+ 12'd3200: return "cycleh";
+ 12'd3201: return "timeh";
+ 12'd3202: return "instreth";
+ 12'd3203: return "hpmcounter3h";
+ 12'd3204: return "hpmcounter4h";
+ 12'd3205: return "hpmcounter5h";
+ 12'd3206: return "hpmcounter6h";
+ 12'd3207: return "hpmcounter7h";
+ 12'd3208: return "hpmcounter8h";
+ 12'd3209: return "hpmcounter9h";
+ 12'd3210: return "hpmcounter10h";
+ 12'd3211: return "hpmcounter11h";
+ 12'd3212: return "hpmcounter12h";
+ 12'd3213: return "hpmcounter13h";
+ 12'd3214: return "hpmcounter14h";
+ 12'd3215: return "hpmcounter15h";
+ 12'd3216: return "hpmcounter16h";
+ 12'd3217: return "hpmcounter17h";
+ 12'd3218: return "hpmcounter18h";
+ 12'd3219: return "hpmcounter19h";
+ 12'd3220: return "hpmcounter20h";
+ 12'd3221: return "hpmcounter21h";
+ 12'd3222: return "hpmcounter22h";
+ 12'd3223: return "hpmcounter23h";
+ 12'd3224: return "hpmcounter24h";
+ 12'd3225: return "hpmcounter25h";
+ 12'd3226: return "hpmcounter26h";
+ 12'd3227: return "hpmcounter27h";
+ 12'd3228: return "hpmcounter28h";
+ 12'd3229: return "hpmcounter29h";
+ 12'd3230: return "hpmcounter30h";
+ 12'd3231: return "hpmcounter31h";
+ 12'd256: return "sstatus";
+ 12'd258: return "sedeleg";
+ 12'd259: return "sideleg";
+ 12'd260: return "sie";
+ 12'd261: return "stvec";
+ 12'd262: return "scounteren";
+ 12'd320: return "sscratch";
+ 12'd321: return "sepc";
+ 12'd322: return "scause";
+ 12'd323: return "stval";
+ 12'd324: return "sip";
+ 12'd384: return "satp";
+ 12'd3857: return "mvendorid";
+ 12'd3858: return "marchid";
+ 12'd3859: return "mimpid";
+ 12'd3860: return "mhartid";
+ 12'd768: return "mstatus";
+ 12'd769: return "misa";
+ 12'd770: return "medeleg";
+ 12'd771: return "mideleg";
+ 12'd772: return "mie";
+ 12'd773: return "mtvec";
+ 12'd774: return "mcounteren";
+ 12'd832: return "mscratch";
+ 12'd833: return "mepc";
+ 12'd834: return "mcause";
+ 12'd835: return "mtval";
+ 12'd836: return "mip";
+ 12'd928: return "pmpcfg0";
+ 12'd929: return "pmpcfg1";
+ 12'd930: return "pmpcfg2";
+ 12'd931: return "pmpcfg3";
+ 12'd944: return "pmpaddr0";
+ 12'd945: return "pmpaddr1";
+ 12'd946: return "pmpaddr2";
+ 12'd947: return "pmpaddr3";
+ 12'd948: return "pmpaddr4";
+ 12'd949: return "pmpaddr5";
+ 12'd950: return "pmpaddr6";
+ 12'd951: return "pmpaddr7";
+ 12'd952: return "pmpaddr8";
+ 12'd953: return "pmpaddr9";
+ 12'd954: return "pmpaddr10";
+ 12'd955: return "pmpaddr11";
+ 12'd956: return "pmpaddr12";
+ 12'd957: return "pmpaddr13";
+ 12'd958: return "pmpaddr14";
+ 12'd959: return "pmpaddr15";
+ 12'd2816: return "mcycle";
+ 12'd2818: return "minstret";
+ 12'd2819: return "mhpmcounter3";
+ 12'd2820: return "mhpmcounter4";
+ 12'd2821: return "mhpmcounter5";
+ 12'd2822: return "mhpmcounter6";
+ 12'd2823: return "mhpmcounter7";
+ 12'd2824: return "mhpmcounter8";
+ 12'd2825: return "mhpmcounter9";
+ 12'd2826: return "mhpmcounter10";
+ 12'd2827: return "mhpmcounter11";
+ 12'd2828: return "mhpmcounter12";
+ 12'd2829: return "mhpmcounter13";
+ 12'd2830: return "mhpmcounter14";
+ 12'd2831: return "mhpmcounter15";
+ 12'd2832: return "mhpmcounter16";
+ 12'd2833: return "mhpmcounter17";
+ 12'd2834: return "mhpmcounter18";
+ 12'd2835: return "mhpmcounter19";
+ 12'd2836: return "mhpmcounter20";
+ 12'd2837: return "mhpmcounter21";
+ 12'd2838: return "mhpmcounter22";
+ 12'd2839: return "mhpmcounter23";
+ 12'd2840: return "mhpmcounter24";
+ 12'd2841: return "mhpmcounter25";
+ 12'd2842: return "mhpmcounter26";
+ 12'd2843: return "mhpmcounter27";
+ 12'd2844: return "mhpmcounter28";
+ 12'd2845: return "mhpmcounter29";
+ 12'd2846: return "mhpmcounter30";
+ 12'd2847: return "mhpmcounter31";
+ 12'd2944: return "mcycleh";
+ 12'd2946: return "minstreth";
+ 12'd2947: return "mhpmcounter3h";
+ 12'd2948: return "mhpmcounter4h";
+ 12'd2949: return "mhpmcounter5h";
+ 12'd2950: return "mhpmcounter6h";
+ 12'd2951: return "mhpmcounter7h";
+ 12'd2952: return "mhpmcounter8h";
+ 12'd2953: return "mhpmcounter9h";
+ 12'd2954: return "mhpmcounter10h";
+ 12'd2955: return "mhpmcounter11h";
+ 12'd2956: return "mhpmcounter12h";
+ 12'd2957: return "mhpmcounter13h";
+ 12'd2958: return "mhpmcounter14h";
+ 12'd2959: return "mhpmcounter15h";
+ 12'd2960: return "mhpmcounter16h";
+ 12'd2961: return "mhpmcounter17h";
+ 12'd2962: return "mhpmcounter18h";
+ 12'd2963: return "mhpmcounter19h";
+ 12'd2964: return "mhpmcounter20h";
+ 12'd2965: return "mhpmcounter21h";
+ 12'd2966: return "mhpmcounter22h";
+ 12'd2967: return "mhpmcounter23h";
+ 12'd2968: return "mhpmcounter24h";
+ 12'd2969: return "mhpmcounter25h";
+ 12'd2970: return "mhpmcounter26h";
+ 12'd2971: return "mhpmcounter27h";
+ 12'd2972: return "mhpmcounter28h";
+ 12'd2973: return "mhpmcounter29h";
+ 12'd2974: return "mhpmcounter30h";
+ 12'd2975: return "mhpmcounter31h";
+ 12'd803: return "mhpmevent3";
+ 12'd804: return "mhpmevent4";
+ 12'd805: return "mhpmevent5";
+ 12'd806: return "mhpmevent6";
+ 12'd807: return "mhpmevent7";
+ 12'd808: return "mhpmevent8";
+ 12'd809: return "mhpmevent9";
+ 12'd810: return "mhpmevent10";
+ 12'd811: return "mhpmevent11";
+ 12'd812: return "mhpmevent12";
+ 12'd813: return "mhpmevent13";
+ 12'd814: return "mhpmevent14";
+ 12'd815: return "mhpmevent15";
+ 12'd816: return "mhpmevent16";
+ 12'd817: return "mhpmevent17";
+ 12'd818: return "mhpmevent18";
+ 12'd819: return "mhpmevent19";
+ 12'd820: return "mhpmevent20";
+ 12'd821: return "mhpmevent21";
+ 12'd822: return "mhpmevent22";
+ 12'd823: return "mhpmevent23";
+ 12'd824: return "mhpmevent24";
+ 12'd825: return "mhpmevent25";
+ 12'd826: return "mhpmevent26";
+ 12'd827: return "mhpmevent27";
+ 12'd828: return "mhpmevent28";
+ 12'd829: return "mhpmevent29";
+ 12'd830: return "mhpmevent30";
+ 12'd831: return "mhpmevent31";
+ 12'd1952: return "tselect";
+ 12'd1953: return "tdata1";
+ 12'd1954: return "tdata2";
+ 12'd1955: return "tdata3";
+ 12'd1968: return "dcsr";
+ 12'd1969: return "dpc";
+ 12'd1970: return "dscratch";
+ 12'd512: return "hstatus";
+ 12'd514: return "hedeleg";
+ 12'd515: return "hideleg";
+ 12'd516: return "hie";
+ 12'd517: return "htvec";
+ 12'd576: return "hscratch";
+ 12'd577: return "hepc";
+ 12'd578: return "hcause";
+ 12'd579: return "hbadaddr";
+ 12'd580: return "hip";
+ 12'd896: return "mbase";
+ 12'd897: return "mbound";
+ 12'd898: return "mibase";
+ 12'd899: return "mibound";
+ 12'd900: return "mdbase";
+ 12'd901: return "mdbound";
+ 12'd800: return "mcountinhibit";
+ 12'd3009: return "mshwm";
+ 12'd3010: return "mshwmb";
+ 12'd3012: return "cdbgctrl";
+ default: return $sformatf("0x%x", csr_addr);
+ endcase
+ endfunction
+
+ function automatic void decode_mnemonic(input string mnemonic);
+ decoded_str = mnemonic;
+ endfunction
+
+ function automatic void decode_r_insn(input string mnemonic);
+ data_accessed = RS1 | RS2 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr,
+ rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_r1_insn(input string mnemonic);
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr);
+ endfunction
+
+ function automatic void decode_r_cmixcmov_insn(input string mnemonic);
+ data_accessed = RS1 | RS2 | RS3 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr,
+ rvfi_rs1_addr, rvfi_rs3_addr);
+ endfunction
+
+ function automatic void decode_r_funnelshift_insn(input string mnemonic);
+ data_accessed = RS1 | RS2 | RS3 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr,
+ rvfi_rs3_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_i_insn(input string mnemonic);
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr,
+ $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]}));
+ endfunction
+
+ function automatic void decode_i_shift_insn(input string mnemonic);
+ // SLLI, SRLI, SRAI, SROI, SLOI, RORI
+ logic [4:0] shamt;
+ shamt = {rvfi_insn[24:20]};
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,0x%0x", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, shamt);
+ endfunction
+
+ function automatic void decode_i_funnelshift_insn( input string mnemonic);
+ // fsri
+ logic [5:0] shamt;
+ shamt = {rvfi_insn[25:20]};
+ data_accessed = RS1 | RS3 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,0x%0x", mnemonic, rvfi_rd_addr, rvfi_rs1_addr,
+ rvfi_rs3_addr, shamt);
+ endfunction
+
+ function automatic void decode_i_jalr_insn(input string mnemonic);
+ // JALR
+ if (cheri_pmode_i) begin
+ data_accessed = CS1 | CD;
+ // CH.cjalr
+ decoded_str = $sformatf("CH.c%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr,
+ $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]}), rvfi_rs1_addr);
+ end else begin
+ // jalr
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr,
+ $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]}), rvfi_rs1_addr);
+ end
+ endfunction
+
+ function automatic void decode_u_insn(input string mnemonic);
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, {rvfi_insn[31:12]});
+ endfunction
+
+ function automatic void decode_j_insn(input string mnemonic);
+ // JAL
+ if (cheri_pmode_i) begin
+ data_accessed = CD;
+ decoded_str = $sformatf("%s\tc%0d,%0x", "CH.cjal", rvfi_rd_addr, rvfi_pc_wdata);
+ end else begin
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\tx%0d,%0x", mnemonic, rvfi_rd_addr, rvfi_pc_wdata);
+ end
+ endfunction
+
+ function automatic void decode_b_insn(input string mnemonic);
+ logic [31:0] branch_target;
+ logic [31:0] imm;
+
+ // We cannot use rvfi_pc_wdata for conditional jumps.
+ imm = $signed({ {19 {rvfi_insn[31]}}, rvfi_insn[31], rvfi_insn[7],
+ rvfi_insn[30:25], rvfi_insn[11:8], 1'b0 });
+ branch_target = rvfi_pc_rdata + imm;
+
+ data_accessed = RS1 | RS2;
+ decoded_str = $sformatf("%s\tx%0d,x%0d,%0x",
+ mnemonic, rvfi_rs1_addr, rvfi_rs2_addr, branch_target);
+ endfunction
+
+ function automatic void decode_csr_insn(input string mnemonic);
+ logic [11:0] csr;
+ string csr_name;
+ csr = rvfi_insn[31:20];
+ csr_name = get_csr_name(csr);
+
+ data_accessed = RD;
+
+ if (!rvfi_insn[14]) begin
+ data_accessed |= RS1;
+ decoded_str = $sformatf("%s\tx%0d,%s,x%0d",
+ mnemonic, rvfi_rd_addr, csr_name, rvfi_rs1_addr);
+ end else begin
+ decoded_str = $sformatf("%s\tx%0d,%s,%0d",
+ mnemonic, rvfi_rd_addr, csr_name, {27'b0, rvfi_insn[19:15]});
+ end
+ endfunction
+
+ function automatic void decode_cr_insn(input string mnemonic);
+ if (rvfi_rs2_addr == 5'b0) begin
+ if ((rvfi_insn[12] == 1'b1) && cheri_pmode_i) begin
+ // C.CH.JALR
+ data_accessed = CS1 | CD;
+ decoded_str = $sformatf("%s\tc%0d", "c.CH.cjalr", rvfi_rs1_addr);
+ end else if (rvfi_insn[12] == 1'b1) begin
+ // C.JALR
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d", mnemonic, rvfi_rs1_addr);
+ end else if (cheri_pmode_i) begin
+ // C.CH.JR
+ data_accessed = CS1;
+ decoded_str = $sformatf("%s\tc%0d", "c.CH.cjr" , rvfi_rs1_addr);
+ end else begin
+ // C.JR
+ data_accessed = RS1;
+ decoded_str = $sformatf("%s\tx%0d", mnemonic, rvfi_rs1_addr);
+ end
+ end else begin
+ data_accessed = RS1 | RS2 | RD; // RS1 == RD
+ decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr);
+ end
+ endfunction
+
+ function automatic void decode_ci_cli_insn(input string mnemonic);
+ logic [5:0] imm;
+ imm = {rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(imm));
+ endfunction
+
+ function automatic void decode_ci_caddi_insn(input string mnemonic);
+ logic [5:0] nzimm;
+ nzimm = {rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(nzimm));
+ endfunction
+
+ function automatic void decode_ci_caddi16sp_insn(input string mnemonic);
+ logic [9:0] nzimm;
+ nzimm = {rvfi_insn[12], rvfi_insn[4:3], rvfi_insn[5], rvfi_insn[2], rvfi_insn[6], 4'b0};
+ if (cheri_pmode_i) begin
+ data_accessed = CS1 | CD;
+ decoded_str = $sformatf("%s\tc%0d,%0d", "c.CH.cinc16csp", rvfi_rd_addr, $signed(nzimm));
+ end else begin
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(nzimm));
+ end
+ endfunction
+
+ function automatic void decode_ci_clui_insn(input string mnemonic);
+ logic [5:0] nzimm;
+ nzimm = {rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, 20'($signed(nzimm)));
+ endfunction
+
+ function automatic void decode_ci_cslli_insn(input string mnemonic);
+ logic [5:0] shamt;
+ shamt = {rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, shamt);
+ endfunction
+
+ function automatic void decode_ciw_insn(input string mnemonic);
+ // C.ADDI4SPN
+ logic [9:0] nzuimm;
+ nzuimm = {rvfi_insn[10:7], rvfi_insn[12:11], rvfi_insn[5], rvfi_insn[6], 2'b00};
+ if (cheri_pmode_i) begin
+ // c.CH.incaddr4spn
+ data_accessed = CD | CS1;
+ decoded_str = $sformatf("%s\tc%0d,csp,%0d", mnemonic, rvfi_rd_addr, nzuimm);
+ end else begin
+ // c.addi4spn
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\tx%0d,x2,%0d", mnemonic, rvfi_rd_addr, nzuimm);
+ end
+ endfunction
+
+ function automatic void decode_cb_sr_insn(input string mnemonic);
+ logic [5:0] shamt;
+ shamt = {rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rs1_addr, shamt);
+ endfunction
+
+ function automatic void decode_cb_insn(input string mnemonic);
+ logic [7:0] imm;
+ logic [31:0] jump_target;
+ if (rvfi_insn[15:13] == 3'b110 || rvfi_insn[15:13] == 3'b111) begin
+ // C.BNEZ and C.BEQZ
+ // We cannot use rvfi_pc_wdata for conditional jumps.
+ imm = {rvfi_insn[12], rvfi_insn[6:5], rvfi_insn[2], rvfi_insn[11:10], rvfi_insn[4:3]};
+ jump_target = rvfi_pc_rdata + 32'($signed({imm, 1'b0}));
+ data_accessed = RS1;
+ decoded_str = $sformatf("%s\tx%0d,%0x", mnemonic, rvfi_rs1_addr, jump_target);
+ end else if (rvfi_insn[15:13] == 3'b100) begin
+ // C.ANDI
+ imm = {{2{rvfi_insn[12]}}, rvfi_insn[12], rvfi_insn[6:2]};
+ data_accessed = RS1 | RD; // RS1 == RD
+ decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(imm));
+ end else begin
+ imm = {rvfi_insn[12], rvfi_insn[6:2], 2'b00};
+ data_accessed = RS1;
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rs1_addr, imm);
+ end
+ endfunction
+
+ function automatic void decode_cs_insn(input string mnemonic);
+ data_accessed = RS1 | RS2 | RD; // RS1 == RD
+ decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_cj_insn(input string mnemonic);
+ if (rvfi_insn[15:13] == 3'b001) begin
+ // C.JAL
+ if (cheri_pmode_i) begin
+ data_accessed = CD;
+ decoded_str = $sformatf("%s\t%0x", "c.CH.cjal", rvfi_pc_wdata);
+ end else begin
+ data_accessed = RD;
+ decoded_str = $sformatf("%s\t%0x", mnemonic, rvfi_pc_wdata);
+ end
+ end else begin
+ // C.J
+ if (cheri_pmode_i)
+ decoded_str = $sformatf("%s\t%0x", "c.CH.cj", rvfi_pc_wdata);
+ else
+ decoded_str = $sformatf("%s\t%0x", mnemonic, rvfi_pc_wdata);
+ end
+ endfunction
+
+ function automatic void decode_compressed_load_insn(input string mnemonic);
+ logic [7:0] imm;
+
+ if ((rvfi_insn[15:13] == 3'b011) && (rvfi_insn[1:0] == OPCODE_C0)) begin
+ // CHERI: c.clc, use RV64 c.ld encoding
+ imm = {rvfi_insn[6:5], rvfi_insn[12:10], 3'b000};
+ data_accessed = CS1 | CD | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr);
+ end else if ((rvfi_insn[15:13] == 3'b011) && (rvfi_insn[1:0] == OPCODE_C2)) begin
+ // CHERI: c.clcsp, RV32: c.ldsp
+ imm = {rvfi_insn[4:2], rvfi_insn[12], rvfi_insn[6:5], 3'b000};
+ data_accessed = CS1 | CD | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr);
+ end else begin
+ if (rvfi_insn[1:0] == OPCODE_C0) begin
+ // C.LW
+ imm = {1'b0, rvfi_insn[5], rvfi_insn[12:10], rvfi_insn[6], 2'b00};
+ end else begin
+ // C.LWSP
+ imm = {rvfi_insn[3:2], rvfi_insn[12], rvfi_insn[6:4], 2'b00};
+ end
+ if (cheri_pmode_i) begin
+ data_accessed = CS1 | RD | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr);
+ end else begin
+ data_accessed = RS1 | RD | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr);
+ end
+ end
+ endfunction
+
+ function automatic void decode_compressed_store_insn(input string mnemonic);
+ logic [7:0] imm;
+
+ if ((rvfi_insn[15:13] == 3'b111) && (rvfi_insn[1:0] == OPCODE_C0)) begin
+ // CHERI: c.csc, use RV64 c.sd encoding
+ imm = {rvfi_insn[6:5], rvfi_insn[12:10], 3'b000};
+ data_accessed = CS1 | CS2 | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr);
+ end else if ((rvfi_insn[15:13] == 3'b111) && (rvfi_insn[1:0] == OPCODE_C2)) begin
+ // CHERI: c.cscsp, RV32: c.sdsp
+ imm = {rvfi_insn[9:7], rvfi_insn[12:10], 3'b000};
+ data_accessed = CS1 | CS2 | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr);
+ end else begin
+ if (rvfi_insn[1:0] == OPCODE_C0) begin
+ // C.SW
+ imm = {1'b0, rvfi_insn[5], rvfi_insn[12:10], rvfi_insn[6], 2'b00};
+ end else begin
+ // C.SWSP
+ imm = {rvfi_insn[8:7], rvfi_insn[12:9], 2'b00};
+ end
+ if (cheri_pmode_i) begin
+ data_accessed = CS1 | RS2 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr);
+ end else begin
+ data_accessed = RS1 | RS2 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr);
+ end
+ end
+ endfunction
+
+ function automatic void decode_load_insn();
+ string mnemonic;
+ logic [13:0] imm;
+
+ /*
+ Gives wrong results in Verilator < 4.020.
+ See https://github.com/lowRISC/ibex/issues/372 and
+ https://www.veripool.org/issues/1536-Verilator-Misoptimization-in-if-and-case-with-default-statement-inside-a-function
+
+ unique case (rvfi_insn[14:12])
+ 3'b000: mnemonic = "lb";
+ 3'b001: mnemonic = "lh";
+ 3'b010: mnemonic = "lw";
+ 3'b100: mnemonic = "lbu";
+ 3'b101: mnemonic = "lhu";
+ default: begin
+ decode_mnemonic("INVALID");
+ return;
+ end
+ endcase
+ */
+ logic [2:0] size;
+ logic is_cap;
+
+ size = rvfi_insn[14:12];
+ is_cap = 1'b0;
+
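+ // The if/else chain below is a functional replacement for the commented-out unique case
+ // above, avoiding the Verilator misoptimization noted in the comment.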
+ if (size == 3'b000) begin
+ mnemonic = cheri_pmode_i ? "clb" : "lb";
+ end else if (size == 3'b001) begin
+ mnemonic = cheri_pmode_i ? "clh" : "lh";
+ end else if (size == 3'b010) begin
+ mnemonic = cheri_pmode_i ? "clw" : "lw";
+ end else if (size == 3'b100) begin
+ mnemonic = cheri_pmode_i ? "clbu" : "lbu";
+ end else if (size == 3'b101) begin
+ mnemonic = cheri_pmode_i ? "clhu" : "lhu";
+ end else if (size == 3'b011) begin
+ mnemonic = "CH.clc";
+ is_cap = 1'b1;
+ end else begin
+ decode_mnemonic("INVALID");
+ return;
+ end
+
+ imm = {{3{rvfi_insn[31]}},rvfi_insn[30:20]};
+
+ if (is_cap) begin
+ data_accessed = CD | CS1 | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr,
+ $signed(imm), rvfi_rs1_addr);
+ end else if (cheri_pmode_i) begin
+ data_accessed = RD | CS1 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr,
+ $signed(imm), rvfi_rs1_addr);
+ end else begin
+ data_accessed = RD | RS1 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr,
+ $signed(imm), rvfi_rs1_addr);
+ end
+ endfunction
+
+ function automatic void decode_store_insn();
+ string mnemonic;
+ logic is_cap;
+ logic [13:0] imm;
+
+ is_cap = 1'b0;
+ unique case (rvfi_insn[13:12])
+ 2'b00: mnemonic = cheri_pmode_i ? "csb" : "sb";
+ 2'b01: mnemonic = cheri_pmode_i ? "csh" : "sh";
+ 2'b10: mnemonic = cheri_pmode_i ? "csw" : "sw";
+ 2'b11: begin
+ mnemonic = "CH.csc";
+ is_cap = 1'b1;
+ end
+ default: begin
+ decode_mnemonic("INVALID");
+ return;
+ end
+ endcase
+
+ imm = {{3{rvfi_insn[31]}},rvfi_insn[30:25], rvfi_insn[11:7]};
+
+ if (!rvfi_insn[14]) begin
+ // regular store
+ if (is_cap) begin
+ data_accessed = CS1 | CS2 | MEMC;
+ decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)",
+ mnemonic,
+ rvfi_rs2_addr,
+ $signed(imm),
+ rvfi_rs1_addr);
+ end else if (cheri_pmode_i) begin
+ data_accessed = CS1 | RS2 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)",
+ mnemonic,
+ rvfi_rs2_addr,
+ $signed(imm),
+ rvfi_rs1_addr);
+ end else begin
+ data_accessed = RS1 | RS2 | MEM;
+ decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)",
+ mnemonic,
+ rvfi_rs2_addr,
+ $signed(imm),
+ rvfi_rs1_addr);
+ end
+ end else begin
+ decode_mnemonic("INVALID");
+ end
+ endfunction
+
+ function automatic string get_fence_description(logic [3:0] bits);
+ string desc = "";
+ if (bits[3]) begin
+ desc = {desc, "i"};
+ end
+ if (bits[2]) begin
+ desc = {desc, "o"};
+ end
+ if (bits[1]) begin
+ desc = {desc, "r"};
+ end
+ if (bits[0]) begin
+ desc = {desc, "w"};
+ end
+ return desc;
+ endfunction
+
+ function automatic void decode_fence();
+ string predecessor;
+ string successor;
+ predecessor = get_fence_description(rvfi_insn[27:24]);
+ successor = get_fence_description(rvfi_insn[23:20]);
+ decoded_str = $sformatf("fence\t%s,%s", predecessor, successor);
+ endfunction
+
+ function automatic void decode_cheri_rd_rs1_insn(input string mnemonic);
+ data_accessed = RS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr);
+ endfunction
+
+ function automatic void decode_cheri_rd_cs1_insn(input string mnemonic);
+ data_accessed = CS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr);
+ endfunction
+
+ function automatic void decode_cheri_cd_cs1_insn(input string mnemonic);
+ data_accessed = CS1 | CD;
+ decoded_str = $sformatf("%s\tc%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr);
+ endfunction
+
+ function automatic void decode_cheri_rd_cs1_cs2_insn(input string mnemonic);
+ data_accessed = CS2 | CS1 | RD;
+ decoded_str = $sformatf("%s\tx%0d,c%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_cheri_cd_cs1_cs2_insn(input string mnemonic);
+ data_accessed = CS2 | CS1 | CD;
+ decoded_str = $sformatf("%s\tc%0d,c%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_cheri_cd_cs1_rs2_insn(input string mnemonic);
+ data_accessed = RS2 | CS1 | CD;
+ decoded_str = $sformatf("%s\tc%0d,c%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_cheri_cd_cs1_imm_insn(input string mnemonic);
+ logic [13:0] imm;
+
+ data_accessed = CS1 | CD;
+
+ // cincaddrimm and csetboundsimm
+ imm = {{3{rvfi_insn[31]}}, rvfi_insn[30:20]}; // 12-bit immediate, sign-extended to 14 bits; csetboundsimm prints it unsigned below
+
+ if (rvfi_insn[14:12] == 3'b001) // cincaddrimm
+ decoded_str = $sformatf("%s\tc%0d,c%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, $signed(imm));
+ else // csetboundsimm
+ decoded_str = $sformatf("%s\tc%0d,c%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, imm);
+
+ endfunction
+
+ function automatic void decode_cheri_auipcc_insn();
+ logic [31:0] imm;
+
+ // U-type upper immediate (insn[31:12])
+ imm = rvfi_insn[31:12];
+ data_accessed = CD;
+ if (cheri_pmode_i) begin
+ decoded_str = $sformatf("%s\tc%0d,0x%0x", "CH.auipcc", rvfi_rd_addr, imm);
+ end else begin
+ decoded_str = $sformatf("%s\tx%0d,0x%0x", "auipc", rvfi_rd_addr, imm);
+ end
+
+ endfunction
+
+
+ function automatic void decode_cheri_auicgp_insn();
+ logic [31:0] imm;
+
+ // U-type upper immediate (insn[31:12])
+ imm = rvfi_insn[31:12];
+ data_accessed = CD | CS1;
+ decoded_str = $sformatf("%s\tc%0d,0x%0x", "CH.auicgp", rvfi_rd_addr, imm);
+ endfunction
+
+
+ function automatic void decode_cheri_cs1_cs2_insn(input string mnemonic);
+ data_accessed = CS2 | CS1;
+ decoded_str = $sformatf("%s\tc%0d,c%0d", mnemonic, rvfi_rs1_addr, rvfi_rs2_addr);
+ endfunction
+
+ function automatic void decode_cheri_scrrw_insn();
+ string mnemonic, scr_name;
+
+ scr_name = get_scr_name(rvfi_insn[24:20]);
+ data_accessed = CS1 | CD;
+
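+ // cd == c0 is traced as the write-only form (cspecialw), cs1 == c0 as the read-only form
+ // (cspecialr); otherwise the full read-write form (cspecialrw) is used.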
+ if (rvfi_rd_addr == 0) begin
+ mnemonic = "CH.cspecialw";
+ decoded_str = $sformatf("%s\t%s,c%0d", mnemonic, scr_name, rvfi_rs1_addr);
+ end else if (rvfi_rs1_addr == 0) begin
+ mnemonic = "CH.cspecialr";
+ decoded_str = $sformatf("%s\tc%0d,%s", mnemonic, rvfi_rd_addr, scr_name);
+ end else begin
+ mnemonic = "CH.cspecialrw";
+ decoded_str = $sformatf("%s\tc%0d,%s,c%0d", mnemonic, rvfi_rd_addr, scr_name, rvfi_rs1_addr);
+ end
+ endfunction
+
+ // cycle counter
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ cycle <= 0;
+ end else begin
+ cycle <= cycle + 1;
+ end
+ end
+
+ // close output file for writing
+ final begin
+ if (file_handle != 32'h0) begin
+ $fclose(file_handle);
+ end
+ end
+ // log execution
+ always_ff @(posedge clk_i) begin
+ if (rvfi_valid && trace_log_enable) begin
+ printbuffer_dumpline();
+ end
+ end
+
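+ // With DataWidth == 33, bit 32 of rvfi_mem_wdata is traced separately (presumably the
+ // capability tag accompanying the 32-bit memory write data); otherwise it is tied to 0.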
+ if (DataWidth == 33) begin
+ assign rvfi_mem_wdata_bit32 = rvfi_mem_wdata[32];
+ end else begin
+ assign rvfi_mem_wdata_bit32 = 1'b0;
+ end
+
+ //always_comb begin
+ // Changed to always @* to avoid VCS warnings about dynamic types (strings) in the sensitivity list.
+ always @* begin
+ decoded_str = "";
+ data_accessed = 5'h0;
+ insn_is_compressed = 0;
+
+ // Check for compressed instructions
+ if (rvfi_insn[1:0] != 2'b11) begin
+ insn_is_compressed = 1;
+ // Separate case to avoid overlapping decoding
+ if (rvfi_insn[15:13] == INSN_CMV[15:13] && rvfi_insn[1:0] == OPCODE_C2) begin
+ if (rvfi_insn[12] == INSN_CADD[12]) begin
+ if (rvfi_insn[11:2] == INSN_CEBREAK[11:2]) begin
+ decode_mnemonic("c.ebreak");
+ end else if (rvfi_insn[6:2] == INSN_CJALR[6:2]) begin
+ decode_cr_insn("c.jalr");
+ end else begin
+ decode_cr_insn("c.add");
+ end
+ end else begin
+ if (rvfi_insn[6:2] == INSN_CJR[6:2]) begin
+ decode_cr_insn("c.jr");
+ end else begin
+ decode_cr_insn("c.mv");
+ end
+ end
+ end else begin
+ unique casez (rvfi_insn[15:0])
+ // C0 Opcodes
+ INSN_CADDI4SPN: begin
+ if (rvfi_insn[12:2] == 11'h0) begin
+ // Align with pseudo-mnemonic used by GNU binutils and LLVM's MC layer
+ decode_mnemonic("c.unimp");
+ end else begin
+ decode_ciw_insn("c.addi4spn");
+ end
+ end
+ INSN_CLW: decode_compressed_load_insn("c.lw");
+ INSN_CSW: decode_compressed_store_insn("c.sw");
+ INSN_CCLC: decode_compressed_load_insn("c.CH.clc");
+ INSN_CCSC: decode_compressed_store_insn("c.CH.csc");
+ // C1 Opcodes
+ INSN_CADDI: decode_ci_caddi_insn("c.addi");
+ INSN_CJAL: decode_cj_insn("c.jal");
+ INSN_CJ: decode_cj_insn("c.j");
+ INSN_CLI: decode_ci_cli_insn("c.li");
+ INSN_CLUI: begin
+ // These two instructions share opcode
+ if (rvfi_insn[11:7] == 5'd2) begin
+ decode_ci_caddi16sp_insn("c.addi16sp");
+ end else begin
+ decode_ci_clui_insn("c.lui");
+ end
+ end
+ INSN_CSRLI: decode_cb_sr_insn("c.srli");
+ INSN_CSRAI: decode_cb_sr_insn("c.srai");
+ INSN_CANDI: decode_cb_insn("c.andi");
+ INSN_CSUB: decode_cs_insn("c.sub");
+ INSN_CXOR: decode_cs_insn("c.xor");
+ INSN_COR: decode_cs_insn("c.or");
+ INSN_CAND: decode_cs_insn("c.and");
+ INSN_CBEQZ: decode_cb_insn("c.beqz");
+ INSN_CBNEZ: decode_cb_insn("c.bnez");
+ // C2 Opcodes
+ INSN_CSLLI: decode_ci_cslli_insn("c.slli");
+ INSN_CLWSP: decode_compressed_load_insn("c.lwsp");
+ INSN_SWSP: decode_compressed_store_insn("c.swsp");
+ INSN_CCLCSP: decode_compressed_load_insn("c.CH.clcsp");
+ INSN_CCSCSP: decode_compressed_store_insn("c.CH.cscsp");
+ default: decode_mnemonic("INVALID");
+ endcase
+ end
+ end else begin
+ unique casez (rvfi_insn)
+ // Regular opcodes
+ INSN_LUI: decode_u_insn("lui");
+ // INSN_AUIPC: decode_u_insn("auipc");
+ INSN_JAL: decode_j_insn("jal");
+ INSN_JALR: decode_i_jalr_insn("jalr");
+ // BRANCH
+ INSN_BEQ: decode_b_insn("beq");
+ INSN_BNE: decode_b_insn("bne");
+ INSN_BLT: decode_b_insn("blt");
+ INSN_BGE: decode_b_insn("bge");
+ INSN_BLTU: decode_b_insn("bltu");
+ INSN_BGEU: decode_b_insn("bgeu");
+ // OPIMM
+ INSN_ADDI: begin
+ if (rvfi_insn == 32'h00_00_00_13) begin
+ // TODO: objdump doesn't decode this as nop currently, even though it would be helpful
+ // Decide what to do here: diverge from objdump, or make the trace less readable to
+ // users.
+ //decode_mnemonic("nop");
+ decode_i_insn("addi");
+ end else begin
+ decode_i_insn("addi");
+ end
+ end
+ INSN_SLTI: decode_i_insn("slti");
+ INSN_SLTIU: decode_i_insn("sltiu");
+ INSN_XORI: decode_i_insn("xori");
+ INSN_ORI: decode_i_insn("ori");
+ // Unlike the ratified v.1.0.0 bitmanip extension, the v.0.94 draft extension continues to
+ // define the pseudo-instruction
+ // zext.b rd rs = andi rd, rs, 255.
+ // However, for now the tracer doesn't emit this due to a lack of support in the LLVM and
+ // GCC toolchains. Enabling this functionality when the time is right is tracked in
+ // https://github.com/lowRISC/ibex/issues/1228
+ INSN_ANDI: decode_i_insn("andi");
+ // INSN_ANDI:begin
+ // casez (rvfi_insn)
+ // INSN_ZEXTB: decode_r1_insn("zext.b");
+ // default: decode_i_insn("andi");
+ // endcase
+ // end
+ INSN_SLLI: decode_i_shift_insn("slli");
+ INSN_SRLI: decode_i_shift_insn("srli");
+ INSN_SRAI: decode_i_shift_insn("srai");
+ // OP
+ INSN_ADD: decode_r_insn("add");
+ INSN_SUB: decode_r_insn("sub");
+ INSN_SLL: decode_r_insn("sll");
+ INSN_SLT: decode_r_insn("slt");
+ INSN_SLTU: decode_r_insn("sltu");
+ INSN_XOR: decode_r_insn("xor");
+ INSN_SRL: decode_r_insn("srl");
+ INSN_SRA: decode_r_insn("sra");
+ INSN_OR: decode_r_insn("or");
+ INSN_AND: decode_r_insn("and");
+ // SYSTEM (CSR manipulation)
+ INSN_CSRRW: decode_csr_insn("csrrw");
+ INSN_CSRRS: decode_csr_insn("csrrs");
+ INSN_CSRRC: decode_csr_insn("csrrc");
+ INSN_CSRRWI: decode_csr_insn("csrrwi");
+ INSN_CSRRSI: decode_csr_insn("csrrsi");
+ INSN_CSRRCI: decode_csr_insn("csrrci");
+ // SYSTEM (others)
+ INSN_ECALL: decode_mnemonic("ecall");
+ INSN_EBREAK: decode_mnemonic("ebreak");
+ INSN_MRET: decode_mnemonic("mret");
+ INSN_DRET: decode_mnemonic("dret");
+ INSN_WFI: decode_mnemonic("wfi");
+ // RV32M
+ INSN_PMUL: decode_r_insn("mul");
+ INSN_PMUH: decode_r_insn("mulh");
+ INSN_PMULHSU: decode_r_insn("mulhsu");
+ INSN_PMULHU: decode_r_insn("mulhu");
+ INSN_DIV: decode_r_insn("div");
+ INSN_DIVU: decode_r_insn("divu");
+ INSN_REM: decode_r_insn("rem");
+ INSN_REMU: decode_r_insn("remu");
+ // LOAD & STORE
+ INSN_LOAD: decode_load_insn();
+ INSN_STORE: decode_store_insn();
+ // MISC-MEM
+ INSN_FENCE: decode_fence();
+ INSN_FENCEI: decode_mnemonic("fence.i");
+ // RV32B - ZBA
+ INSN_SH1ADD: decode_r_insn("sh1add");
+ INSN_SH2ADD: decode_r_insn("sh2add");
+ INSN_SH3ADD: decode_r_insn("sh3add");
+ // RV32B - ZBB
+ INSN_RORI: decode_i_shift_insn("rori");
+ INSN_ROL: decode_r_insn("rol");
+ INSN_ROR: decode_r_insn("ror");
+ INSN_MIN: decode_r_insn("min");
+ INSN_MAX: decode_r_insn("max");
+ INSN_MINU: decode_r_insn("minu");
+ INSN_MAXU: decode_r_insn("maxu");
+ INSN_XNOR: decode_r_insn("xnor");
+ INSN_ORN: decode_r_insn("orn");
+ INSN_ANDN: decode_r_insn("andn");
+ // The ratified v.1.0.0 bitmanip extension defines the pseudo-instruction
+ // zext.h rd rs = pack rd, rs, zero.
+ // However, for now the tracer doesn't emit this due to a lack of support in the LLVM and
+ // GCC toolchains. Enabling this functionality when the time is right is tracked in
+ // https://github.com/lowRISC/ibex/issues/1228
+ INSN_PACK: decode_r_insn("pack");
+ // INSN_PACK: begin
+ // casez (rvfi_insn)
+ // INSN_ZEXTH: decode_r1_insn("zext.h");
+ // default: decode_r_insn("pack");
+ // endcase
+ // end
+ INSN_PACKH: decode_r_insn("packh");
+ INSN_PACKU: decode_r_insn("packu");
+ INSN_CLZ: decode_r1_insn("clz");
+ INSN_CTZ: decode_r1_insn("ctz");
+ INSN_CPOP: decode_r1_insn("cpop");
+ INSN_SEXTB: decode_r1_insn("sext.b");
+ INSN_SEXTH: decode_r1_insn("sext.h");
+ // RV32B - ZBS
+ INSN_BCLRI: decode_i_insn("bclri");
+ INSN_BSETI: decode_i_insn("bseti");
+ INSN_BINVI: decode_i_insn("binvi");
+ INSN_BEXTI: decode_i_insn("bexti");
+ INSN_BCLR: decode_r_insn("bclr");
+ INSN_BSET: decode_r_insn("bset");
+ INSN_BINV: decode_r_insn("binv");
+ INSN_BEXT: decode_r_insn("bext");
+ // RV32B - ZBE
+ INSN_BDECOMPRESS: decode_r_insn("bdecompress");
+ INSN_BCOMPRESS: decode_r_insn("bcompress");
+ // RV32B - ZBP
+ INSN_GREV: decode_r_insn("grev");
+ INSN_GREVI: begin
+ unique casez (rvfi_insn)
+ INSN_REV_P: decode_r1_insn("rev.p");
+ INSN_REV2_N: decode_r1_insn("rev2.n");
+ INSN_REV_N: decode_r1_insn("rev.n");
+ INSN_REV4_B: decode_r1_insn("rev4.b");
+ INSN_REV2_B: decode_r1_insn("rev2.b");
+ INSN_REV_B: decode_r1_insn("rev.b");
+ INSN_REV8_H: decode_r1_insn("rev8.h");
+ INSN_REV4_H: decode_r1_insn("rev4.h");
+ INSN_REV2_H: decode_r1_insn("rev2.h");
+ INSN_REV_H: decode_r1_insn("rev.h");
+ INSN_REV16: decode_r1_insn("rev16");
+ INSN_REV8: decode_r1_insn("rev8");
+ INSN_REV4: decode_r1_insn("rev4");
+ INSN_REV2: decode_r1_insn("rev2");
+ INSN_REV: decode_r1_insn("rev");
+ default: decode_i_insn("grevi");
+ endcase
+ end
+ INSN_GORC: decode_r_insn("gorc");
+ INSN_GORCI: begin
+ unique casez (rvfi_insn)
+ INSN_ORC_P: decode_r1_insn("orc.p");
+ INSN_ORC2_N: decode_r1_insn("orc2.n");
+ INSN_ORC_N: decode_r1_insn("orc.n");
+ INSN_ORC4_B: decode_r1_insn("orc4.b");
+ INSN_ORC2_B: decode_r1_insn("orc2.b");
+ INSN_ORC_B: decode_r1_insn("orc.b");
+ INSN_ORC8_H: decode_r1_insn("orc8.h");
+ INSN_ORC4_H: decode_r1_insn("orc4.h");
+ INSN_ORC2_H: decode_r1_insn("orc2.h");
+ INSN_ORC_H: decode_r1_insn("orc.h");
+ INSN_ORC16: decode_r1_insn("orc16");
+ INSN_ORC8: decode_r1_insn("orc8");
+ INSN_ORC4: decode_r1_insn("orc4");
+ INSN_ORC2: decode_r1_insn("orc2");
+ INSN_ORC: decode_r1_insn("orc");
+ default: decode_i_insn("gorci");
+ endcase
+ end
+ INSN_SHFL: decode_r_insn("shfl");
+ INSN_SHFLI: begin
+ unique casez (rvfi_insn)
+ INSN_ZIP_N: decode_r1_insn("zip.n");
+ INSN_ZIP2_B: decode_r1_insn("zip2.b");
+ INSN_ZIP_B: decode_r1_insn("zip.b");
+ INSN_ZIP4_H: decode_r1_insn("zip4.h");
+ INSN_ZIP2_H: decode_r1_insn("zip2.h");
+ INSN_ZIP_H: decode_r1_insn("zip.h");
+ INSN_ZIP8: decode_r1_insn("zip8");
+ INSN_ZIP4: decode_r1_insn("zip4");
+ INSN_ZIP2: decode_r1_insn("zip2");
+ INSN_ZIP: decode_r1_insn("zip");
+ default: decode_i_insn("shfli");
+ endcase
+ end
+ INSN_UNSHFL: decode_r_insn("unshfl");
+ INSN_UNSHFLI: begin
+ unique casez (rvfi_insn)
+ INSN_UNZIP_N: decode_r1_insn("unzip.n");
+ INSN_UNZIP2_B: decode_r1_insn("unzip2.b");
+ INSN_UNZIP_B: decode_r1_insn("unzip.b");
+ INSN_UNZIP4_H: decode_r1_insn("unzip4.h");
+ INSN_UNZIP2_H: decode_r1_insn("unzip2.h");
+ INSN_UNZIP_H: decode_r1_insn("unzip.h");
+ INSN_UNZIP8: decode_r1_insn("unzip8");
+ INSN_UNZIP4: decode_r1_insn("unzip4");
+ INSN_UNZIP2: decode_r1_insn("unzip2");
+ INSN_UNZIP: decode_r1_insn("unzip");
+ default: decode_i_insn("unshfli");
+ endcase
+ end
+ INSN_XPERM_N: decode_r_insn("xperm_n");
+ INSN_XPERM_B: decode_r_insn("xperm_b");
+ INSN_XPERM_H: decode_r_insn("xperm_h");
+ INSN_SLO: decode_r_insn("slo");
+ INSN_SRO: decode_r_insn("sro");
+ INSN_SLOI: decode_i_shift_insn("sloi");
+ INSN_SROI: decode_i_shift_insn("sroi");
+
+ // RV32B - ZBT
+ INSN_CMIX: decode_r_cmixcmov_insn("cmix");
+ INSN_CMOV: decode_r_cmixcmov_insn("cmov");
+ INSN_FSR: decode_r_funnelshift_insn("fsr");
+ INSN_FSL: decode_r_funnelshift_insn("fsl");
+ INSN_FSRI: decode_i_funnelshift_insn("fsri");
+
+ // RV32B - ZBF
+ INSN_BFP: decode_r_insn("bfp");
+
+ // RV32B - ZBC
+ INSN_CLMUL: decode_r_insn("clmul");
+ INSN_CLMULR: decode_r_insn("clmulr");
+ INSN_CLMULH: decode_r_insn("clmulh");
+
+ // RV32B - ZBR
+ INSN_CRC32_B: decode_r1_insn("crc32.b");
+ INSN_CRC32_H: decode_r1_insn("crc32.h");
+ INSN_CRC32_W: decode_r1_insn("crc32.w");
+ INSN_CRC32C_B: decode_r1_insn("crc32c.b");
+ INSN_CRC32C_H: decode_r1_insn("crc32c.h");
+ INSN_CRC32C_W: decode_r1_insn("crc32c.w");
+
+ // CHERI capability field-query (get) instructions
+ INSN_CHGETPERM: decode_cheri_rd_cs1_insn("CH.cgetperm");
+ INSN_CHGETTYPE: decode_cheri_rd_cs1_insn("CH.cgettype");
+ INSN_CHGETBASE: decode_cheri_rd_cs1_insn("CH.cgetbase");
+ INSN_CHGETTOP: decode_cheri_rd_cs1_insn("CH.cgettop");
+ INSN_CHGETLEN: decode_cheri_rd_cs1_insn("CH.cgetlen");
+ INSN_CHGETTAG: decode_cheri_rd_cs1_insn("CH.cgettag");
+ INSN_CHGETSEALED: decode_cheri_rd_cs1_insn("CH.cgetsealed");
+ INSN_CHGETADDR: decode_cheri_rd_cs1_insn("CH.cgetaddr");
+ INSN_CHGETHIGH: decode_cheri_rd_cs1_insn("CH.cgethigh");
+
+ INSN_CHSEAL: decode_cheri_cd_cs1_cs2_insn("CH.cseal");
+ INSN_CHUNSEAL: decode_cheri_cd_cs1_cs2_insn("CH.cunseal");
+ INSN_CHANDPERM: decode_cheri_cd_cs1_rs2_insn("CH.candperm");
+ INSN_CHSETADDR: decode_cheri_cd_cs1_rs2_insn("CH.csetaddr");
+ INSN_CHINCADDR: decode_cheri_cd_cs1_rs2_insn("CH.cincaddr");
+ INSN_CHINCADDRIMM: decode_cheri_cd_cs1_imm_insn("CH.cincaddrimm");
+ INSN_CHSETBOUNDS: decode_cheri_cd_cs1_rs2_insn("CH.csetbounds");
+ INSN_CHSETBOUNDSEX: decode_cheri_cd_cs1_rs2_insn("CH.csetboundsexact");
+ INSN_CHSETBOUNDSRNDN: decode_cheri_cd_cs1_rs2_insn("CH.csetboundsrounddown");
+
+ INSN_CHSETBOUNDSIMM: decode_cheri_cd_cs1_imm_insn("CH.csetboundsimm");
+ INSN_CHCLEARTAG: decode_cheri_cd_cs1_insn("CH.ccleartag");
+ INSN_CHCRRL: decode_cheri_rd_rs1_insn("CH.crrl");
+ INSN_CHCRAM: decode_cheri_rd_rs1_insn("CH.cram");
+
+ INSN_CHSUB: decode_cheri_rd_cs1_cs2_insn("CH.csub");
+ INSN_CHMOVE: decode_cheri_cd_cs1_insn("CH.cmove");
+ INSN_CHTESTSUB: decode_cheri_rd_cs1_cs2_insn("CH.ctestsubset");
+ INSN_CHSETEQUAL: decode_cheri_rd_cs1_cs2_insn("CH.csetequalexact");
+ INSN_CHSETHIGH: decode_cheri_cd_cs1_rs2_insn("CH.csethigh");
+ //INSN_CHJALR: decode_cheri_cd_cs1_insn("CH.jalr");
+ INSN_CHCSRRW: decode_cheri_scrrw_insn();
+ INSN_AUIPC: decode_cheri_auipcc_insn();
+ INSN_AUICGP: decode_cheri_auicgp_insn();
+
+ default: decode_mnemonic("INVALID");
+ endcase
+ end
+ end
+// synthesis translate_on
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv
new file mode 100644
index 0000000..ce0fed8
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv
@@ -0,0 +1,379 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2017 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+package cheriot_tracer_pkg;
+ import cheriot_pkg::*;
+
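+ // 2-bit opcode quadrants (instr[1:0]) of the 16-bit compressed encodings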
+ parameter logic [1:0] OPCODE_C0 = 2'b00;
+ parameter logic [1:0] OPCODE_C1 = 2'b01;
+ parameter logic [1:0] OPCODE_C2 = 2'b10;
+
+ // instruction masks (for tracer)
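+ // '?' bits are don't-cares; the tracer matches these patterns using 'unique casez'.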
+ parameter logic [31:0] INSN_LUI = { 25'h?, {OPCODE_LUI } };
+ parameter logic [31:0] INSN_AUIPC = { 25'h?, {OPCODE_AUIPC} };
+ parameter logic [31:0] INSN_JAL = { 25'h?, {OPCODE_JAL } };
+ parameter logic [31:0] INSN_JALR = { 17'h?, 3'b000, 5'h?, {OPCODE_JALR } };
+
+ // BRANCH
+ parameter logic [31:0] INSN_BEQ = { 17'h?, 3'b000, 5'h?, {OPCODE_BRANCH} };
+ parameter logic [31:0] INSN_BNE = { 17'h?, 3'b001, 5'h?, {OPCODE_BRANCH} };
+ parameter logic [31:0] INSN_BLT = { 17'h?, 3'b100, 5'h?, {OPCODE_BRANCH} };
+ parameter logic [31:0] INSN_BGE = { 17'h?, 3'b101, 5'h?, {OPCODE_BRANCH} };
+ parameter logic [31:0] INSN_BLTU = { 17'h?, 3'b110, 5'h?, {OPCODE_BRANCH} };
+ parameter logic [31:0] INSN_BGEU = { 17'h?, 3'b111, 5'h?, {OPCODE_BRANCH} };
+
+ // OPIMM
+ parameter logic [31:0] INSN_ADDI = { 17'h?, 3'b000, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SLTI = { 17'h?, 3'b010, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SLTIU = { 17'h?, 3'b011, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_XORI = { 17'h?, 3'b100, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORI = { 17'h?, 3'b110, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ANDI = { 17'h?, 3'b111, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SLLI = { 7'b0000000, 10'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SRLI = { 7'b0000000, 10'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SRAI = { 7'b0100000, 10'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+
+ // OP
+ parameter logic [31:0] INSN_ADD = { 7'b0000000, 10'h?, 3'b000, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SUB = { 7'b0100000, 10'h?, 3'b000, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SLL = { 7'b0000000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SLT = { 7'b0000000, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SLTU = { 7'b0000000, 10'h?, 3'b011, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_XOR = { 7'b0000000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SRL = { 7'b0000000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SRA = { 7'b0100000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_OR = { 7'b0000000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_AND = { 7'b0000000, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+
+ // SYSTEM
+ parameter logic [31:0] INSN_CSRRW = { 17'h?, 3'b001, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_CSRRS = { 17'h?, 3'b010, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_CSRRC = { 17'h?, 3'b011, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_CSRRWI = { 17'h?, 3'b101, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_CSRRSI = { 17'h?, 3'b110, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_CSRRCI = { 17'h?, 3'b111, 5'h?, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_ECALL = { 12'b000000000000, 13'b0, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_EBREAK = { 12'b000000000001, 13'b0, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_MRET = { 12'b001100000010, 13'b0, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_DRET = { 12'b011110110010, 13'b0, {OPCODE_SYSTEM} };
+ parameter logic [31:0] INSN_WFI = { 12'b000100000101, 13'b0, {OPCODE_SYSTEM} };
+
+ // RV32M
+ parameter logic [31:0] INSN_DIV = { 7'b0000001, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_DIVU = { 7'b0000001, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_REM = { 7'b0000001, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_REMU = { 7'b0000001, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PMUL = { 7'b0000001, 10'h?, 3'b000, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PMUH = { 7'b0000001, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PMULHSU = { 7'b0000001, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PMULHU = { 7'b0000001, 10'h?, 3'b011, 5'h?, {OPCODE_OP} };
+
+ // RV32B
+ // ZBA
+ parameter logic [31:0] INSN_SH1ADD = { 7'b0010000, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SH2ADD = { 7'b0010000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SH3ADD = { 7'b0010000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+
+ // ZBB
+ // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
+ // instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as
+ // fsri.
+ parameter logic [31:0] INSN_RORI = { 5'b01100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CPOP = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SEXTB = { 12'b011000000100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_SEXTH = { 12'b011000000101, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+
+ // The zext.h and zext.b pseudo-instructions are defined in the ratified v.1.0.0 and draft v.0.94
+ // specifications of the bitmanip extension, respectively. They are currently not emitted by the
+ // tracer due to a lack of support in the LLVM and GCC toolchains. Enabling this functionality
+ // when the time is right is tracked in https://github.com/lowRISC/ibex/issues/1228
+ // zext.b -- pseudo-instruction: andi rd, rs 255
+ // parameter logic [31:0] INSN_ZEXTB =
+ // { 4'b0000, 8'b11111111, 5'h?, 3'b111, 5'h?, {OPCODE_OP_IMM} };
+ // zext.h -- pseudo-instruction: pack rd, rs zero
+ // parameter logic [31:0] INSN_ZEXTH = { 7'b0000100, 5'b00000, 5'h?, 3'b100, 5'h?, {OPCODE_OP} };
+
+ parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+
+ // ZBS
+ parameter logic [31:0] INSN_BCLRI = { 5'b01001, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_BSETI = { 5'b00101, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_BINVI = { 5'b01101, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
+ // instr[24:20] are effectively used. Whenever instr[26] is set, bexti is instead decoded as fsri.
+ parameter logic [31:0] INSN_BEXTI = { 5'b01001, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+
+ parameter logic [31:0] INSN_BCLR = { 7'b0100100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_BSET = { 7'b0010100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_BINV = { 7'b0110100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_BEXT = { 7'b0100100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+
+ // ZBP
+ // grevi
+ // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
+ // instr[24:20] are effectively used. Whenever instr[26] is set, grevi is instead decoded as fsri.
+ parameter logic [31:0] INSN_GREVI = { 5'b01101, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ // grevi -- pseudo-instructions
+ parameter logic [31:0] INSN_REV_P =
+ { 5'b01101, 1'b0, 1'b?, 5'b00001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV2_N =
+ { 5'b01101, 1'b0, 1'b?, 5'b00010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV_N =
+ { 5'b01101, 1'b0, 1'b?, 5'b00011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV4_B =
+ { 5'b01101, 1'b0, 1'b?, 5'b00100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV2_B =
+ { 5'b01101, 1'b0, 1'b?, 5'b00110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV_B =
+ { 5'b01101, 1'b0, 1'b?, 5'b00111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV8_H =
+ { 5'b01101, 1'b0, 1'b?, 5'b01000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV4_H =
+ { 5'b01101, 1'b0, 1'b?, 5'b01100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV2_H =
+ { 5'b01101, 1'b0, 1'b?, 5'b01110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV_H =
+ { 5'b01101, 1'b0, 1'b?, 5'b01111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV16 =
+ { 5'b01101, 1'b0, 1'b?, 5'b10000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV8 =
+ { 5'b01101, 1'b0, 1'b?, 5'b11000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV4 =
+ { 5'b01101, 1'b0, 1'b?, 5'b11100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV2 =
+ { 5'b01101, 1'b0, 1'b?, 5'b11110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_REV =
+ { 5'b01101, 1'b0, 1'b?, 5'b11111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ // gorci
+ // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
+ // instr[24:20] are effectively used. Whenever instr[26] is set, gorci is instead decoded as fsri.
+ parameter logic [31:0] INSN_GORCI = { 5'b00101, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ // gorci -- pseudo-instructions
+ parameter logic [31:0] INSN_ORC_P =
+ { 5'b00101, 1'b0, 1'b?, 5'b00001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC2_N =
+ { 5'b00101, 1'b0, 1'b?, 5'b00010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC_N =
+ { 5'b00101, 1'b0, 1'b?, 5'b00011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC4_B =
+ { 5'b00101, 1'b0, 1'b?, 5'b00100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC2_B =
+ { 5'b00101, 1'b0, 1'b?, 5'b00110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC_B =
+ { 5'b00101, 1'b0, 1'b?, 5'b00111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC8_H =
+ { 5'b00101, 1'b0, 1'b?, 5'b01000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC4_H =
+ { 5'b00101, 1'b0, 1'b?, 5'b01100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC2_H =
+ { 5'b00101, 1'b0, 1'b?, 5'b01110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC_H =
+ { 5'b00101, 1'b0, 1'b?, 5'b01111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC16 =
+ { 5'b00101, 1'b0, 1'b?, 5'b10000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC8 =
+ { 5'b00101, 1'b0, 1'b?, 5'b11000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC4 =
+ { 5'b00101, 1'b0, 1'b?, 5'b11100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC2 =
+ { 5'b00101, 1'b0, 1'b?, 5'b11110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ORC =
+ { 5'b00101, 1'b0, 1'b?, 5'b11111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ // shfli
+ parameter logic [31:0] INSN_SHFLI = { 6'b000010, 11'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ // shfli -- pseudo-instructions
+ parameter logic [31:0] INSN_ZIP_N =
+ { 6'b000010, 2'h?, 4'b0001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP2_B =
+ { 6'b000010, 2'h?, 4'b0010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP_B =
+ { 6'b000010, 2'h?, 4'b0011, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP4_H =
+ { 6'b000010, 2'h?, 4'b0100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP2_H =
+ { 6'b000010, 2'h?, 4'b0110, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP_H =
+ { 6'b000010, 2'h?, 4'b0111, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP8 =
+ { 6'b000010, 2'h?, 4'b1000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP4 =
+ { 6'b000010, 2'h?, 4'b1100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP2 =
+ { 6'b000010, 2'h?, 4'b1110, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_ZIP =
+ { 6'b000010, 2'h?, 4'b1111, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ // unshfli
+ parameter logic [31:0] INSN_UNSHFLI = { 6'b000010, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ // unshfli -- pseudo-instructions
+ parameter logic [31:0] INSN_UNZIP_N =
+ { 6'b000010, 2'h?, 4'b0001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP2_B =
+ { 6'b000010, 2'h?, 4'b0010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP_B =
+ { 6'b000010, 2'h?, 4'b0011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP4_H =
+ { 6'b000010, 2'h?, 4'b0100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP2_H =
+ { 6'b000010, 2'h?, 4'b0110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP_H =
+ { 6'b000010, 2'h?, 4'b0111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP8 =
+ { 6'b000010, 2'h?, 4'b1000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP4 =
+ { 6'b000010, 2'h?, 4'b1100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP2 =
+ { 6'b000010, 2'h?, 4'b1110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_UNZIP =
+ { 6'b000010, 2'h?, 4'b1111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+
+ parameter logic [31:0] INSN_GREV = { 7'b0110100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_GORC = { 7'b0010100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+
+ parameter logic [31:0] INSN_XPERM_N = { 7'b0010100, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_XPERM_B = { 7'b0010100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_XPERM_H = { 7'b0010100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+
+ parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
+ // instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as
+ // fsri.
+ parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+
+ // ZBE
+ parameter logic [31:0] INSN_BDECOMPRESS = {7'b0100100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_BCOMPRESS = {7'b0000100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+
+ // ZBT
+ parameter logic [31:0] INSN_FSRI = { 5'h?, 1'b1, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
+
+ parameter logic [31:0] INSN_CMIX = {5'h?, 2'b11, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_CMOV = {5'h?, 2'b11, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_FSL = {5'h?, 2'b10, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_FSR = {5'h?, 2'b10, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
+
+ // ZBF
+ parameter logic [31:0] INSN_BFP = {7'b0100100, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
+
+ // ZBC
+ parameter logic [31:0] INSN_CLMUL = {7'b0000101, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_CLMULR = {7'b0000101, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+ parameter logic [31:0] INSN_CLMULH = {7'b0000101, 10'h?, 3'b011, 5'h?, {OPCODE_OP} };
+
+ // ZBR
+ parameter logic [31:0] INSN_CRC32_B =
+ {7'b0110000, 5'b10000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CRC32_H =
+ {7'b0110000, 5'b10001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CRC32_W =
+ {7'b0110000, 5'b10010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CRC32C_B =
+ {7'b0110000, 5'b11000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CRC32C_H =
+ {7'b0110000, 5'b11001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+ parameter logic [31:0] INSN_CRC32C_W =
+ {7'b0110000, 5'b11010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
+
+ // LOAD & STORE
+ parameter logic [31:0] INSN_LOAD = {25'h?, {OPCODE_LOAD } };
+ parameter logic [31:0] INSN_STORE = {25'h?, {OPCODE_STORE} };
+
+ // MISC-MEM
+ parameter logic [31:0] INSN_FENCE = { 17'h?, 3'b000, 5'h?, {OPCODE_MISC_MEM} };
+ parameter logic [31:0] INSN_FENCEI = { 17'h0, 3'b001, 5'h0, {OPCODE_MISC_MEM} };
+
+ // Compressed Instructions
+ // C0
+ parameter logic [15:0] INSN_CADDI4SPN = { 3'b000, 11'h?, {OPCODE_C0} };
+ parameter logic [15:0] INSN_CLW = { 3'b010, 11'h?, {OPCODE_C0} };
+ parameter logic [15:0] INSN_CSW = { 3'b110, 11'h?, {OPCODE_C0} };
+ parameter logic [15:0] INSN_CCLC = { 3'b011, 11'h?, {OPCODE_C0} };
+ parameter logic [15:0] INSN_CCSC = { 3'b111, 11'h?, {OPCODE_C0} };
+
+ // C1
+ parameter logic [15:0] INSN_CADDI = { 3'b000, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CJAL = { 3'b001, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CJ = { 3'b101, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CLI = { 3'b010, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CLUI = { 3'b011, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CBEQZ = { 3'b110, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CBNEZ = { 3'b111, 11'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CSRLI = { 3'b100, 1'h?, 2'b00, 8'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CSRAI = { 3'b100, 1'h?, 2'b01, 8'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CANDI = { 3'b100, 1'h?, 2'b10, 8'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CSUB = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b00, 3'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CXOR = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b01, 3'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_COR = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b10, 3'h?, {OPCODE_C1} };
+ parameter logic [15:0] INSN_CAND = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b11, 3'h?, {OPCODE_C1} };
+
+ // C2
+ parameter logic [15:0] INSN_CSLLI = { 3'b000, 11'h?, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CLWSP = { 3'b010, 11'h?, {OPCODE_C2} };
+ parameter logic [15:0] INSN_SWSP = { 3'b110, 11'h?, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CMV = { 3'b100, 1'b0, 10'h?, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CADD = { 3'b100, 1'b1, 10'h?, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CEBREAK = { 3'b100, 1'b1, 5'h0, 5'h0, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CJR = { 3'b100, 1'b0, 5'h0, 5'h0, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CJALR = { 3'b100, 1'b1, 5'h?, 5'h0, {OPCODE_C2} };
+ parameter logic [15:0] INSN_CCLCSP = { 3'b011, 11'h?, {OPCODE_C2} }; // reuses the c.flwsp / RV64 c.ldsp encoding
+ parameter logic [15:0] INSN_CCSCSP = { 3'b111, 11'h?, {OPCODE_C2} }; // reuses the c.fswsp / RV64 c.sdsp encoding
+
+ // 32-bit CHERI instructions
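+ // These all use the OPCODE_CHERI major opcode. Most are selected by funct7 (bits [31:25]);
+ // the immediate forms use funct3 instead, and the 7'h7f group additionally uses the rs2
+ // field (bits [24:20]) as a sub-function code.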
+ parameter logic [31:0] INSN_CHGETPERM = {7'h7f, 5'h0, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETTYPE = {7'h7f, 5'h1, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETBASE = {7'h7f, 5'h2, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETHIGH = {7'h7f, 5'h17, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETTOP = {7'h7f, 5'h18, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETLEN = {7'h7f, 5'h3, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETTAG = {7'h7f, 5'h4, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETSEALED = {7'h7f, 5'h5, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHGETADDR = {7'h7f, 5'hf, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+
+ parameter logic [31:0] INSN_CHSEAL = {7'h0b, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHUNSEAL = {7'h0c, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHANDPERM = {7'h0d, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETADDR = {7'h10, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHINCADDR = {7'h11, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHINCADDRIMM = {12'h?, 5'h?, 3'b001, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETBOUNDS = {7'h08, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETBOUNDSEX = {7'h09, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETBOUNDSRNDN = {7'h0a, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETBOUNDSIMM = {12'h?, 5'h?, 3'b010, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHCLEARTAG = {7'h7f, 5'hb, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHCRRL = {7'h7f, 5'h8, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHCRAM = {7'h7f, 5'h9, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+
+ parameter logic [31:0] INSN_CHSUB = {7'h14, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHMOVE = {7'h7f, 5'ha, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHTESTSUB = {7'h20, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETEQUAL = {7'h21, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_CHSETHIGH = {7'h16, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+
+ parameter logic [31:0] INSN_CHJALR = {7'h7f, 5'hc, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+
+ parameter logic [31:0] INSN_CHCSRRW = {7'h01, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} };
+ parameter logic [31:0] INSN_AUICGP = { 25'h?, {OPCODE_AUICGP} };
+
+endpackage
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv
new file mode 100644
index 0000000..8ff5461
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv
@@ -0,0 +1,269 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Writeback Stage
+ *
+ * Writeback is an optional third pipeline stage. It writes data produced in the ID/EX stage back
+ * to the register file, or awaits the response to a load/store (the LSU writes load data directly
+ * to the register file). If the writeback stage is not present (WritebackStage == 0), this module
+ * acts as a simple passthrough that writes data directly to the register file.
+ */
+
+`include "prim_assert.sv"
+`include "dv_fcov_macros.svh"
+
+
+module cheriot_wb_stage import cheri_pkg::*; #(
+ parameter bit ResetAll = 1'b0,
+ parameter bit WritebackStage = 1'b0
+) (
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic en_wb_i,
+ input cheriot_pkg::wb_instr_type_e instr_type_wb_i,
+ input logic [31:0] pc_id_i,
+ input logic instr_is_compressed_id_i,
+ input logic instr_perf_count_id_i,
+ input logic instr_is_cheri_i,
+ input logic cheri_load_i,
+ input logic cheri_store_i,
+
+ output logic ready_wb_o,
+ output logic rf_write_wb_o,
+ output logic outstanding_load_wb_o,
+ output logic outstanding_store_wb_o,
+ output logic [31:0] pc_wb_o,
+ output logic perf_instr_ret_wb_o,
+ output logic perf_instr_ret_compressed_wb_o,
+ output logic perf_instr_ret_wb_spec_o,
+ output logic perf_instr_ret_compressed_wb_spec_o,
+
+ input logic [4:0] rf_waddr_id_i,
+ input logic [31:0] rf_wdata_id_i,
+ input logic rf_we_id_i,
+
+ input logic cheri_rf_we_i,
+ input logic [31:0] cheri_rf_wdata_i,
+ input reg_cap_t cheri_rf_wcap_i,
+
+ input logic [31:0] rf_wdata_lsu_i,
+ input reg_cap_t rf_wcap_lsu_i,
+ input logic rf_we_lsu_i,
+
+ output logic [31:0] rf_wdata_fwd_wb_o,
+ output reg_cap_t rf_wcap_fwd_wb_o,
+
+ output logic [4:0] rf_waddr_wb_o,
+ output logic [31:0] rf_wdata_wb_o,
+ output reg_cap_t rf_wcap_wb_o,
+ output logic rf_we_wb_o,
+
+ input logic lsu_resp_valid_i,
+ input logic lsu_resp_err_i,
+
+ output logic instr_done_wb_o
+);
+
+ import cheriot_pkg::*;
+
+ // 0 == RF write from ID
+ // 1 == RF write from LSU
+ logic [31:0] rf_wdata_wb_mux [2];
+ logic [1:0] rf_wdata_wb_mux_we;
+
+ reg_cap_t rf_wcap_wb;
+
+ if (WritebackStage) begin : g_writeback_stage
+ logic [31:0] rf_wdata_wb_q;
+ logic rf_we_wb_q;
+ logic [4:0] rf_waddr_wb_q;
+
+ logic wb_done;
+
+ logic wb_valid_q;
+ logic [31:0] wb_pc_q;
+ logic wb_compressed_q;
+ logic wb_count_q;
+ wb_instr_type_e wb_instr_type_q;
+
+ logic wb_valid_d;
+
+ logic wb_is_cheri_q;
+ logic wb_cheri_load_q, wb_cheri_store_q;
+ logic cheri_rf_we_q;
+ logic [31:0] cheri_rf_wdata_q;
+ reg_cap_t cheri_rf_wcap_q;
+
+ // The stage becomes valid when an instruction enters from ID/EX; valid is cleared once the
+ // instruction is done.
+ assign wb_valid_d = (en_wb_i & ready_wb_o) | (wb_valid_q & ~wb_done);
+
+ // Writeback for non load/store instructions always completes in a cycle (so instantly done)
+ // Writeback for load/store must wait for response to be received by the LSU
+ // Signal only relevant if wb_valid_q set
+
+ // Note: cheri_load/store doesn't come just from the decoder; it also reflects bounds/permission check results.
+ assign wb_done = (wb_instr_type_q == WB_INSTR_OTHER && ~wb_cheri_load_q && ~wb_cheri_store_q) | lsu_resp_valid_i;
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ wb_valid_q <= 1'b0;
+ end else begin
+ wb_valid_q <= wb_valid_d;
+ end
+ end
+
+ if (ResetAll) begin : g_wb_regs_ra
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ rf_we_wb_q <= '0;
+ rf_waddr_wb_q <= '0;
+ rf_wdata_wb_q <= '0;
+ wb_instr_type_q <= wb_instr_type_e'(0);
+ wb_pc_q <= '0;
+ wb_compressed_q <= '0;
+ wb_count_q <= '0;
+
+ wb_is_cheri_q <= 1'b0;
+ wb_cheri_load_q <= 1'b0;
+ wb_cheri_store_q <= 1'b0;
+ cheri_rf_we_q <= 1'b0;
+ cheri_rf_wdata_q <= 32'h0;
+ cheri_rf_wcap_q <= NULL_REG_CAP;
+ end else if (en_wb_i) begin
+ rf_we_wb_q <= rf_we_id_i;
+ rf_waddr_wb_q <= rf_waddr_id_i;
+ rf_wdata_wb_q <= rf_wdata_id_i;
+ wb_instr_type_q <= instr_type_wb_i;
+ wb_pc_q <= pc_id_i;
+ wb_compressed_q <= instr_is_compressed_id_i;
+ wb_count_q <= instr_perf_count_id_i;
+
+ wb_is_cheri_q <= instr_is_cheri_i;
+ wb_cheri_load_q <= cheri_load_i;
+ wb_cheri_store_q <= cheri_store_i;
+ cheri_rf_we_q <= cheri_rf_we_i;
+ cheri_rf_wdata_q <= cheri_rf_wdata_i;
+ cheri_rf_wcap_q <= cheri_rf_wcap_i;
+ end
+ end
+ end else begin : g_wb_regs_nr
+ always_ff @(posedge clk_i) begin
+ if (en_wb_i) begin
+ rf_we_wb_q <= rf_we_id_i;
+ rf_waddr_wb_q <= rf_waddr_id_i;
+ rf_wdata_wb_q <= rf_wdata_id_i;
+ wb_instr_type_q <= instr_type_wb_i;
+ wb_pc_q <= pc_id_i;
+ wb_compressed_q <= instr_is_compressed_id_i;
+ wb_count_q <= instr_perf_count_id_i;
+
+ wb_is_cheri_q <= instr_is_cheri_i;
+ wb_cheri_load_q <= cheri_load_i;
+ wb_cheri_store_q <= cheri_store_i;
+ cheri_rf_we_q <= cheri_rf_we_i;
+ cheri_rf_wdata_q <= cheri_rf_wdata_i;
+ cheri_rf_wcap_q <= cheri_rf_wcap_i;
+ end
+ end
+ end
+
+ assign rf_waddr_wb_o = rf_waddr_wb_q;
+ assign rf_wdata_wb_mux[0] = wb_is_cheri_q ? cheri_rf_wdata_q : rf_wdata_wb_q;
+ assign rf_wdata_wb_mux_we[0] = (wb_is_cheri_q ? cheri_rf_we_q : rf_we_wb_q) & wb_valid_q;
+
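+ // WB can accept a new instruction when it is empty or when the current one completes this cycle.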
+ assign ready_wb_o = ~wb_valid_q | wb_done;
+
+ // This is used for determining RF read hazards & forwarding in ID/EX
+ // Instruction in writeback will be writing to register file if either rf_we is set or writeback
+ // is awaiting load data.
+ assign rf_write_wb_o = wb_valid_q & (rf_we_wb_q | cheri_rf_we_q | (wb_instr_type_q == WB_INSTR_LOAD) | wb_cheri_load_q);
+
+ assign outstanding_load_wb_o = wb_valid_q & ((wb_instr_type_q == WB_INSTR_LOAD) | wb_cheri_load_q);
+ assign outstanding_store_wb_o = wb_valid_q & ((wb_instr_type_q == WB_INSTR_STORE) | wb_cheri_store_q);
+
+ assign pc_wb_o = wb_pc_q;
+
+ assign instr_done_wb_o = wb_valid_q & wb_done;
+
+ // Increment instruction retire counters for valid instructions that did not see LSU errors.
+ // Speculative versions of the signals do not factor in exceptions and whether the instruction
+ // is done yet. These are used to get correct values for instructions reading the relevant
+ // performance counters in the ID stage.
+ assign perf_instr_ret_wb_spec_o = wb_count_q;
+ assign perf_instr_ret_compressed_wb_spec_o = perf_instr_ret_wb_spec_o & wb_compressed_q;
+ assign perf_instr_ret_wb_o = instr_done_wb_o & wb_count_q &
+ ~(lsu_resp_valid_i & lsu_resp_err_i);
+ assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
+
+ // Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
+ // rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
+ // that returns too late to be used on the forwarding path.
+ assign rf_wdata_fwd_wb_o = wb_is_cheri_q ? cheri_rf_wdata_q : rf_wdata_wb_q;
+ assign rf_wcap_fwd_wb_o = wb_is_cheri_q ? cheri_rf_wcap_q : NULL_REG_CAP;
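+ // For CHERI loads the capability metadata comes from the LSU (rf_wcap_lsu_i) through the
+ // writeback mux below, so the ID-side capability is nulled here.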
+ assign rf_wcap_wb = (wb_is_cheri_q && (~wb_cheri_load_q)) ? cheri_rf_wcap_q : NULL_REG_CAP;
+
+ end else begin : g_bypass_wb
+ // without writeback stage just pass through register write signals
+ assign rf_waddr_wb_o = rf_waddr_id_i;
+ assign rf_wdata_wb_mux[0] = instr_is_cheri_i ? cheri_rf_wdata_i : rf_wdata_id_i;
+ assign rf_wdata_wb_mux_we[0] = instr_is_cheri_i ? cheri_rf_we_i : rf_we_id_i;
+ assign rf_wcap_wb = (instr_is_cheri_i && (~cheri_load_i)) ? cheri_rf_wcap_i : NULL_REG_CAP;
+
+    // Increment the instruction retire counters for valid instructions that are not LSU errors.
+    // The speculative signals are always 0 when no writeback stage is present, as the raw
+    // counter values are already correct.
+ assign perf_instr_ret_wb_spec_o = 1'b0;
+ assign perf_instr_ret_compressed_wb_spec_o = 1'b0;
+ assign perf_instr_ret_wb_o = instr_perf_count_id_i & en_wb_i &
+ ~(lsu_resp_valid_i & lsu_resp_err_i);
+ assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
+
+    // ready must be a constant 1 without a writeback stage (otherwise the ID/EX stage would stall)
+ assign ready_wb_o = 1'b1;
+
+    // Unused writeback-stage-only IO & wiring.
+    // Assign inputs and internal wiring to unused_* signals to satisfy lint checks,
+    // and tie off outputs to constant values.
+ logic unused_clk;
+ logic unused_rst;
+ wb_instr_type_e unused_instr_type_wb;
+ logic [31:0] unused_pc_id;
+
+ assign unused_clk = clk_i;
+ assign unused_rst = rst_ni;
+ assign unused_instr_type_wb = instr_type_wb_i;
+ assign unused_pc_id = pc_id_i;
+
+ assign outstanding_load_wb_o = 1'b0;
+ assign outstanding_store_wb_o = 1'b0;
+ assign pc_wb_o = '0;
+ assign rf_write_wb_o = 1'b0;
+ assign rf_wdata_fwd_wb_o = 32'b0;
+ assign rf_wcap_fwd_wb_o = NULL_REG_CAP;
+ assign instr_done_wb_o = 1'b0;
+ end
+
+ assign rf_wdata_wb_mux[1] = rf_wdata_lsu_i;
+ assign rf_wdata_wb_mux_we[1] = rf_we_lsu_i;
+
+ // RF write data can come from ID results (all RF writes that aren't because of loads will come
+ // from here) or the LSU (RF writes for load data)
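+  // At most one of the two write strobes is active at a time (checked by the
+  // RFWriteFromOneSourceOnly assertion below), so the AND/OR mux never merges data from both
+  // sources.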
+ assign rf_wdata_wb_o = ({32{rf_wdata_wb_mux_we[0]}} & rf_wdata_wb_mux[0]) |
+ ({32{rf_wdata_wb_mux_we[1]}} & rf_wdata_wb_mux[1]);
+ assign rf_we_wb_o = |rf_wdata_wb_mux_we;
+
+ assign rf_wcap_wb_o = rf_wdata_wb_mux_we[0] ? rf_wcap_wb :
+ (rf_wdata_wb_mux_we[1] ? rf_wcap_lsu_i : NULL_REG_CAP);
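+  // Capability write data follows the same selection: EX-produced capabilities come from
+  // rf_wcap_wb, while capability load data comes from the LSU (rf_wcap_lsu_i).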
+
+ `DV_FCOV_SIGNAL_GEN_IF(logic, wb_valid, g_writeback_stage.wb_valid_q, WritebackStage)
+
+ `ASSERT(RFWriteFromOneSourceOnly, $onehot0(rf_wdata_wb_mux_we))
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv b/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv
new file mode 100644
index 0000000..5d949bf
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv
@@ -0,0 +1,465 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`ifdef RISCV_FORMAL
+ `define RVFI
+`endif
+
+`include "prim_assert.sv"
+
+
+/**
+ * Top level module of the ibex RISC-V core
+ */
+module cheriot_top import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ parameter bit DbgTriggerEn = 1'b1,
+ parameter int unsigned DbgHwBreakNum = 2,
+ parameter int unsigned MHPMCounterNum = 0,
+ parameter int unsigned MHPMCounterWidth = 40,
+ parameter bit RV32E = 1'b0,
+ parameter rv32b_e RV32B = RV32BNone,
+ parameter rv32m_e RV32M = RV32MFast,
+ parameter bit WritebackStage = 1'b1,
+ parameter bit BranchPredictor = 1'b0,
+  parameter bit          SecureIbex       = 1'b0,           // placeholder for TB compatibility
+ parameter bit CHERIoTEn = 1'b1,
+ parameter int unsigned DataWidth = 33,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2002_f000, // 4kB default
+ parameter int unsigned TSMapSize = 1024,
+ parameter bit MemCapFmt = 1'b0,
+ parameter bit CheriPPLBC = 1'b1,
+ parameter bit CheriSBND2 = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64,
+ parameter bit CheriCapIT8 = 1'b0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i, // enable all clock gates for testing
+ input prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic [6:0] instr_rdata_intg_i,
+ input logic instr_err_i,
+
+ // Data memory interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [32:0] data_wdata_o,
+ output logic [6:0] data_wdata_intg_o,
+ input logic [32:0] data_rdata_i,
+ input logic [6:0] data_rdata_intg_i,
+ input logic data_err_i,
+
+ // TS map memory interface
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i,
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+ output logic cheri_fatal_err_o,
+
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+  input  logic                         irq_nm_i,       // non-maskable interrupt
+
+ // Scrambling Interface
+ input logic scramble_key_valid_i,
+ input logic [SCRAMBLE_KEY_W-1:0] scramble_key_i,
+ input logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_i,
+ output logic scramble_req_o,
+
+ // Debug Interface
+ input logic debug_req_i,
+ output crash_dump_t crash_dump_o,
+ output logic double_fault_seen_o,
+
+ // RISC-V Formal Interface
+ // Does not comply with the coding standards of _i/_o suffixes, but follows
+ // the convention of RISC-V Formal Interface Specification.
+`ifdef RVFI
+ output logic rvfi_valid,
+ output logic [63:0] rvfi_order,
+ output logic [31:0] rvfi_insn,
+ output logic rvfi_trap,
+ output logic rvfi_halt,
+ output logic rvfi_intr,
+ output logic [ 1:0] rvfi_mode,
+ output logic [ 1:0] rvfi_ixl,
+ output logic [ 4:0] rvfi_rs1_addr,
+ output logic [ 4:0] rvfi_rs2_addr,
+ output logic [ 4:0] rvfi_rs3_addr,
+ output logic [31:0] rvfi_rs1_rdata,
+ output reg_cap_t rvfi_rs1_rcap,
+ output logic [31:0] rvfi_rs2_rdata,
+ output reg_cap_t rvfi_rs2_rcap,
+ output logic [31:0] rvfi_rs3_rdata,
+ output logic [ 4:0] rvfi_rd_addr,
+ output logic [31:0] rvfi_rd_wdata,
+ output reg_cap_t rvfi_rd_wcap,
+ output logic [31:0] rvfi_pc_rdata,
+ output logic [31:0] rvfi_pc_wdata,
+ output logic [31:0] rvfi_mem_addr,
+ output logic [ 3:0] rvfi_mem_rmask,
+ output logic [ 3:0] rvfi_mem_wmask,
+ output logic [32:0] rvfi_mem_rdata,
+ output logic [32:0] rvfi_mem_wdata,
+ output logic rvfi_mem_is_cap,
+ output reg_cap_t rvfi_mem_rcap,
+ output reg_cap_t rvfi_mem_wcap,
+ output logic [31:0] rvfi_ext_mip,
+ output logic rvfi_ext_nmi,
+ output logic rvfi_ext_debug_req,
+ output logic [63:0] rvfi_ext_mcycle,
+`endif
+
+ // CPU Control Signals
+ input fetch_enable_t fetch_enable_i,
+ output logic core_sleep_o,
+ output logic alert_minor_o,
+ output logic alert_major_internal_o,
+ output logic alert_major_bus_o,
+
+
+ // DFT bypass controls
+ input logic scan_rst_ni
+);
+
+ localparam bit ResetAll = 1'b1;
+ localparam int unsigned RegFileDataWidth = 32;
+
+ // Clock signals
+ logic clk;
+ logic core_busy_d, core_busy_q;
+ logic clock_en;
+ logic irq_pending;
+ // Core <-> Register file signals
+ logic [4:0] rf_raddr_a;
+ logic [4:0] rf_raddr_b;
+ logic [4:0] rf_waddr_wb;
+ logic rf_we_wb;
+ logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;
+ logic [RegFileDataWidth-1:0] rf_rdata_a_ecc, rf_rdata_a_ecc_buf;
+ logic [RegFileDataWidth-1:0] rf_rdata_b_ecc, rf_rdata_b_ecc_buf;
+ reg_cap_t rf_rcap_a, rf_rcap_b;
+ reg_cap_t rf_wcap;
+
+ logic [31:0] rf_reg_rdy;
+ logic [4:0] rf_trvk_addr;
+ logic rf_trvk_en;
+ logic rf_trvk_clrtag;
+ logic [4:0] rf_trsv_addr;
+ logic rf_trsv_en;
+
+ fetch_enable_t fetch_enable_buf;
+
+ /////////////////////
+ // Main clock gate //
+ /////////////////////
+
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ core_busy_q <= 1'b0;
+ end else begin
+ core_busy_q <= core_busy_d;
+ end
+ end
+
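+  // The core stays clocked while it is busy or while any wakeup source (debug request, pending
+  // IRQ, NMI) is present; core_sleep_o is simply the inverse of the clock enable.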
+ assign clock_en = core_busy_q | debug_req_i | irq_pending | irq_nm_i;
+ assign core_sleep_o = ~clock_en;
+
+  // Clock gating is bypassed for now; the core clock is driven directly from clk_i.
+ assign clk = clk_i;
+
+// prim_clock_gating core_clock_gate_i (
+// .clk_i (clk_i),
+// .en_i (clock_en),
+// .test_en_i(test_en_i),
+// .clk_o (clk)
+// );
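+  // To restore clock gating, re-enable the prim_clock_gating instance above and drive clk from
+  // its clk_o output instead of tying it to clk_i.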
+
+ ////////////////////////
+ // Core instantiation //
+ ////////////////////////
+
+`ifdef FPGA
+  // Buffer security-critical signals to prevent synthesis optimisation from removing them
+ prim_buf #(.Width($bits(fetch_enable_t))) u_fetch_enable_buf (
+ .in_i (fetch_enable_i),
+ .out_o(fetch_enable_buf)
+ );
+
+ prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_a_ecc_buf (
+ .in_i (rf_rdata_a_ecc),
+ .out_o(rf_rdata_a_ecc_buf)
+ );
+
+ prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_b_ecc_buf (
+ .in_i (rf_rdata_b_ecc),
+ .out_o(rf_rdata_b_ecc_buf)
+ );
+`else
+ assign fetch_enable_buf = fetch_enable_i;
+ assign rf_rdata_a_ecc_buf = rf_rdata_a_ecc;
+ assign rf_rdata_b_ecc_buf = rf_rdata_b_ecc;
+`endif
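+  // The buffered versions above are only built when the FPGA define is set; other targets use
+  // the direct assignments.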
+
+ cheriot_core #(
+ .PMPEnable (1'b0),
+ .PMPGranularity (0),
+ .PMPNumRegions (4),
+ .MHPMCounterNum (MHPMCounterNum ),
+ .MHPMCounterWidth (MHPMCounterWidth),
+ .RV32E (RV32E),
+ .RV32M (RV32M),
+ .RV32B (RV32BNone),
+ .BranchTargetALU (1'b1),
+ .ICache (1'b0),
+ .ICacheECC (1'b0),
+ .BusSizeECC (BUS_SIZE),
+ .TagSizeECC (IC_TAG_SIZE),
+ .LineSizeECC (IC_LINE_SIZE),
+ .BranchPredictor (BranchPredictor),
+ .DbgTriggerEn (DbgTriggerEn),
+ .DbgHwBreakNum (DbgHwBreakNum),
+ .WritebackStage (WritebackStage),
+ .ResetAll (ResetAll),
+ .RndCnstLfsrSeed (RndCnstLfsrSeedDefault),
+ .RndCnstLfsrPerm (RndCnstLfsrPermDefault),
+ .SecureIbex (1'b0),
+ .DummyInstructions(1'b0),
+ .RegFileECC (1'b0),
+ .RegFileDataWidth (RegFileDataWidth),
+ .DmHaltAddr (DmHaltAddr),
+ .DmExceptionAddr (DmExceptionAddr),
+ .CHERIoTEn (CHERIoTEn),
+ .DataWidth (DataWidth),
+ .HeapBase (HeapBase),
+ .TSMapBase (TSMapBase),
+ .TSMapSize (TSMapSize),
+ .MemCapFmt (MemCapFmt),
+ .CheriPPLBC (CheriPPLBC),
+ .CheriSBND2 (CheriSBND2),
+ .CheriTBRE (CheriTBRE),
+ .CheriStkZ (CheriStkZ),
+ .MMRegDinW (MMRegDinW),
+ .MMRegDoutW (MMRegDoutW),
+ .CheriCapIT8 (CheriCapIT8)
+ ) u_cheriot_core (
+ .clk_i (clk),
+ .rst_ni (rst_ni),
+
+ .cheri_pmode_i (cheri_pmode_i),
+ .cheri_tsafe_en_i (cheri_tsafe_en_i),
+    .hart_id_i        (hart_id_i),
+    .boot_addr_i      (boot_addr_i),
+
+ .instr_req_o (instr_req_o ),
+ .instr_gnt_i (instr_gnt_i ),
+ .instr_rvalid_i (instr_rvalid_i),
+ .instr_addr_o (instr_addr_o ),
+ .instr_rdata_i (instr_rdata_i ),
+ .instr_err_i (instr_err_i ),
+
+ .data_req_o (data_req_o ),
+ .data_is_cap_o (data_is_cap_o ),
+ .data_gnt_i (data_gnt_i ),
+ .data_rvalid_i (data_rvalid_i ),
+ .data_we_o (data_we_o ),
+ .data_be_o (data_be_o ),
+ .data_addr_o (data_addr_o ),
+ .data_wdata_o (data_wdata_o ),
+ .data_rdata_i (data_rdata_i ),
+ .data_err_i (data_err_i ),
+
+ .dummy_instr_id_o (),
+ .rf_raddr_a_o (rf_raddr_a),
+ .rf_raddr_b_o (rf_raddr_b),
+ .rf_waddr_wb_o (rf_waddr_wb),
+ .rf_we_wb_o (rf_we_wb),
+ .rf_wdata_wb_ecc_o(rf_wdata_wb_ecc),
+ .rf_rdata_a_ecc_i (rf_rdata_a_ecc_buf),
+ .rf_rdata_b_ecc_i (rf_rdata_b_ecc_buf),
+ .rf_wcap_wb_o (rf_wcap),
+ .rf_rcap_a_i (rf_rcap_a),
+ .rf_rcap_b_i (rf_rcap_b),
+ .rf_reg_rdy_i (rf_reg_rdy),
+ .rf_trsv_en_o (rf_trsv_en),
+ .rf_trsv_addr_o (rf_trsv_addr),
+ .rf_trvk_addr_o (rf_trvk_addr),
+ .rf_trvk_en_o (rf_trvk_en ),
+ .rf_trvk_clrtag_o (rf_trvk_clrtag),
+ .rf_trvk_par_o (),
+ .rf_trsv_par_o (),
+ .tsmap_cs_o (tsmap_cs_o ),
+ .tsmap_addr_o (tsmap_addr_o ),
+ .tsmap_rdata_i (tsmap_rdata_i),
+ .mmreg_corein_i (mmreg_corein_i),
+ .mmreg_coreout_o (mmreg_coreout_o),
+ .cheri_fatal_err_o(cheri_fatal_err_o),
+
+ .irq_software_i (irq_software_i),
+ .irq_timer_i (irq_timer_i ),
+ .irq_external_i (irq_external_i),
+ .irq_fast_i (irq_fast_i ),
+ .irq_nm_i (irq_nm_i ),
+ .irq_pending_o(irq_pending),
+
+ .debug_req_i,
+ .crash_dump_o,
+ .double_fault_seen_o,
+
+`ifdef RVFI
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rdata,
+ .rvfi_rs2_rcap,
+ .rvfi_rs3_rdata,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_rd_wcap,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_is_cap,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_ext_mip,
+ .rvfi_ext_nmi,
+ .rvfi_ext_debug_req,
+ .rvfi_ext_mcycle,
+`endif
+
+ .fetch_enable_i(fetch_enable_buf),
+ .alert_minor_o(alert_minor_o),
+ .alert_major_o(alert_major_internal_o),
+ .icache_inval_o(),
+ .core_busy_o (core_busy_d),
+ .ic_scr_key_valid_i (1'b0),
+ .ic_data_rdata_i (),
+ .ic_data_wdata_o (),
+ .ic_data_addr_o (),
+ .ic_data_write_o (),
+ .ic_data_req_o (),
+ .ic_tag_rdata_i (),
+ .ic_tag_wdata_o (),
+ .ic_tag_addr_o (),
+ .ic_tag_write_o (),
+ .ic_tag_req_o ()
+ );
+
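+  // Bus integrity is not generated at this level: write-data integrity bits and the major bus
+  // alert are tied off to constants.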
+ assign data_wdata_intg_o = 7'h0;
+ assign alert_major_bus_o = 1'b0;
+
+ /////////////////////////////////
+ // Register file Instantiation //
+ /////////////////////////////////
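+  // RV32E selects a 16-entry register file; otherwise the full 32-entry file is instantiated.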
+ if (RV32E) begin
+ cheri_regfile #(
+ .NREGS(16),
+ .NCAPS(16),
+ .CheriPPLBC(CheriPPLBC)
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni (rst_ni),
+ .raddr_a_i (rf_raddr_a),
+ .rdata_a_o (rf_rdata_a_ecc),
+ .rcap_a_o (rf_rcap_a),
+ .raddr_b_i (rf_raddr_b),
+ .rdata_b_o (rf_rdata_b_ecc),
+ .rcap_b_o (rf_rcap_b),
+ .waddr_a_i (rf_waddr_wb),
+ .wdata_a_i (rf_wdata_wb_ecc),
+ .wcap_a_i (rf_wcap),
+ .we_a_i (rf_we_wb),
+ .reg_rdy_o (rf_reg_rdy),
+ .trvk_addr_i (rf_trvk_addr),
+ .trvk_en_i (rf_trvk_en),
+ .trvk_clrtag_i (rf_trvk_clrtag),
+ .trsv_addr_i (rf_trsv_addr),
+ .trsv_en_i (rf_trsv_en),
+ .trsv_par_i (7'h0),
+ .trvk_par_i (7'h0),
+ .par_rst_ni (1'b0),
+ .alert_o ()
+ );
+ end else begin
+ cheri_regfile #(
+ .NREGS(32),
+ .NCAPS(16),
+ .CheriPPLBC(CheriPPLBC)
+ ) register_file_i (
+ .clk_i (clk),
+ .rst_ni (rst_ni),
+ .raddr_a_i (rf_raddr_a),
+ .rdata_a_o (rf_rdata_a_ecc),
+ .rcap_a_o (rf_rcap_a),
+ .raddr_b_i (rf_raddr_b),
+ .rdata_b_o (rf_rdata_b_ecc),
+ .rcap_b_o (rf_rcap_b),
+ .waddr_a_i (rf_waddr_wb),
+ .wdata_a_i (rf_wdata_wb_ecc),
+ .wcap_a_i (rf_wcap),
+ .we_a_i (rf_we_wb),
+ .reg_rdy_o (rf_reg_rdy),
+ .trvk_addr_i (rf_trvk_addr),
+ .trvk_en_i (rf_trvk_en),
+ .trvk_clrtag_i (rf_trvk_clrtag),
+ .trsv_addr_i (rf_trsv_addr),
+ .trsv_en_i (rf_trsv_en),
+ .trsv_par_i (7'h0),
+ .trvk_par_i (7'h0),
+ .par_rst_ni (1'b0),
+ .alert_o ()
+ );
+ end
+
+  assign scramble_req_o = 1'b0;
+
+endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv b/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv
new file mode 100644
index 0000000..5840322
--- /dev/null
+++ b/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv
@@ -0,0 +1,316 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Top level module of the ibex RISC-V core with tracing enabled
+ */
+
+module cheriot_top_tracing import cheriot_pkg::*; import cheri_pkg::*; #(
+ parameter int unsigned DmHaltAddr = 32'h1A110800,
+ parameter int unsigned DmExceptionAddr = 32'h1A110808,
+ parameter bit RV32E = 1'b0,
+ parameter bit CheriTBRE = 1'b1,
+ parameter bit CheriStkZ = 1'b1,
+ parameter int unsigned HeapBase = 32'h2001_0000,
+ parameter int unsigned TSMapBase = 32'h2004_0000, // 4kB default
+ parameter int unsigned TSMapSize = 1024, // in words
+ parameter int unsigned MMRegDinW = 128,
+ parameter int unsigned MMRegDoutW = 64,
+  parameter int unsigned DataWidth        = 33,   // allows the testbench to override this via defparam
+ parameter bit CheriCapIT8 = 1'b0
+) (
+ // Clock and Reset
+ input logic clk_i,
+ input logic rst_ni,
+
+ input logic test_en_i, // enable all clock gates for testing
+ input logic scan_rst_ni,
+ input prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,
+
+ input logic cheri_pmode_i,
+ input logic cheri_tsafe_en_i,
+ input logic [31:0] hart_id_i,
+ input logic [31:0] boot_addr_i,
+
+ // Instruction memory interface
+ output logic instr_req_o,
+ input logic instr_gnt_i,
+ input logic instr_rvalid_i,
+ output logic [31:0] instr_addr_o,
+ input logic [31:0] instr_rdata_i,
+ input logic [6:0] instr_rdata_intg_i,
+ input logic instr_err_i,
+
+ // Data memory interface
+ output logic data_req_o,
+ output logic data_is_cap_o,
+ input logic data_gnt_i,
+ input logic data_rvalid_i,
+ output logic data_we_o,
+ output logic [3:0] data_be_o,
+ output logic [31:0] data_addr_o,
+ output logic [32:0] data_wdata_o,
+ output logic [6:0] data_wdata_intg_o,
+ input logic [32:0] data_rdata_i,
+ input logic [6:0] data_rdata_intg_i,
+ input logic data_err_i,
+
+ // TS map memory interface
+ output logic tsmap_cs_o,
+ output logic [15:0] tsmap_addr_o,
+ input logic [31:0] tsmap_rdata_i,
+  input  logic [6:0]                   tsmap_rdata_intg_i, // not used in cheriot_top
+ input logic [MMRegDinW-1:0] mmreg_corein_i,
+ output logic [MMRegDoutW-1:0] mmreg_coreout_o,
+ output logic cheri_fatal_err_o,
+
+ // Interrupt inputs
+ input logic irq_software_i,
+ input logic irq_timer_i,
+ input logic irq_external_i,
+ input logic [14:0] irq_fast_i,
+  input  logic                         irq_nm_i,       // non-maskable interrupt
+
+ // Scrambling Interface
+ input logic scramble_key_valid_i,
+ input logic [SCRAMBLE_KEY_W-1:0] scramble_key_i,
+ input logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_i,
+ output logic scramble_req_o,
+
+ // Debug Interface
+ input logic debug_req_i,
+ output crash_dump_t crash_dump_o,
+ output logic double_fault_seen_o,
+
+ // CPU Control Signals
+ input fetch_enable_t fetch_enable_i,
+ output logic core_sleep_o
+);
+
+
+ logic rvfi_valid;
+ logic [63:0] rvfi_order;
+ logic [31:0] rvfi_insn;
+ logic rvfi_trap;
+ logic rvfi_halt;
+ logic rvfi_intr;
+ logic [ 1:0] rvfi_mode;
+ logic [ 1:0] rvfi_ixl;
+ logic [ 4:0] rvfi_rs1_addr;
+ logic [ 4:0] rvfi_rs2_addr;
+ logic [ 4:0] rvfi_rs3_addr;
+ logic [31:0] rvfi_rs1_rdata;
+ reg_cap_t rvfi_rs1_rcap;
+ reg_cap_t rvfi_rs2_rcap;
+ logic [31:0] rvfi_rs2_rdata;
+ logic [31:0] rvfi_rs3_rdata;
+ logic [ 4:0] rvfi_rd_addr;
+ logic [31:0] rvfi_rd_wdata;
+ reg_cap_t rvfi_rd_wcap;
+ logic [31:0] rvfi_pc_rdata;
+ logic [31:0] rvfi_pc_wdata;
+ logic [31:0] rvfi_mem_addr;
+ logic [ 3:0] rvfi_mem_rmask;
+ logic [ 3:0] rvfi_mem_wmask;
+ logic [DataWidth-1:0] rvfi_mem_rdata;
+ logic [DataWidth-1:0] rvfi_mem_wdata;
+ logic rvfi_mem_is_cap;
+ reg_cap_t rvfi_mem_rcap;
+ reg_cap_t rvfi_mem_wcap;
+ logic [31:0] rvfi_ext_mip;
+ logic rvfi_ext_nmi;
+ logic rvfi_ext_debug_req;
+ logic [63:0] rvfi_ext_mcycle;
+
+ logic [31:0] unused_rvfi_ext_mip;
+ logic unused_rvfi_ext_nmi;
+ logic unused_rvfi_ext_debug_req;
+ logic [63:0] unused_rvfi_ext_mcycle;
+
+ // Tracer doesn't use these signals, though other modules may probe down into tracer to observe
+ // them.
+ assign unused_rvfi_ext_mip = rvfi_ext_mip;
+ assign unused_rvfi_ext_nmi = rvfi_ext_nmi;
+ assign unused_rvfi_ext_debug_req = rvfi_ext_debug_req;
+ assign unused_rvfi_ext_mcycle = rvfi_ext_mcycle;
+
+ cheriot_top #(
+ .DmHaltAddr (DmHaltAddr ),
+ .DmExceptionAddr (DmExceptionAddr ),
+ .MHPMCounterNum (13 ),
+ .MHPMCounterWidth (40),
+ .DbgTriggerEn (1'b1),
+ .DbgHwBreakNum (4),
+ .RV32E (RV32E),
+ .RV32B (RV32BFull),
+ .WritebackStage (1'b1),
+ .BranchPredictor (1'b0),
+ .CHERIoTEn (1'b1),
+ .DataWidth (DataWidth),
+ .HeapBase (HeapBase ),
+ .TSMapBase (TSMapBase),
+ .TSMapSize (TSMapSize),
+ .MemCapFmt (1'b0),
+ .CheriPPLBC (1'b1),
+ .CheriSBND2 (1'b0),
+ .CheriTBRE (CheriTBRE),
+ .CheriStkZ (CheriStkZ),
+ .MMRegDinW (MMRegDinW),
+ .MMRegDoutW (MMRegDoutW),
+ .CheriCapIT8 (CheriCapIT8)
+ ) u_cheriot_top (
+ .clk_i,
+ .rst_ni,
+
+ .test_en_i,
+ .scan_rst_ni,
+ .ram_cfg_i,
+
+ .cheri_pmode_i,
+ .cheri_tsafe_en_i,
+ .hart_id_i,
+ .boot_addr_i,
+
+ .instr_req_o,
+ .instr_gnt_i,
+ .instr_rvalid_i,
+ .instr_addr_o,
+ .instr_rdata_i,
+ .instr_rdata_intg_i,
+ .instr_err_i,
+
+ .data_req_o,
+ .data_is_cap_o,
+ .data_gnt_i,
+ .data_rvalid_i,
+ .data_we_o,
+ .data_be_o,
+ .data_addr_o,
+ .data_wdata_o,
+ .data_wdata_intg_o,
+ .data_rdata_i,
+ .data_rdata_intg_i,
+ .data_err_i,
+
+ .tsmap_cs_o,
+ .tsmap_addr_o,
+ .tsmap_rdata_i,
+ .mmreg_corein_i,
+ .mmreg_coreout_o,
+ .cheri_fatal_err_o,
+
+ .irq_software_i,
+ .irq_timer_i,
+ .irq_external_i,
+ .irq_fast_i,
+ .irq_nm_i,
+
+ .scramble_key_valid_i,
+ .scramble_key_i,
+ .scramble_nonce_i,
+ .scramble_req_o,
+
+ .debug_req_i,
+ .crash_dump_o,
+ .double_fault_seen_o,
+
+`ifdef RVFI
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rdata,
+ .rvfi_rs2_rcap,
+ .rvfi_rs3_rdata,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_rd_wcap,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_mem_is_cap,
+ .rvfi_ext_mip,
+ .rvfi_ext_nmi,
+ .rvfi_ext_debug_req,
+ .rvfi_ext_mcycle,
+`endif
+ .fetch_enable_i,
+ .core_sleep_o,
+ .alert_major_bus_o(),
+ .alert_major_internal_o(),
+ .alert_minor_o()
+ );
+
+// cheriot_tracer relies on the RISC-V Formal Interface (RVFI) signals, so RVFI must be
+// defined globally when tracing is enabled.
+// synthesis translate_off
+`ifndef RVFI
+  $fatal(1, "Fatal error: RVFI needs to be defined globally.");
+`endif
+
+`ifdef RVFI
+ cheriot_tracer #(
+ .DataWidth (DataWidth),
+ .CheriCapIT8 (CheriCapIT8)
+ ) u_cheriot_tracer (
+ .clk_i,
+ .rst_ni,
+
+ .cheri_pmode_i,
+ .cheri_tsafe_en_i,
+ .hart_id_i,
+
+ .rvfi_valid,
+ .rvfi_order,
+ .rvfi_insn,
+ .rvfi_trap,
+ .rvfi_halt,
+ .rvfi_intr,
+ .rvfi_mode,
+ .rvfi_ixl,
+ .rvfi_rs1_addr,
+ .rvfi_rs2_addr,
+ .rvfi_rs3_addr,
+ .rvfi_rs1_rdata,
+ .rvfi_rs2_rdata,
+ .rvfi_rs3_rdata,
+ .rvfi_rs1_rcap,
+ .rvfi_rs2_rcap,
+ .rvfi_rd_wcap,
+ .rvfi_rd_addr,
+ .rvfi_rd_wdata,
+ .rvfi_pc_rdata,
+ .rvfi_pc_wdata,
+ .rvfi_mem_addr,
+ .rvfi_mem_rmask,
+ .rvfi_mem_wmask,
+ .rvfi_mem_rdata,
+ .rvfi_mem_wdata,
+ .rvfi_mem_rcap,
+ .rvfi_mem_wcap,
+ .rvfi_mem_is_cap
+ );
+`endif
+
+// synthesis translate_on
+
+endmodule