Initial commit of cheriot ibex into hw/matcha -Sync to commit 31dbab1 Bypass-Presubmit-Reason: failed test related to test environment change Change-Id: I28699fb4cd29b805c60549251b4980c96f2c177b
diff --git a/hw/ip/cheriot-ibex/cheriot_core.core b/hw/ip/cheriot-ibex/cheriot_core.core new file mode 100644 index 0000000..31ac2b7 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_core.core
@@ -0,0 +1,186 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_core:0.1" +description: "Ibex CPU Core Components" + +filesets: + files_rtl: + depend: + - lowrisc:prim:assert + - lowrisc:prim:clock_gating + - lowrisc:prim:lfsr + - lowrisc:ibex:cheriot_pkg + - lowrisc:ibex:cheriot_icache + - lowrisc:dv:dv_fcov_macros + files: + - rtl/cheri_decoder.sv + - rtl/cheri_ex.sv + - rtl/cheri_tbre.sv + - rtl/cheri_stkz.sv + - rtl/cheri_tbre_wrapper.sv + - rtl/cheri_trvk_stage.sv + - rtl/cheriot_alu.sv + - rtl/cheriot_branch_predict.sv + - rtl/cheriot_compressed_decoder.sv + - rtl/cheriot_controller.sv + - rtl/cheriot_cs_registers.sv + - rtl/cheriot_csr.sv + - rtl/cheriot_counter.sv + - rtl/cheriot_decoder.sv + - rtl/cheriot_ex_block.sv + - rtl/cheriot_fetch_fifo.sv + - rtl/cheriot_id_stage.sv + - rtl/cheriot_if_stage.sv + - rtl/cheriot_load_store_unit.sv + - rtl/cheriot_multdiv_fast.sv + - rtl/cheriot_multdiv_slow.sv + - rtl/cheriot_prefetch_buffer.sv + - rtl/cheriot_pmp.sv + - rtl/cheriot_wb_stage.sv + - rtl/cheriot_dummy_instr.sv + - rtl/cheriot_core.sv + - rtl/cheriot_pmp_reset_default.svh: {is_include_file: true} + file_type: systemVerilogSource + + files_lint_verilator: + files: + - lint/verilator_waiver.vlt: {file_type: vlt} + + files_lint_verible: + files: + - lint/verible_waiver.vbw: {file_type: veribleLintWaiver} + + files_check_tool_requirements: + depend: + - lowrisc:tool:check_tool_requirements + +parameters: + RVFI: + datatype: bool + paramtype: vlogdefine + + SYNTHESIS: + datatype: bool + paramtype: vlogdefine + + FPGA_XILINX: + datatype: bool + description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters. 
+ default: false + paramtype: vlogdefine + + RV32E: + datatype: int + default: 0 + paramtype: vlogparam + + RV32M: + datatype: str + default: cheriot_pkg::RV32MFast + paramtype: vlogdefine + description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values." + + RV32B: + datatype: str + default: cheriot_pkg::RV32BNone + paramtype: vlogdefine + description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values." + + RegFile: + datatype: str + default: cheriot_pkg::RegFileFF + paramtype: vlogdefine + description: "Register file implementation parameter enum. See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values." + + ICache: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable instruction cache" + + ICacheECC: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable ECC protection in instruction cache" + + BranchTargetALU: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]" + + WritebackStage: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]" + + BranchPredictor: + datatype: int + paramtype: vlogparam + default: 0 + description: "Enables static branch prediction (EXPERIMENTAL)" + + SecureIbex: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables security hardening features (EXPERIMENTAL) [0/1]" + + PMPEnable: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable PMP" + + PMPGranularity: + datatype: int + default: 0 + paramtype: vlogparam + description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc" + + PMPNumRegions: + datatype: int + default: 4 + paramtype: vlogparam + description: "Number of PMP regions" + +targets: + default: &default_target 
+ filesets: + - tool_verilator ? (files_lint_verilator) + - tool_veriblelint ? (files_lint_verible) + - files_rtl + - files_check_tool_requirements + toplevel: cheriot_core + parameters: + - tool_vivado ? (FPGA_XILINX=true) + lint: + <<: *default_target + parameters: + - SYNTHESIS=true + - RVFI=true + default_tool: verilator + tools: + verilator: + mode: lint-only + verilator_options: + - "-Wall" + # RAM primitives wider than 64bit (required for ECC) fail to build in + # Verilator without increasing the unroll count (see Verilator#1266) + - "--unroll-count 72" + format: + filesets: + - files_rtl + parameters: + - SYNTHESIS=true + - RVFI=true + default_tool: veribleformat + toplevel: cheriot_core + tools: + veribleformat: + verible_format_args: + - "--inplace"
diff --git a/hw/ip/cheriot-ibex/cheriot_icache.core b/hw/ip/cheriot-ibex/cheriot_icache.core new file mode 100644 index 0000000..6f963c5 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_icache.core
@@ -0,0 +1,22 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_icache:0.1" +description: "Ibex instruction cache" +filesets: + files_rtl: + depend: + - lowrisc:prim:secded + - lowrisc:prim:assert + - lowrisc:ibex:cheriot_pkg + files: + - rtl/cheriot_icache.sv + file_type: systemVerilogSource + +targets: + default: &default_target + filesets: + - files_rtl + toplevel: cheriot_icache + default_tool: vcs
diff --git a/hw/ip/cheriot-ibex/cheriot_multdiv.core b/hw/ip/cheriot-ibex/cheriot_multdiv.core new file mode 100644 index 0000000..6898853 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_multdiv.core
@@ -0,0 +1,28 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_multdiv:0.1" +description: "Multiplier and divider" + +filesets: + files_rtl: + depend: + - lowrisc:prim:assert + - lowrisc:ibex:cheriot_pkg + files: + - rtl/cheriot_multdiv_fast.sv + - rtl/cheriot_multdiv_slow.sv + file_type: systemVerilogSource + +parameters: + RV32M: + datatype: int + default: 2 + paramtype: vlogparam + description: "Selection of multiplication implementation. Switch to enable single cycle multiplications." + +targets: + default: &default_target + filesets: + - files_rtl
diff --git a/hw/ip/cheriot-ibex/cheriot_pkg.core b/hw/ip/cheriot-ibex/cheriot_pkg.core new file mode 100644 index 0000000..4c60a18 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_pkg.core
@@ -0,0 +1,18 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_pkg:0.1" +description: "Header package for Ibex" + +filesets: + files_rtl: + files: + - rtl/cheriot_pkg.sv + - rtl/cheri_pkg.sv + file_type: systemVerilogSource + +targets: + default: + filesets: + - files_rtl
diff --git a/hw/ip/cheriot-ibex/cheriot_top.core b/hw/ip/cheriot-ibex/cheriot_top.core new file mode 100644 index 0000000..5d08123 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_top.core
@@ -0,0 +1,175 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_top:0.1" +description: "Ibex, a small RV32 CPU core" + +filesets: + files_rtl: + depend: + - lowrisc:ibex:cheriot_pkg + - lowrisc:ibex:cheriot_core + - lowrisc:prim:buf + - lowrisc:prim:clock_mux2 + - lowrisc:prim:flop + - lowrisc:prim:ram_1p_scr + files: + - rtl/cheriot_register_file_ff.sv # generic FF-based + - rtl/cheriot_register_file_fpga.sv # FPGA + - rtl/cheriot_register_file_latch.sv # ASIC + - rtl/cheri_regfile.sv # generic FF-based + - rtl/cheriot_lockstep.sv + - rtl/cheriot_top.sv + file_type: systemVerilogSource + + files_lint_verilator: + files: + - lint/verilator_waiver.vlt: {file_type: vlt} + + files_lint_verible: + files: + - lint/verible_waiver.vbw: {file_type: veribleLintWaiver} + + files_check_tool_requirements: + depend: + - lowrisc:tool:check_tool_requirements + +parameters: + RVFI: + datatype: bool + paramtype: vlogdefine + + SYNTHESIS: + datatype: bool + paramtype: vlogdefine + + FPGA_XILINX: + datatype: bool + description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters. + default: false + paramtype: vlogdefine + + RV32E: + datatype: int + default: 0 + paramtype: vlogparam + + RV32M: + datatype: str + default: cheriot_pkg::RV32MFast + paramtype: vlogdefine + description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values." + + RV32B: + datatype: str + default: cheriot_pkg::RV32BNone + paramtype: vlogdefine + description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values." + + RegFile: + datatype: str + default: cheriot_pkg::RegFileFF + paramtype: vlogdefine + description: "Register file implementation parameter enum. 
See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values." + + ICache: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable instruction cache" + + ICacheECC: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable ECC protection in instruction cache" + + BranchTargetALU: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]" + + WritebackStage: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]" + + BranchPredictor: + datatype: int + paramtype: vlogparam + default: 0 + description: "Enables static branch prediction (EXPERIMENTAL)" + + SecureIbex: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables security hardening features (EXPERIMENTAL) [0/1]" + + ICacheScramble: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables ICache scrambling feature (EXPERIMENTAL) [0/1]" + + PMPEnable: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable PMP" + + PMPGranularity: + datatype: int + default: 0 + paramtype: vlogparam + description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc" + + PMPNumRegions: + datatype: int + default: 4 + paramtype: vlogparam + description: "Number of PMP regions" + +targets: + default: &default_target + filesets: + - tool_verilator ? (files_lint_verilator) + - tool_veriblelint ? (files_lint_verible) + - files_rtl + - files_check_tool_requirements + toplevel: cheriot_top + parameters: + - tool_vivado ? 
(FPGA_XILINX=true) + lint: + <<: *default_target + parameters: + - SYNTHESIS=true + - RVFI=true + default_tool: verilator + tools: + verilator: + mode: lint-only + verilator_options: + - "-Wall" + # RAM primitives wider than 64bit (required for ECC) fail to build in + # Verilator without increasing the unroll count (see Verilator#1266) + - "--unroll-count 72" + format: + filesets: + - files_rtl + parameters: + - SYNTHESIS=true + - RVFI=true + default_tool: veribleformat + toplevel: cheriot_top + tools: + veribleformat: + verible_format_args: + - "--inplace" + - "--formal_parameters_indentation=indent" + - "--named_parameter_indentation=indent" + - "--named_port_indentation=indent" + - "--port_declarations_indentation=indent"
diff --git a/hw/ip/cheriot-ibex/cheriot_top_tracing.core b/hw/ip/cheriot-ibex/cheriot_top_tracing.core new file mode 100644 index 0000000..48c6995 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_top_tracing.core
@@ -0,0 +1,161 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_top_tracing:0.1" +description: "Ibex, a small RV32 CPU core with tracing enabled" +filesets: + files_rtl: + depend: + - lowrisc:ibex:cheriot_top + - lowrisc:ibex:cheriot_tracer + files: + - rtl/cheriot_top_tracing.sv + file_type: systemVerilogSource + +parameters: + # The tracer uses the RISC-V Formal Interface (RVFI) to collect trace signals. + RVFI: + datatype: bool + paramtype: vlogdefine + default: true + + SYNTHESIS: + datatype: bool + paramtype: vlogdefine + + RV32E: + datatype: int + default: 0 + paramtype: vlogparam + + RV32M: + datatype: str + default: cheriot_pkg::RV32MFast + paramtype: vlogdefine + description: "RV32M implementation parameter enum. See the cheriot_pkg::rv32m_e enum in cheriot_pkg.sv for permitted values." + + RV32B: + datatype: str + default: cheriot_pkg::RV32BNone + paramtype: vlogdefine + description: "Bitmanip implementation parameter enum. See the cheriot_pkg::rv32b_e enum in cheriot_pkg.sv for permitted values." + + RegFile: + datatype: str + default: cheriot_pkg::RegFileFF + paramtype: vlogdefine + description: "Register file implementation parameter enum. See the cheriot_pkg::regfile_e enum in cheriot_pkg.sv for permitted values." 
+ + ICache: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable instruction cache" + + ICacheECC: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable ECC protection in instruction cache" + + BranchTargetALU: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables separate branch target ALU (increasing branch performance EXPERIMENTAL) [0/1]" + + WritebackStage: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables third pipeline stage (EXPERIMENTAL) [0/1]" + + BranchPredictor: + datatype: int + paramtype: vlogparam + default: 0 + description: "Enables static branch prediction (EXPERIMENTAL)" + + SecureIbex: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables security hardening features (EXPERIMENTAL) [0/1]" + + ICacheScramble: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enables ICache scrambling feature (EXPERIMENTAL) [0/1]" + + PMPEnable: + datatype: int + default: 0 + paramtype: vlogparam + description: "Enable PMP" + + PMPGranularity: + datatype: int + default: 0 + paramtype: vlogparam + description: "Granularity of NAPOT range, 0 = 4 byte, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte etc" + + PMPNumRegions: + datatype: int + default: 4 + paramtype: vlogparam + description: "Number of PMP regions" + +targets: + default: &default_target + filesets: + - files_rtl + parameters: + - RVFI=true + toplevel: cheriot_top_tracing + + lint: + <<: *default_target + parameters: + - RVFI=true + - SYNTHESIS=true + - RV32E + - RV32M + - RV32B + - RegFile + - ICache + - ICacheECC + - BranchTargetALU + - WritebackStage + - BranchPredictor + - SecureIbex + - ICacheScramble + - PMPEnable + - PMPGranularity + - PMPNumRegions + default_tool: verilator + tools: + verilator: + mode: lint-only + verilator_options: + - "-Wall" + # RAM primitives wider than 64bit (required for ECC) fail to build in + # Verilator without increasing the unroll count (see 
Verilator#1266) + - "--unroll-count 72" + format: + filesets: + - files_rtl + parameters: + - SYNTHESIS=true + - RVFI=true + default_tool: veribleformat + toplevel: cheriot_top_tracing + tools: + veribleformat: + verible_format_args: + - "--inplace" + - "--formal_parameters_indentation=indent" + - "--named_parameter_indentation=indent" + - "--named_port_indentation=indent" + - "--port_declarations_indentation=indent"
diff --git a/hw/ip/cheriot-ibex/cheriot_tracer.core b/hw/ip/cheriot-ibex/cheriot_tracer.core new file mode 100644 index 0000000..e9bbce5 --- /dev/null +++ b/hw/ip/cheriot-ibex/cheriot_tracer.core
@@ -0,0 +1,20 @@ +CAPI=2: +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +name: "lowrisc:ibex:cheriot_tracer:0.1" +description: "Tracer for use with Ibex using the RVFI interface" +filesets: + files_rtl: + depend: + - lowrisc:prim:assert + - lowrisc:ibex:cheriot_pkg + files: + - rtl/cheriot_tracer_pkg.sv + - rtl/cheriot_tracer.sv + file_type: systemVerilogSource + +targets: + default: + filesets: + - files_rtl
diff --git a/hw/ip/cheriot-ibex/lint/verible_waiver.vbw b/hw/ip/cheriot-ibex/lint/verible_waiver.vbw new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/hw/ip/cheriot-ibex/lint/verible_waiver.vbw
diff --git a/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt b/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt new file mode 100644 index 0000000..b7c952c --- /dev/null +++ b/hw/ip/cheriot-ibex/lint/verilator_waiver.vlt
@@ -0,0 +1,72 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Lint waivers for Verilator +// See https://www.veripool.org/projects/verilator/wiki/Manual-verilator#CONFIGURATION-FILES +// for documentation. +// +// Important: This file must be included *before* any other Verilog file is read. +// Otherwise, only global waivers are applied, but not file-specific waivers. + +`verilator_config +lint_off -rule PINCONNECTEMPTY + +// We have some boolean top-level parameters in e.g. cheriot_top_tracing.sv. +// When building with fusesoc, these get set with defines like +// -GRV32M=1 (rather than -GRV32M=1'b1), leading to warnings like: +// +// Operator VAR '<varname>' expects 1 bits on the Initial value, but +// Initial value's CONST '32'h1' generates 32 bits. +// +// This signoff rule ignores errors like this. Note that it only +// matches when you set a 1-bit value to a literal 1, so it won't hide +// silly mistakes like setting it to 2. +// +lint_off -rule WIDTH -file "*/rtl/cheriot_top_tracing.sv" + -match "*expects 1 bits*Initial value's CONST '32'h1'*" + +// Operator expects 1 bit on initial value but initial value's CONST generates +// 32 bits, need a specific RV32B waiver as it uses enums so the above catch-all +// waiver doesn't work. +lint_off -rule WIDTH -file "*/rtl/cheriot_top_tracing.sv" -match "*'RV32B'*" + +// Bits of signal are not used: be_i[3:1] +// Bits of signal are not used: addr_i[31:10,1:0] +// Bits of signal are not used: wdata_i[31:8] +// +// simulator_ctrl exposes a 32-bit write-only interface to its control +// registers, but actually only looks at the bottom byte and rounds addresses +// down to be 4-byte aligned. 
+// +lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'be_i'[3:1]*" +lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'addr_i'[31:10,1:0]*" +lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'wdata_i'[31:8]*" + +// Bits of signal are not used: timer_addr_i[31:10] +// +// The upper bits of this address are used to select whether the timer is +// addressed at all (encoded in the timer_req_i input). However, we pass the +// entire 32-bit address around to make the code a bit cleaner. +lint_off -rule UNUSED -file "*/rtl/timer.sv" -match "*'timer_addr_i'[31:10]*" + +// Signal is not used: clk_i +// leaving clk and reset connected in-case we want to add assertions +lint_off -rule UNUSED -file "*/rtl/cheriot_pmp.sv" -match "*clk_i*" +lint_off -rule UNUSED -file "*/rtl/cheriot_compressed_decoder.sv" -match "*clk_i*" +lint_off -rule UNUSED -file "*/rtl/cheriot_decoder.sv" -match "*clk_i*" +lint_off -rule UNUSED -file "*/rtl/cheriot_branch_predict.sv" -match "*clk_i*" + +// Signal is not used: rst_ni +// leaving clk and reset connected in-case we want to add assertions +lint_off -rule UNUSED -file "*/rtl/cheriot_pmp.sv" -match "*rst_ni*" +lint_off -rule UNUSED -file "*/rtl/cheriot_compressed_decoder.sv" -match "*rst_ni*" +lint_off -rule UNUSED -file "*/rtl/cheriot_decoder.sv" -match "*rst_ni*" +lint_off -rule UNUSED -file "*/rtl/cheriot_branch_predict.sv" -match "*rst_ni*" + +// Temporary waivers until OpenTitan primitives are lint-clean +// https://github.com/lowRISC/opentitan/issues/2313 +lint_off -file "*/lowrisc_prim_*/rtl/*.sv" + +lint_off -rule UNUSED -file "*/rtl/cheriot_top_tracing.sv" -match "*RndCnstLfsrSeed*" +lint_off -rule UNUSED -file "*/rtl/cheriot_top_tracing.sv" -match "*RndCnstLfsrPerm*"
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv new file mode 100644 index 0000000..113c95f --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_decoder.sv
@@ -0,0 +1,130 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cheri instruction decoder +// should we merge this with cheri_EX? let's leave it alone for now since we may look into +// a separate decoder PL stage later + +module cheri_decoder import cheri_pkg::*; # ( + parameter bit CheriPPLBC = 1'b1, + parameter bit CheriSBND2 = 1'b0 +) ( + input logic [31:0] instr_rdata_i, + input logic cheri_opcode_en_i, // op = 0x5b + input logic cheri_tsafe_en_i, + input logic cheri_auipcc_en_i, // op = 0x17 (AUIPC) + input logic cheri_auicgp_en_i, // op = 0x7b (AUIGCP) + input logic cheri_jalr_en_i, // op = 0x67 (JALR) + input logic cheri_jal_en_i, // op = 0x6f (JAL) + input logic cheri_cload_en_i, // op = 0x3, [14:12] = 0x3 (LD) + input logic cheri_cstore_en_i, // op = 0x23, [14:12] = 0x3 (SD) + output logic instr_is_cheri_o, // instr in cheri space + output logic instr_is_legal_cheri_o, // legal cheri instruction + output logic [11:0] cheri_imm12_o, + output logic [19:0] cheri_imm20_o, + output logic [20:0] cheri_imm21_o, + output logic [OPDW-1:0] cheri_operator_o, + output logic [4:0] cheri_cs2_dec_o, + output logic cheri_rf_ren_a_o, + output logic cheri_rf_ren_b_o, + output logic cheri_rf_we_dec_o, + output logic cheri_multicycle_dec_o + ); + + logic [6:0] unused_opcode; + logic [2:0] func3_op; + logic [6:0] func7_op; + logic [4:0] imm5_op; + logic [4:0] rd_op; + + // note there are 3 encoding formats of CHERI instructions + // - fmt1: I-format, func3(14:12) = subFuc. + // - fmt2: R-format, func3(14:12) = 0x0, func7(31:25) = subFunc, etc. 
+ // - fmt3: I-format, func3(14:12) = 0x0, func7(31:25) = 0x7f, imm5(24:20) = subFunc + // - opcode [6:0] == 0x5b for all CHERI instructions + assign unused_opcode = instr_rdata_i[6:0]; + assign func3_op = instr_rdata_i[14:12]; + assign func7_op = instr_rdata_i[31:25]; + assign imm5_op = instr_rdata_i[24:20]; + assign rd_op = instr_rdata_i[11:7]; + + always_comb begin + cheri_operator_o = OPDW'('h0); + + cheri_operator_o[CCSR_RW] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h01); + cheri_operator_o[CSET_BOUNDS] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h08); + cheri_operator_o[CSET_BOUNDS_EX] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h09); + cheri_operator_o[CSET_BOUNDS_RNDN]= cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0a); + cheri_operator_o[CSEAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0b); + cheri_operator_o[CUNSEAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0c); + cheri_operator_o[CAND_PERM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h0d); + cheri_operator_o[CSET_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h10); + cheri_operator_o[CINC_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h11); + cheri_operator_o[CSUB_CAP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h14); + cheri_operator_o[CSET_HIGH] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h16); + cheri_operator_o[CIS_SUBSET] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h20); + cheri_operator_o[CIS_EQUAL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h21); + + + cheri_operator_o[CGET_PERM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h00); + cheri_operator_o[CGET_TYPE] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h01); + cheri_operator_o[CGET_BASE] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h02); + cheri_operator_o[CGET_HIGH] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && 
(imm5_op==5'h17); + cheri_operator_o[CGET_TOP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h18); + cheri_operator_o[CGET_LEN] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h03); + cheri_operator_o[CGET_TAG] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h04); + cheri_operator_o[CRRL] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h08); + cheri_operator_o[CRAM] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h09); + cheri_operator_o[CGET_ADDR] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0f); + cheri_operator_o[CMOVE_CAP] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0a); + cheri_operator_o[CCLEAR_TAG] = cheri_opcode_en_i && (func3_op==0) && (func7_op==7'h7f) && (imm5_op==5'h0b); + + cheri_operator_o[CINC_ADDR_IMM] = cheri_opcode_en_i && (func3_op == 1); + cheri_operator_o[CSET_BOUNDS_IMM] = cheri_opcode_en_i && (func3_op == 2); + + + cheri_operator_o[CAUIPCC] = cheri_auipcc_en_i; + cheri_operator_o[CAUICGP] = cheri_auicgp_en_i; + cheri_operator_o[CJALR] = cheri_jalr_en_i; + cheri_operator_o[CJAL] = cheri_jal_en_i; + cheri_operator_o[CLOAD_CAP] = cheri_cload_en_i; + // cheri_operator_o[CLBC] = cheri_cload_en_i & ~func3_op[2] & cheri_tsafe_en_i; + cheri_operator_o[CSTORE_CAP] = cheri_cstore_en_i; + end + + // partially decoded, early signal to control muxing and regfile read + assign instr_is_cheri_o = cheri_opcode_en_i | cheri_jalr_en_i | cheri_jal_en_i | + cheri_auipcc_en_i | cheri_auicgp_en_i | cheri_cload_en_i | cheri_cstore_en_i; + + assign instr_is_legal_cheri_o = |cheri_operator_o; + + assign cheri_cs2_dec_o = cheri_operator_o[CCSR_RW] ? imm5_op : 0; + + assign cheri_imm12_o = (cheri_operator_o[CJALR]|cheri_operator_o[CSET_BOUNDS_IMM]| + cheri_operator_o[CINC_ADDR_IMM]|cheri_operator_o[CLOAD_CAP]) ? 
+ {func7_op, imm5_op}:(cheri_operator_o[CSTORE_CAP]?{func7_op, rd_op}:0); + + assign cheri_imm20_o = (cheri_operator_o[CAUIPCC]|cheri_operator_o[CAUICGP]) ? instr_rdata_i[31:12] : 0; + + assign cheri_imm21_o = cheri_operator_o[CJAL] ? {instr_rdata_i[31], instr_rdata_i[19:12], + instr_rdata_i[20], instr_rdata_i[30:21], 1'b0} : 0; + + // register dependency decoding (ren_a, ren_b, we) + // only handled opcode=0x5b case here. + // Will be qualified and combined with other cases by ibexc_decoder + assign cheri_rf_ren_a_o = 1'b1; + assign cheri_rf_ren_b_o = (func3_op == 0) && (func7_op != 7'h7f) && (func7_op !=7'h01); + + // cheri_rf_we_dec_o is not used to generate the actual regfile write enables in the case of + // cheri instructions (which is in cheri_ex and muxed with rf_we in wb_stage). + // However it is merged into the overall rf_we and used to generate stall_cheri_trvk + assign cheri_rf_we_dec_o = cheri_opcode_en_i & (|cheri_operator_o); + + assign cheri_multicycle_dec_o = (cheri_operator_o[CLOAD_CAP] & cheri_tsafe_en_i & ~CheriPPLBC) | + (CheriSBND2 & (cheri_operator_o[CSET_BOUNDS] | + cheri_operator_o[CSET_BOUNDS_IMM] | + cheri_operator_o[CSET_BOUNDS_EX] | + cheri_operator_o[CRRL] | cheri_operator_o[CRAM])); + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_ex.sv b/hw/ip/cheriot-ibex/rtl/cheri_ex.sv new file mode 100644 index 0000000..45dd6c2 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_ex.sv
@@ -0,0 +1,1172 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +module cheri_ex import cheri_pkg::*; #( + parameter bit WritebackStage = 1'b0, + parameter bit MemCapFmt = 1'b0, + parameter int unsigned HeapBase = 32'h2001_0000, + parameter int unsigned TSMapBase = 32'h2002_f000, + parameter int unsigned TSMapSize = 1024, + parameter bit CheriPPLBC = 1'b1, + parameter bit CheriSBND2 = 1'b0, + parameter bit CheriStkZ = 1'b1, + parameter bit CheriCapIT8 = 1'b0 +)( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + + // configuration & control + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + input logic debug_mode_i, + + // data forwarded from WB stage + input logic fwd_we_i, + input logic [4:0] fwd_waddr_i, + input logic [31:0] fwd_wdata_i, + input reg_cap_t fwd_wcap_i, + + // regfile interface + input logic [4:0] rf_raddr_a_i, + input logic [31:0] rf_rdata_a_i, + input reg_cap_t rf_rcap_a_i, + input logic [4:0] rf_raddr_b_i, + input logic [31:0] rf_rdata_b_i, + input reg_cap_t rf_rcap_b_i, + output logic rf_trsv_en_o, + input logic [4:0] rf_waddr_i, + + // pcc interface + input pcc_cap_t pcc_cap_i, + output pcc_cap_t pcc_cap_o, + input logic [31:0] pc_id_i, + + // use branch_req_o also to update pcc cap + output logic branch_req_o, // update PCC (goes to cs_registers) + output logic branch_req_spec_o, // speculative branch request (go to IF) + output logic [31:0] branch_target_o, + + // Interface to ID stage control logic + input logic cheri_exec_id_i, + input logic instr_first_cycle_i, // 1st exec cycle allowing lsu_req + + // inputs from decoder + input logic instr_valid_i, + input logic instr_is_cheri_i, + input logic instr_is_rv32lsu_i, + input logic instr_is_compressed_i, + input logic [11:0] cheri_imm12_i, + input logic [19:0] cheri_imm20_i, + input logic [20:0] cheri_imm21_i, + input logic [4:0] cheri_cs2_dec_i, // cs2 
used for CSR address + input logic [OPDW-1:0] cheri_operator_i, + + // output to wb stage + output logic cheri_rf_we_o, + output logic [31:0] result_data_o, + output reg_cap_t result_cap_o, + + output logic cheri_ex_valid_o, + output logic cheri_ex_err_o, + output logic [11:0] cheri_ex_err_info_o, + output logic cheri_wb_err_o, + output logic [15:0] cheri_wb_err_info_o, + + // lsu interface + output logic lsu_req_o, + output logic lsu_cheri_err_o, + output logic lsu_is_cap_o, + output logic [3:0] lsu_lc_clrperm_o, + output logic lsu_we_o, + output logic [31:0] lsu_addr_o, + output logic [1:0] lsu_type_o, + output logic [32:0] lsu_wdata_o, + output reg_cap_t lsu_wcap_o, + output logic lsu_sign_ext_o, + output logic cpu_stall_by_stkz_o, + output logic cpu_grant_to_stkz_o, + + input logic addr_incr_req_i, + input logic [31:0] addr_last_i, + input logic lsu_req_done_i, + input logic [32:0] lsu_rdata_i, + input reg_cap_t lsu_rcap_i, + + // LSU interface to the existing core (muxed) + input logic rv32_lsu_req_i, + input logic rv32_lsu_we_i, + input logic [1:0] rv32_lsu_type_i, + input logic [31:0] rv32_lsu_wdata_i, + input logic rv32_lsu_sign_ext_i, + input logic [31:0] rv32_lsu_addr_i, + output logic rv32_addr_incr_req_o, + output logic [31:0] rv32_addr_last_o, + + // TBRE LSU request (for muxing) + input logic lsu_tbre_sel_i, + input logic tbre_lsu_req_i, + input logic tbre_lsu_is_cap_i, + input logic tbre_lsu_we_i, + input logic [31:0] tbre_lsu_addr_i, + input logic [32:0] tbre_lsu_wdata_i, + output logic cpu_lsu_dec_o, + + input logic [31:0] csr_rdata_i, + input reg_cap_t csr_rcap_i, + input logic csr_mstatus_mie_i, + output logic csr_access_o, + output logic [4:0] csr_addr_o, + output logic [31:0] csr_wdata_o, + output reg_cap_t csr_wcap_o, + output cheri_csr_op_e csr_op_o, + output logic csr_op_en_o, + output logic csr_set_mie_o, + output logic csr_clr_mie_o, + + // stack highwater mark updates + input logic [31:0] csr_mshwm_i, + input logic [31:0] csr_mshwmb_i, + 
output logic csr_mshwm_set_o, + output logic [31:0] csr_mshwm_new_o, + + // stack fast clearing control signals + input logic stkz_active_i, + input logic stkz_abort_i, + input logic [31:0] stkz_ptr_i, + input logic [31:0] stkz_base_i, + + output logic ztop_wr_o, + output logic [31:0] ztop_wdata_o, + output full_cap_t ztop_wfcap_o, + input logic [31:0] ztop_rdata_i, + input reg_cap_t ztop_rcap_i, + + // debug feature + input logic csr_dbg_tclr_fault_i +); + + localparam int unsigned TSMapTop = TSMapBase+TSMapSize*4; + + logic cheri_lsu_req; + logic cheri_lsu_we; + logic [31:0] cheri_lsu_addr; + logic [32:0] cheri_lsu_wdata; + reg_cap_t cheri_lsu_wcap; + logic cheri_lsu_err; + logic cheri_lsu_is_cap; + + logic [31:0] rf_rdata_a, rf_rdata_ng_a; + logic [31:0] rf_rdata_b, rf_rdata_ng_b; + + reg_cap_t rf_rcap_a, rf_rcap_ng_a; + reg_cap_t rf_rcap_b, rf_rcap_ng_b; + + full_cap_t rf_fullcap_a, rf_fullcap_b; + + reg_cap_t csc_wcap; + + logic is_load_cap, is_store_cap, is_cap; + + logic addr_bound_vio; + logic perm_vio, perm_vio_slc; + logic rv32_lsu_err; + logic addr_bound_vio_rv32; + logic perm_vio_rv32; + + logic [W_PVIO-1:0] perm_vio_vec, perm_vio_vec_rv32; + + logic [31:0] cs1_addr_plusimm; + logic [31:0] cs1_imm; + logic [31:0] addr_result; + + + logic cheri_rf_we_raw, branch_req_raw, branch_req_spec_raw; + logic csr_set_mie_raw, csr_clr_mie_raw; + logic cheri_ex_valid_raw, cheri_ex_err_raw; + logic csr_op_en_raw; + logic cheri_wb_err_raw; + logic cheri_wb_err_q, cheri_wb_err_d; + logic ztop_wr_raw; + + logic [3:0] cheri_lsu_lc_clrperm; + logic lc_cglg, lc_csdlm, lc_ctag; + logic [31:0] pc_id_nxt; + + full_cap_t setaddr1_outcap, setbounds_outcap, setbounds_rndn_outcap; + logic [15:0] cheri_wb_err_info_q, cheri_wb_err_info_d; + logic set_bounds_done; + + logic [4:0] cheri_err_cause, rv32_err_cause; + logic [31:0] cpu_lsu_addr; + logic [32:0] cpu_lsu_wdata; + logic cpu_lsu_we; + logic cpu_lsu_cheri_err, cpu_lsu_is_cap; + + logic illegal_scr_addr; + logic 
scr_legalization;

  // data forwarding for CHERI instructions
  // - note address 0 is a read-only location per RISC-V
  // Forwards the WB-stage result (data word and capability half together) into the
  // ID/EX read operands when addresses match; x0/c0 is never forwarded.
  always_comb begin : fwd_data_merger
    if ((rf_raddr_a_i == fwd_waddr_i) && fwd_we_i && (|rf_raddr_a_i)) begin
      rf_rdata_ng_a = fwd_wdata_i;
      rf_rcap_ng_a  = fwd_wcap_i;
    end else begin
      rf_rdata_ng_a = rf_rdata_a_i;
      rf_rcap_ng_a  = rf_rcap_a_i;
    end

    if ((rf_raddr_b_i == fwd_waddr_i) && fwd_we_i && (|rf_raddr_b_i)) begin
      rf_rdata_ng_b = fwd_wdata_i;
      rf_rcap_ng_b  = fwd_wcap_i;
    end else begin
      rf_rdata_ng_b = rf_rdata_b_i;
      rf_rcap_ng_b  = rf_rcap_b_i;
    end
  end

  // 1st level of operand gating (power-saving)
  // - gate off the input to reg2full conversion logic
  // - note rv32 lsu req only uses cs1
  // - may need to use dont_touch gates
  assign rf_rcap_a  = (instr_is_cheri_i | instr_is_rv32lsu_i) ? rf_rcap_ng_a : NULL_REG_CAP;
  assign rf_rdata_a = (instr_is_cheri_i | instr_is_rv32lsu_i) ? rf_rdata_ng_a : 32'h0;

  assign rf_rcap_b  = instr_is_cheri_i ? rf_rcap_ng_b : NULL_REG_CAP;
  assign rf_rdata_b = instr_is_cheri_i ? rf_rdata_ng_b : 32'h0;

  // expand the compressed register capabilities to full (bounds-decoded) form
  assign rf_fullcap_a = reg2fullcap(rf_rcap_a, rf_rdata_a);
  assign rf_fullcap_b = reg2fullcap(rf_rcap_b, rf_rdata_b);

  // gate these signals with cheri_exec_id to make sure they are only active when needed
  // (only 1 cycle in all cases other than cheri_rf_we)
  // -- safest approach and probably the right thing to do in case there is a wb_exception
  assign cheri_rf_we_o     = cheri_rf_we_raw & cheri_exec_id_i;
  assign branch_req_o      = branch_req_raw & cheri_exec_id_i;
  assign branch_req_spec_o = branch_req_spec_raw & cheri_exec_id_i;
  assign csr_set_mie_o     = csr_set_mie_raw & cheri_exec_id_i;
  assign csr_clr_mie_o     = csr_clr_mie_raw & cheri_exec_id_i;
  assign csr_op_en_o       = csr_op_en_raw & cheri_exec_id_i;
  assign ztop_wr_o         = ztop_wr_raw & cheri_exec_id_i;

  // ex_valid only used in multicycle case
  // ex_err is used for id exceptions
  assign cheri_ex_valid_o = cheri_ex_valid_raw & cheri_exec_id_i;
  assign cheri_ex_err_o   = cheri_ex_err_raw & cheri_exec_id_i & ~debug_mode_i;

  // with a writeback stage the faulting instruction reports from the flopped
  // (WB-aligned) error; without WB the combinational error is used directly
  if (WritebackStage) begin
    assign cheri_wb_err_o = cheri_wb_err_q;
  end else begin
    assign cheri_wb_err_o = cheri_wb_err_d;
  end

  // CLC load-side clear-permission mask; no permission clearing in debug mode
  assign cheri_lsu_lc_clrperm = debug_mode_i ?
4'h0 : {lc_ctag, 1'b0, lc_csdlm, lc_cglg};

  //
  // Main single-cycle CHERI execution logic: a one-hot case over the decoded
  // operator vector. All *_raw results are qualified with cheri_exec_id_i at
  // the module outputs, so defaults here are safe when not executing.
  //
  always_comb begin : main_ex
    logic [PERMS_W-1:0] perms_temp;   // NOTE(review): only default-assigned below; appears unused
    full_cap_t tfcap;

    //default
    cheri_rf_we_raw    = 1'b0;
    result_data_o      = 32'h0;
    result_cap_o       = NULL_REG_CAP;
    csc_wcap           = NULL_REG_CAP;
    cheri_ex_valid_raw = 1'b0;
    cheri_ex_err_raw   = 1'b0;
    cheri_wb_err_raw   = 1'b0;
    perms_temp         = 0;

    csr_access_o     = 1'b0;
    csr_addr_o       = 5'h0;
    csr_wdata_o      = 32'h0;
    csr_wcap_o       = NULL_REG_CAP;
    csr_op_o         = CHERI_CSR_NULL;
    csr_op_en_raw    = 1'b0;
    scr_legalization = 1'b0;

    branch_req_raw      = 1'b0;
    branch_req_spec_raw = 1'b0;
    csr_set_mie_raw     = 1'b0;
    csr_clr_mie_raw     = 1'b0;
    branch_target_o     = 32'h0;
    pcc_cap_o           = NULL_PCC_CAP;
    tfcap               = NULL_FULL_CAP;
    lc_cglg             = 1'b0;
    lc_csdlm            = 1'b0;
    lc_ctag             = 1'b0;
    rf_trsv_en_o        = 1'b0;
    ztop_wr_raw         = 1'b0;
    ztop_wdata_o        = 32'h0;
    ztop_wfcap_o        = NULL_FULL_CAP;

    unique case (1'b1)
      cheri_operator_i[CGET_PERM]:       // rd <-- cs1.perms (expanded)
        begin
          result_data_o      = {19'h0, rf_fullcap_a.perms};
          result_cap_o       = NULL_REG_CAP;   // zero out the cap msw
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_TYPE]:       // rd <-- decoded otype of cs1
        begin
          result_data_o      = {28'h0, decode_otype(rf_fullcap_a.otype, rf_fullcap_a.perms[PERM_EX])};
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_BASE]:       // rd <-- cs1.base
        begin
          result_data_o      = rf_fullcap_a.base32;
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_TOP]:        // rd <-- cs1.top, saturated to 32 bits
        begin
          result_data_o      = rf_fullcap_a.top33[32] ? 32'hffff_ffff : rf_fullcap_a.top33[31:0];
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_LEN]:        // rd <-- cs1 length
        begin
          result_data_o      = get_cap_len(rf_fullcap_a);
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_TAG]:        // rd <-- cs1.tag
        begin
          result_data_o      = {31'h0, rf_fullcap_a.valid};
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_ADDR]:       // rd <-- cs1.address
        begin
          result_data_o      = rf_rdata_a;
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CGET_HIGH]:       // rd <-- memory-format upper word of cs1
        begin
          logic [65:0] tmp66;
          tmp66 = MemCapFmt ? (CheriCapIT8 ? reg2mem_it8_fmt1(rf_rcap_a, rf_rdata_a) :
                                             reg2mem_fmt1(rf_rcap_a, rf_rdata_a)) :
                              (CheriCapIT8 ? {reg2memcap_it8_fmt0(rf_rcap_a), 1'b0, rf_rdata_a[31:0]} :
                                             {reg2memcap_fmt0(rf_rcap_a), 1'b0, rf_rdata_a[31:0]});
          result_data_o      = tmp66[64:33];
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL]):
        begin // cd <-- cs1; cd.otyp <-- cs2.otype; cd.sealed <-- val
          result_data_o = rf_rdata_a;

          if (cheri_operator_i[CSEAL])
            result_cap_o = full2regcap(seal_cap(rf_fullcap_a, rf_rdata_b[OTYPE_W-1:0]));
          else begin
            // CUNSEAL: result global permission is the AND of cs1 and cs2 GL
            tfcap = unseal_cap(rf_fullcap_a);
            tfcap.perms[PERM_GL] = rf_fullcap_a.perms[PERM_GL] & rf_fullcap_b.perms[PERM_GL];
            tfcap.cperms = compress_perms(tfcap.perms, tfcap.cperms[5:4]);
            result_cap_o = full2regcap(tfcap);
          end

          // bound/permission violations (from check_cheri) clear the result tag
          result_cap_o.valid = result_cap_o.valid & (~addr_bound_vio) & (~perm_vio);
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CAND_PERM]: // cd <-- cs1; cd.perm <-- cd.perm & rs2
        begin
          logic [PERMS_W-1:0] pmask;
          result_data_o = rf_rdata_a;
          tfcap = rf_fullcap_a;
          tfcap.perms  = tfcap.perms & rf_rdata_b[PERMS_W-1:0];
          tfcap.cperms = compress_perms(tfcap.perms, tfcap.cperms[5:4]);
          // for sealed caps, clear tag unless perm mask (excluding GL) == all '1'
          pmask = rf_rdata_b[PERMS_W-1:0];
          pmask[PERM_GL] = 1'b1;
          tfcap.valid = tfcap.valid & (~is_cap_sealed(rf_fullcap_a) | (&pmask));
          result_cap_o = full2regcap(tfcap);
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CSET_HIGH]: // cd <-- cs1; cd.high <-- convert(rs2)
        begin
          // this only works for memcap_fmt0 for now QQQ
          result_data_o = rf_rdata_a;
          result_cap_o = CheriCapIT8 ? mem2regcap_it8_fmt0({1'b0, rf_rdata_b}, {1'b0, rf_rdata_a}, 4'h0) :
                                       mem2regcap_fmt0({1'b0, rf_rdata_b}, {1'b0, rf_rdata_a}, 4'h0);
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end

      // setaddr/incoffset: cd <-- cs1; cd.offset <-- rs2, or cs1.addr + rs2, or cs1.addr + imm12
      // auipcc: cd <-- pcc, cd.address <-- pcc.address + (imm20 << 12)
      (cheri_operator_i[CSET_ADDR] | cheri_operator_i[CINC_ADDR] |
       cheri_operator_i[CINC_ADDR_IMM] | cheri_operator_i[CAUIPCC] | cheri_operator_i[CAUICGP]):
        begin
          logic clr_sealed;
          logic instr_fault;

          result_data_o = addr_result;

          // for pointer operations, follow C convention and allow newptr == top
          clr_sealed = cheri_operator_i[CAUIPCC] ? 1'b0 : is_cap_sealed(rf_fullcap_a);
          tfcap = setaddr1_outcap;
          tfcap.valid = tfcap.valid & ~clr_sealed;
          result_cap_o = full2regcap(tfcap);
          // optional debug feature: escalate a tag-clearing result to a WB fault
          instr_fault = csr_dbg_tclr_fault_i & (rf_fullcap_a.valid | cheri_operator_i[CAUIPCC]) &
                        ~result_cap_o.valid;
          cheri_wb_err_raw   = instr_fault;
          cheri_rf_we_raw    = ~instr_fault;
          cheri_ex_valid_raw = 1'b1;
        end
      (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] | cheri_operator_i[CSET_BOUNDS_EX] |
       cheri_operator_i[CRRL] | cheri_operator_i[CRAM] | cheri_operator_i[CSET_BOUNDS_RNDN]):
        begin // cd <-- cs1; cd.base <-- cs1.address, cd.len <-- rs2 or imm12
          logic instr_fault;

          tfcap = cheri_operator_i[CSET_BOUNDS_RNDN] ? setbounds_rndn_outcap : setbounds_outcap;
          tfcap.valid = tfcap.valid & ~is_cap_sealed(rf_fullcap_a);

          if (cheri_operator_i[CRRL]) begin
            // CRRL: representable rounded length only (no capability result)
            result_data_o = tfcap.rlen;
            result_cap_o  = NULL_REG_CAP;
          end else if (cheri_operator_i[CRAM]) begin
            // CRAM: representable alignment mask only
            result_data_o = tfcap.maska;
            result_cap_o  = NULL_REG_CAP;
          end else begin
            result_data_o = rf_rdata_a;
            result_cap_o  = full2regcap(tfcap);
          end

          // with CheriSBND2, set_bounds_done arrives in the 2nd cycle
          cheri_ex_valid_raw = set_bounds_done;
          instr_fault = csr_dbg_tclr_fault_i & rf_fullcap_a.valid & ~result_cap_o.valid &
                        (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] |
                         cheri_operator_i[CSET_BOUNDS_EX] | cheri_operator_i[CSET_BOUNDS_RNDN]);
          cheri_rf_we_raw  = ~instr_fault;
          cheri_wb_err_raw = instr_fault;
        end
      cheri_operator_i[CCLEAR_TAG]: // cd <-- cs1; cd.tag <-- '0'
        begin
          result_data_o      = rf_rdata_a;
          result_cap_o       = rf_rcap_a;
          result_cap_o.valid = 1'b0;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CIS_SUBSET]: // rd <-- (cs1.tag == cs2.tag) && (cs2 is_subset_of cs1)
        begin
          // bounds subset comes from check_cheri (addr_bound_vio); perms subset checked here
          result_data_o = 32'((rf_fullcap_a.valid == rf_fullcap_b.valid) &&
                              ~addr_bound_vio && (&(rf_fullcap_a.perms | ~rf_fullcap_b.perms)));
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CIS_EQUAL]: // rd <-- (cs1 == cs2)
        begin
          result_data_o = 32'(is_equal(rf_fullcap_a, rf_fullcap_b, rf_rdata_a, rf_rdata_b));
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CSUB_CAP]: // rd <-- cs1.addr - cs2.addr
        begin
          result_data_o = rf_rdata_a - rf_rdata_b;
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CMOVE_CAP]: // cd <-- cs1
        begin
          result_data_o      = rf_rdata_a;
          result_cap_o       = rf_rcap_a;
          cheri_rf_we_raw    = 1'b1;
          cheri_ex_valid_raw = 1'b1;
        end
      cheri_operator_i[CLOAD_CAP]:
        begin
          // load-side clear-permission controls: missing LG/LM/MC permissions on the
          // authorizing cap strip global/mutable-load/tag from the loaded cap
          lc_cglg  = ~rf_fullcap_a.perms[PERM_LG];
          lc_csdlm = ~rf_fullcap_a.perms[PERM_LM];
          lc_ctag  = ~rf_fullcap_a.perms[PERM_MC];

          result_data_o      = 32'h0;
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b0;
          cheri_ex_valid_raw = 1'b1; // lsu_req_done is factored in by id_stage
          cheri_ex_err_raw   = 1'b0; // acc err passed to LSU and processed later in WB
          rf_trsv_en_o       = CheriPPLBC & cheri_tsafe_en_i & lsu_req_done_i;
        end
      cheri_operator_i[CSTORE_CAP]:
        begin
          result_data_o      = 32'h0;
          result_cap_o       = NULL_REG_CAP;
          cheri_rf_we_raw    = 1'b0;
          cheri_ex_valid_raw = 1'b1;
          cheri_ex_err_raw   = 1'b0; // acc err passed to LSU and processed later in WB
          // store-local violation clears the stored tag rather than faulting
          csc_wcap = rf_rcap_b;
          csc_wcap.valid = rf_rcap_b.valid & ~perm_vio_slc;
        end
      cheri_operator_i[CCSR_RW]: // cd <-- scr; scr <-- cs1 if cs1 != C0
        begin
          logic [31:0] tmp32;   // NOTE(review): appears unused in this arm
          logic is_ztop, is_write;
          reg_cap_t trcap;
          logic instr_fault;

          is_ztop  = (cheri_cs2_dec_i==CHERI_SCR_ZTOPC);
          is_write = (rf_raddr_a_i != 0);
          instr_fault = perm_vio | illegal_scr_addr;

          csr_access_o  = ~instr_fault;
          csr_op_o      = CHERI_CSR_RW;
          csr_op_en_raw = ~instr_fault && is_write && ~is_ztop;
          ztop_wr_raw   = ~instr_fault && is_write && is_ztop;
          csr_addr_o    = cheri_cs2_dec_i;

          if (cheri_cs2_dec_i == CHERI_SCR_MTCC) begin
            // MTVEC/MTCC legalization (clear tag if checking fails)
            // note we don't really need set_address checks here - it's only used to update temp fields
            // so that RTL behavior would match sail
            scr_legalization = 1'b1;
            csr_wdata_o = {rf_rdata_a[31:2], 2'b00};
            trcap = full2regcap(setaddr1_outcap);
            if ((rf_rdata_a[1:0] != 2'b00) || ~rf_fullcap_a.perms[PERM_EX] || (rf_fullcap_a.otype != 0))
              trcap.valid = 1'b0;
            else
              trcap.valid = rf_fullcap_a.valid;
            csr_wcap_o = trcap;
          end else if (cheri_cs2_dec_i == CHERI_SCR_MEPCC) begin
            // MEPCC legalization (clear tag if checking fails)
            scr_legalization = 1'b1;
            csr_wdata_o = {rf_rdata_a[31:1], 1'b0};
            trcap = full2regcap(setaddr1_outcap);
            if ((rf_rdata_a[0] != 1'b0) || ~rf_fullcap_a.perms[PERM_EX] || (rf_fullcap_a.otype != 0))
              trcap.valid = 1'b0;
            else
              trcap.valid = rf_fullcap_a.valid;
            csr_wcap_o = trcap;
          end else begin
            scr_legalization = 1'b0;
            csr_wdata_o = rf_rdata_a;
            csr_wcap_o  = rf_rcap_a;
          end

          if (is_ztop) begin
            // ZTOPC is held in the stack-zeroization engine, not the SCR file
            result_data_o = ztop_rdata_i;
            result_cap_o  = ztop_rcap_i;
            ztop_wfcap_o  = rf_fullcap_a;
            ztop_wdata_o  = rf_rdata_a;
          end else begin
            result_data_o = csr_rdata_i;
            result_cap_o  = csr_rcap_i;
            ztop_wfcap_o  = NULL_FULL_CAP;
            ztop_wdata_o  = 32'h0;
          end
          cheri_rf_we_raw    = ~instr_fault;
          cheri_ex_valid_raw = 1'b1;
          cheri_wb_err_raw   = instr_fault;
        end
      (cheri_operator_i[CJALR] | cheri_operator_i[CJAL]):
        begin // cd <-- pcc; pcc <-- cs1/pc+offset; pcc.address[0] <--'0'; pcc.sealed <--'0'
          logic [2:0] seal_type;
          logic instr_fault;

          // note this is the RV32 definition of JALR arithmetic (add first then mask of lsb)
          branch_target_o = {addr_result[31:1], 1'b0};
          pcc_cap_o = full2pcap(unseal_cap(rf_fullcap_a));
          // Note we can't directly use pc_if here
          // (link address == pc_id + delta, but pc_if should be the next executed PC (the jump target)
          // if branch prediction works)
          result_data_o = pc_id_nxt;
          // link register (cra) gets a backward sentry recording the current MIE state
          seal_type = csr_mstatus_mie_i ? OTYPE_SENTRY_IE_BKWD : OTYPE_SENTRY_ID_BKWD;
          //tfcap = seal_cap(setaddr1_outcap, seal_type);
          tfcap = (rf_waddr_i == 5'h1) ? seal_cap(setaddr1_outcap, seal_type) :
                                         setaddr1_outcap;
          result_cap_o = full2regcap(tfcap);

          // problem with instr_fault: the pcc_cap.valid check causing timing issue on instr_addr_o
          // -- use the speculative version for instruction fetch
          // -- the ID exception (cheri_ex_err) flushes the pipeline and re-set PC so
          //    the speculatively fetched instruction will be flushed
          // -- this is now mitigated since we no longer do address bound checking here
          //    but let's keep the solution for now
          instr_fault = perm_vio;

          cheri_rf_we_raw = ~instr_fault;                          // err -> wb exception
          branch_req_raw  = ~instr_fault & cheri_operator_i[CJALR]; // update PCC in CSR
          // branch_req_spec_raw = 1'b1;
          branch_req_spec_raw = ~instr_fault;                      // set fetch PC

          cheri_wb_err_raw = instr_fault;
          cheri_ex_err_raw = 1'b0;
          // interrupt-enabling/disabling sentries restore MIE on CJALR
          csr_set_mie_raw = ~instr_fault && cheri_operator_i[CJALR] &&
                            ((rf_fullcap_a.otype == OTYPE_SENTRY_IE_FWD) ||
                             (rf_fullcap_a.otype == OTYPE_SENTRY_IE_BKWD)) ;
          csr_clr_mie_raw = ~instr_fault && cheri_operator_i[CJALR] &&
                            ((rf_fullcap_a.otype == OTYPE_SENTRY_ID_FWD) ||
                             (rf_fullcap_a.otype == OTYPE_SENTRY_ID_BKWD)) ;
          cheri_ex_valid_raw = 1'b1;
        end
      default:;
    endcase
  end // always_comb : main_ex

  assign is_load_cap  = cheri_operator_i[CLOAD_CAP];
  assign is_store_cap = cheri_operator_i[CSTORE_CAP];

  assign is_cap = cheri_operator_i[CLOAD_CAP] | cheri_operator_i[CSTORE_CAP];

  // muxing between "normal cheri LSU requests (clc/csc) and CLBC

  if (WritebackStage) begin
    // assert LSU req until instruction is retired (req_done from LSU)
    // note if the previous instr is also a load/store, cheri_exec_id won't be asserted
    // till WB is ready (lsu_resp for the previous instr)
    assign cheri_lsu_req = is_cap & cheri_exec_id_i;
  end else begin
    // no WB stage, only assert req in the first_cycle phase of the instruction
    // (consistent with the RV32 load/store instructions)
    // Here instruction won't complete till lsu_resp_valid in this case,
// keeping lsu_req asserted causes problem as LSU sees it as a new request
    assign cheri_lsu_req = is_cap & cheri_exec_id_i & instr_first_cycle_i;
  end

  assign cheri_lsu_we     = is_store_cap;
  // addr_incr_req_i adds 4 for the second word of a split (two-beat) capability access
  assign cheri_lsu_addr   = cs1_addr_plusimm + {29'h0, addr_incr_req_i, 2'b00};
  assign cheri_lsu_is_cap = is_cap;

  // bit 32 of wdata carries the (store-local-qualified) tag for CSC
  assign cheri_lsu_wdata = is_store_cap ? {csc_wcap.valid, rf_rdata_b} : 33'h0;
  assign cheri_lsu_wcap  = is_store_cap ? csc_wcap : NULL_REG_CAP;

  // RS1/CS1+offset is
  // - the starting address for cheri L*/S*.CAP instructions
  // keep this separate to help timing on the memory interface
  assign cs1_imm = (is_cap|cheri_operator_i[CJALR]) ? {{20{cheri_imm12_i[11]}}, cheri_imm12_i} : 0;

  assign cs1_addr_plusimm = rf_rdata_a + cs1_imm;

  // link address: sequentially next PC after the jump/branch instruction
  assign pc_id_nxt = pc_id_i + (instr_is_compressed_i ? 2 : 4);

  //
  // shared adder for address calculation
  // (one adder serves jump targets, AUIPCC/AUICGP and pointer arithmetic)
  //
  always_comb begin : shared_adder
    logic [31:0] tmp32a, tmp32b;

    // operand A: the immediate or rs2 offset
    if (cheri_operator_i[CJALR])              tmp32a = {{20{cheri_imm12_i[11]}}, cheri_imm12_i};
    else if (cheri_operator_i[CJAL])          tmp32a = {{11{cheri_imm21_i[20]}}, cheri_imm21_i};
    else if (cheri_operator_i[CAUIPCC])       tmp32a = {cheri_imm20_i[19], cheri_imm20_i, 11'h0};
    else if (cheri_operator_i[CAUICGP])       tmp32a = {cheri_imm20_i[19], cheri_imm20_i, 11'h0};
    else if (cheri_operator_i[CSET_ADDR])     tmp32a = rf_rdata_b;
    else if (cheri_operator_i[CINC_ADDR])     tmp32a = rf_rdata_b;
    else if (cheri_operator_i[CINC_ADDR_IMM]) tmp32a = {{20{cheri_imm12_i[11]}}, cheri_imm12_i};
    else                                      tmp32a = 0;

    // operand B: the base address (cs1.addr or PC)
    if (cheri_operator_i[CJALR])              tmp32b = rf_rdata_a;
    else if (cheri_operator_i[CJAL])          tmp32b = pc_id_i;
    else if (cheri_operator_i[CAUIPCC])       tmp32b = pc_id_i;
    else if (cheri_operator_i[CAUICGP])       tmp32b = rf_rdata_a;
    else if (cheri_operator_i[CSET_ADDR])     tmp32b = 32'h0;
    else if (cheri_operator_i[CINC_ADDR])     tmp32b = rf_rdata_a;
    else if (cheri_operator_i[CINC_ADDR_IMM]) tmp32b = rf_rdata_a;
    else                                      tmp32b = 0;

    addr_result = tmp32a + tmp32b;
  end

  //
// Big combinational functions
  // - break out to make sure we can properly gate off operands to save power
  //
  // Shared set_address path: computes the representability-checked capability
  // used for link registers (CJAL/CJALR), AUIPCC, pointer-arithmetic ops and
  // MTCC/MEPCC SCR legalization.
  always_comb begin: set_address_comb
    full_cap_t tfcap1;
    logic [31:0] taddr1;

    // set_addr operation 1
    if (cheri_operator_i[CJAL] | cheri_operator_i[CJALR]) begin
      // we don't really need the representability check here, but update_temp_fields is necessary
      tfcap1 = pcc2fullcap(pcc_cap_i); // pcc to link register
      taddr1 = pc_id_nxt;
    end else if (cheri_operator_i[CAUIPCC]) begin
      tfcap1 = pcc2fullcap(pcc_cap_i);
      taddr1 = addr_result;
    end else if (cheri_operator_i[CSET_ADDR] | cheri_operator_i[CINC_ADDR] |
                 cheri_operator_i[CINC_ADDR_IMM] | cheri_operator_i[CAUICGP]) begin
      tfcap1 = rf_fullcap_a;
      taddr1 = addr_result;
    end else if (scr_legalization) begin
      tfcap1 = rf_fullcap_a;
      taddr1 = csr_wdata_o;
    end else begin
      tfcap1 = NULL_FULL_CAP;
      taddr1 = 32'h0;
    end

    // representability check only
    setaddr1_outcap = set_address(tfcap1, taddr1, 0, 0);
  end

  // bound_req1 feeds bound_req2 either combinationally or via a flop (CheriSBND2)
  bound_req_t bound_req1, bound_req2;

  // Shared set_bounds path: serves CSetBounds/Exact/Imm/RoundDown and CRRL/CRAM.
  always_comb begin: set_bounds_comb
    logic [31:0] newlen;
    logic req_exact;
    logic [31:0] tmp_addr;
    full_cap_t tfcap3;

    // set_bounds
    if (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_RNDN]) begin
      newlen = rf_rdata_b;
      req_exact = 1'b0;
      tfcap3 = rf_fullcap_a;
      tmp_addr = rf_rdata_a;
    end else if (cheri_operator_i[CSET_BOUNDS_EX]) begin
      // exact variant: fault/clear if requested bounds are not representable exactly
      newlen = rf_rdata_b;
      req_exact = 1'b1;
      tfcap3 = rf_fullcap_a;
      tmp_addr = rf_rdata_a;
    end else if (cheri_operator_i[CSET_BOUNDS_IMM]) begin
      newlen = 32'(cheri_imm12_i); // unsigned imm
      req_exact = 1'b0;
      tfcap3 = rf_fullcap_a;
      tmp_addr = rf_rdata_a;
    end else if (cheri_operator_i[CRRL] | cheri_operator_i[CRAM]) begin
      // CRRL/CRAM only query rounding behavior for a length in rs1
      newlen = rf_rdata_a;
      req_exact = 1'b0;
      tfcap3 = NULL_FULL_CAP;
      tmp_addr = 0;
    end else begin
      newlen = 32'h0;
      req_exact = 1'b0;
      tfcap3 = NULL_FULL_CAP;
      tmp_addr = 0;
    end

    bound_req1 = CheriCapIT8 ? prep_bound_req_it8 (tfcap3, tmp_addr, newlen) :
                               prep_bound_req (tfcap3, tmp_addr, newlen);

    setbounds_outcap = set_bounds(tfcap3, tmp_addr, bound_req2, req_exact);

    setbounds_rndn_outcap = CheriCapIT8 ? set_bounds_rndn_it8(tfcap3, tmp_addr, bound_req2) :
                                          set_bounds_rndn(tfcap3, tmp_addr, bound_req2);
  end

  if (CheriSBND2) begin
    // 2-cycle set-bounds: pipeline the prepared bound request through a flop
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        bound_req2 <= '{0, 0, 0, 0, 0, 0};
        set_bounds_done <= 1'b0;
      end else begin
        bound_req2 <= bound_req1;
        // set_bounds_done is asserted in the 2nd cycle of execution when SBD2 == 1
        // note in ibex it actually is ok to hold set_bounds_done high for both cycles
        // since the multicycle control logic won't look at ex_valid till the 2nd cycle
        // however this is the cleaner solution.
        set_bounds_done <= (cheri_operator_i[CSET_BOUNDS] | cheri_operator_i[CSET_BOUNDS_IMM] |
                            cheri_operator_i[CSET_BOUNDS_EX] | cheri_operator_i[CRRL] |
                            cheri_operator_i[CRAM]) & cheri_exec_id_i & ~set_bounds_done ;
      end
    end
  end else begin
    assign bound_req2 = bound_req1;
    assign set_bounds_done = 1'b1;
  end



  // address bound and permission checks for
  // - cheri no-LSU instructions
  // - cheri LSU (cap) instructions (including internal instr like LBC)
  // - RV32 LSU (data) instructions
  // this is an architectural access check (applies to the whole duration of an instruction)
  // - based on architectural capability registers and addresses

  // - originally we combined checking for CHERI and RV32 but it caused a combi loop
  // that goes from instr_executing -> rv32_lsu_req -> lsu_error -> cheri_ex_err -> instr_executing
  // it's not a real runtime issue but it does confuse timing tools so let's split for now.
// Besides - note checking/lsu_cheri_err_o is one timing critical path
  logic [31:0] rv32_ls_chkaddr;
  assign rv32_ls_chkaddr = rv32_lsu_addr_i;

  // Bounds/permission check for legacy RV32 loads/stores authorized by cs1.
  always_comb begin : check_rv32
    logic [31:0] top_offset;
    logic [32:0] top_bound;
    logic [31:0] base_bound, base_chkaddr;
    logic top_vio, base_vio;
    logic [32:0] top_chkaddr;
    logic top_size_ok;

    // generate the address used to check top bound violation
    base_chkaddr = rv32_ls_chkaddr;

    // access size from lsu_type; top_size_ok guards the (top - size) subtraction
    // against underflow when the region is smaller than the access itself
    if (rv32_lsu_type_i == 2'b00) begin
      top_offset = 32'h4;
      top_size_ok = |rf_fullcap_a.top33[32:2]; // at least 4 bytes
    end else if (rv32_lsu_type_i == 2'b01) begin
      top_offset = 32'h2;
      top_size_ok = |rf_fullcap_a.top33[32:1];
    end else begin
      top_offset = 32'h1;
      top_size_ok = |rf_fullcap_a.top33[32:0];
    end

    // compare the start address against (top - access size) so only a single
    // comparator is needed on this timing-critical path
    //top_chkaddr = base_chkaddr + top_offset;
    top_chkaddr = {1'b0, base_chkaddr};

    // top_bound = rf_fullcap_a.top33;
    top_bound = rf_fullcap_a.top33 - top_offset;
    base_bound = rf_fullcap_a.base32;

    top_vio = (top_chkaddr > top_bound) || ~top_size_ok;
    base_vio = (base_chkaddr < base_bound);

    // timing critical (data_req_o) path - don't add any unnecessary terms.
    // we will choose with is_cheri on the LSU interface later.
    // for unaligned access, only check the starting (1st) address
    // (if there is an error, addr_incr_req won't be there anyway)
    addr_bound_vio_rv32 = (top_vio | base_vio) & ~addr_incr_req_i ;

    // main permission logic
    perm_vio_vec_rv32 = 0;

    perm_vio_vec_rv32[PVIO_TAG]  = ~rf_fullcap_a.valid;
    perm_vio_vec_rv32[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a);
    perm_vio_vec_rv32[PVIO_LD]   = ((~rv32_lsu_we_i) && (~rf_fullcap_a.perms[PERM_LD]));
    perm_vio_vec_rv32[PVIO_SD]   = (rv32_lsu_we_i && (~rf_fullcap_a.perms[PERM_SD]));

    perm_vio_rv32 = |perm_vio_vec_rv32;
  end

  // checks only apply in CHERI protection mode and outside debug mode
  assign rv32_lsu_err = cheri_pmode_i & ~debug_mode_i & (addr_bound_vio_rv32 | perm_vio_rv32);

  // Cheri instr address bound checking
  // -- we choose to centralize the address bound checking here
  // so that we can mux the inputs and save some area


  logic [31:0] cheri_ls_chkaddr;
  assign cheri_ls_chkaddr = cs1_addr_plusimm;

  // Bounds/permission checks for CHERI instructions (CLC/CSC, CSEAL/CUNSEAL,
  // CIS_SUBSET, CJALR, CCSR_RW). Results feed both tag-clearing and exceptions.
  always_comb begin : check_cheri
    logic [31:0] top_offset;
    logic [32:0] top_bound;
    logic [31:0] base_bound, base_chkaddr;
    logic [32:0] top_chkaddr;
    logic top_vio, base_vio, top_equal;
    logic cs2_bad_type;
    logic cs1_otype_0, cs1_otype_1, cs1_otype_45, cs1_otype_23;
    logic cs2_otype_45;

    // generate the address used to check top bound violation
    if (cheri_operator_i[CSEAL])
      base_chkaddr = rf_rdata_b; // cs2.address
    else if (cheri_operator_i[CUNSEAL])
      // inCapBounds(cs2_val, zero_extend(cs1_val.otype), 1)
      base_chkaddr = {28'h0, decode_otype(rf_fullcap_a.otype, rf_fullcap_a.perms[PERM_EX])};
    else if (cheri_operator_i[CIS_SUBSET])
      base_chkaddr = rf_fullcap_b.base32; // cs2.base32
    else // CLC/CSC
      base_chkaddr = cheri_ls_chkaddr; // cs1.address + offset

    if (cheri_operator_i[CIS_SUBSET])
      top_chkaddr = rf_fullcap_b.top33;
    else if (is_cap) // CLC/CSC
      top_chkaddr = {1'b0, base_chkaddr[31:3], 3'b000};
    else
      top_chkaddr = {1'b0, base_chkaddr};

    if (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL]) begin
      // seal/unseal are authorized by cs2 bounds
      top_bound = rf_fullcap_b.top33;
      base_bound = rf_fullcap_b.base32;
    end else if (is_cap) begin // CLC/CSC
      top_bound = {rf_fullcap_a.top33[32:3], 3'b000}; // 8-byte aligned access only
      base_bound = rf_fullcap_a.base32;
    end else begin
      top_bound = rf_fullcap_a.top33;
      base_bound = rf_fullcap_a.base32;
    end

    top_vio = (top_chkaddr > top_bound);
    base_vio = (base_chkaddr < base_bound);
    top_equal = (top_chkaddr == top_bound);

    if (debug_mode_i)
      addr_bound_vio = 1'b0;
    else if (is_cap)
      addr_bound_vio = top_vio | base_vio | top_equal;
    else if (cheri_operator_i[CIS_SUBSET])
      addr_bound_vio = top_vio | base_vio;
    else if (cheri_operator_i[CSEAL] | cheri_operator_i[CUNSEAL])
      addr_bound_vio = top_vio | base_vio | top_equal;
    else
      addr_bound_vio = 1'b0;

    // main permission logic
    perm_vio_vec = 0;
    perm_vio = 0;
    perm_vio_slc = 0;
    cs2_bad_type = 1'b0;
    illegal_scr_addr = 1'b0;

    // otype_1: forward sentry; otype_23: forward inherit sentry; otype_45: backward sentry;
    cs1_otype_0 = (rf_fullcap_a.otype == 3'h0);
    cs1_otype_1 = rf_fullcap_a.perms[PERM_EX] & (rf_fullcap_a.otype == 3'h1); // fwd sentry
    cs1_otype_45 = rf_fullcap_a.perms[PERM_EX] & ((rf_fullcap_a.otype == 3'h4) || (rf_fullcap_a.otype == 3'h5));
    cs1_otype_23 = rf_fullcap_a.perms[PERM_EX] & ((rf_fullcap_a.otype == 3'h2) || (rf_fullcap_a.otype == 3'h3));

    cs2_otype_45 = rf_fullcap_b.perms[PERM_EX] & ((rf_fullcap_b.otype == 3'h4) || (rf_fullcap_b.otype == 3'h5));

    // note cseal/unseal/cis_subject doesn't generate exceptions,
    // so for all exceptions, violations can always be attributed to cs1, thus no need to further split
    // exceptions based on source operands.
    if (is_load_cap) begin
      perm_vio_vec[PVIO_TAG]   = ~rf_fullcap_a.valid;
      perm_vio_vec[PVIO_SEAL]  = is_cap_sealed(rf_fullcap_a);
      perm_vio_vec[PVIO_LD]    = ~(rf_fullcap_a.perms[PERM_LD]);
      perm_vio_vec[PVIO_ALIGN] = (cheri_ls_chkaddr[2:0] != 0);
    end else if (is_store_cap) begin
      perm_vio_vec[PVIO_TAG]   = (~rf_fullcap_a.valid);
      perm_vio_vec[PVIO_SEAL]  = is_cap_sealed(rf_fullcap_a);
      perm_vio_vec[PVIO_SD]    = ~rf_fullcap_a.perms[PERM_SD];
      perm_vio_vec[PVIO_SC]    = (~rf_fullcap_a.perms[PERM_MC] && rf_fullcap_b.valid);
      perm_vio_vec[PVIO_ALIGN] = (cheri_ls_chkaddr[2:0] != 0);
      // store-local: storing a non-global tagged cap without SL permission
      perm_vio_slc = ~rf_fullcap_a.perms[PERM_SL] && rf_fullcap_b.valid &&
                     (~rf_fullcap_b.perms[PERM_GL]) ;
    end else if (cheri_operator_i[CSEAL]) begin
      cs2_bad_type = rf_fullcap_a.perms[PERM_EX] ?
                     ((rf_rdata_b[31:3]!=0)||(rf_rdata_b[2:0]==0)) :
                     ((|rf_rdata_b[31:4]) || (rf_rdata_b[3:0] <= 8));
      // cs2.addr check : ex: 0-7, non-ex: 9-15
      perm_vio_vec[PVIO_TAG]  = ~rf_fullcap_b.valid;
      perm_vio_vec[PVIO_SEAL] = is_cap_sealed(rf_fullcap_a) || is_cap_sealed(rf_fullcap_b) ||
                                (~rf_fullcap_b.perms[PERM_SE]) || cs2_bad_type;
    end else if (cheri_operator_i[CUNSEAL]) begin
      perm_vio_vec[PVIO_TAG]  = ~rf_fullcap_b.valid;
      perm_vio_vec[PVIO_SEAL] = (~is_cap_sealed(rf_fullcap_a)) || is_cap_sealed(rf_fullcap_b) ||
                                (~rf_fullcap_b.perms[PERM_US]);
    end else if (cheri_operator_i[CJALR]) begin
      // sentry-type rules: which otypes may be jumped through depends on the
      // link (rd) and source (cs1) register selection (cra == x1)
      perm_vio_vec[PVIO_TAG]  = ~rf_fullcap_a.valid;
      perm_vio_vec[PVIO_SEAL] = (is_cap_sealed(rf_fullcap_a) && (cheri_imm12_i != 0)) ||
                                ~(((rf_waddr_i == 0) && (rf_raddr_a_i == 5'h1) && cs1_otype_45) ||
                                  ((rf_waddr_i == 0) && (rf_raddr_a_i != 5'h1) && (cs1_otype_0 || cs1_otype_1)) ||
                                  ((rf_waddr_i == 5'h1) && (cs1_otype_0 | cs1_otype_23)) ||
                                  ((rf_waddr_i != 0) && (cs1_otype_0 | cs1_otype_1)));

      perm_vio_vec[PVIO_EX] = ~rf_fullcap_a.perms[PERM_EX];
    end else if (cheri_operator_i[CCSR_RW]) begin
      // SCR access requires PCC system-register permission; low addresses are illegal
      perm_vio_vec[PVIO_ASR] = ~pcc_cap_i.perms[PERM_SR];
      illegal_scr_addr = ~debug_mode_i & (csr_addr_o <
27); + end else begin + perm_vio_vec = 0; + end + + perm_vio = | perm_vio_vec; + + end + + // qualified by lsu_req later + // store_local error only causes tag clearing unless escalated to fault for debugging + assign cheri_lsu_err = cheri_pmode_i & ~debug_mode_i & + (addr_bound_vio | perm_vio | (csr_dbg_tclr_fault_i & perm_vio_slc)); + + // + // fault case mtval generation + // report to csr as mtval + logic ls_addr_misaligned_only; + + assign cheri_ex_err_info_o = 12'h0; // no ex stage cheri error currently + assign cheri_wb_err_info_o = cheri_wb_err_info_q; + + assign cheri_wb_err_d = cheri_wb_err_raw & cheri_exec_id_i & cheri_ex_valid_raw & ~debug_mode_i; + + // addr_bound_vio is the timing optimized version (gating data_req) + // However we need to generate full version of addr_bound_vio to match the sail exception + // priority definition (bound_vio has higher priority over alignment_error). + // this has less timing impact since it goes to a flop stage + logic addr_bound_vio_ext; + logic [32:0] cheri_top_chkaddr_ext; + + assign cheri_top_chkaddr_ext = cheri_ls_chkaddr + 8; // extend to 33 bit for compare + assign addr_bound_vio_ext = is_cap ? 
addr_bound_vio | (cheri_top_chkaddr_ext > rf_fullcap_a.top33) : + addr_bound_vio; + + always_comb begin : err_cause_comb + cheri_err_cause = vio_cause_enc(addr_bound_vio_ext, perm_vio_vec); + rv32_err_cause = vio_cause_enc(addr_bound_vio_rv32, perm_vio_vec_rv32); + + + ls_addr_misaligned_only = perm_vio_vec[PVIO_ALIGN] && (perm_vio_vec[PVIO_ALIGN-1:0] == 0) && ~addr_bound_vio_ext; + + // cheri_wb_err_raw is already qualified by instr + // bit 15:13: reserved + // bit 12: illegal_scr_addr + // bit 11: alignment error (load/store) + // bit 10:0 mtval as defined by CHERIoT arch spec + if (cheri_operator_i[CCSR_RW] & cheri_wb_err_raw & illegal_scr_addr & cheri_exec_id_i) + // cspecialrw trap, illegal addr, treated as illegal_insn + cheri_wb_err_info_d = {3'h0, 1'b1, 12'h0}; + else if (cheri_operator_i[CCSR_RW] & cheri_wb_err_raw & cheri_exec_id_i) + // cspecialrw traps, PERM_SR + cheri_wb_err_info_d = {5'h0, 1'b1, cheri_cs2_dec_i, cheri_err_cause}; + else if (cheri_wb_err_raw & cheri_exec_id_i) + cheri_wb_err_info_d = {5'h0, 1'b0, rf_raddr_a_i, cheri_err_cause}; + else if ((is_load_cap | is_store_cap) & cheri_lsu_err & cheri_exec_id_i) + cheri_wb_err_info_d = {4'h0, ls_addr_misaligned_only, 1'b0, rf_raddr_a_i, cheri_err_cause}; + else if (rv32_lsu_req_i & rv32_lsu_err) + cheri_wb_err_info_d = {5'h0, 1'b0, rf_raddr_a_i, rv32_err_cause}; + else + cheri_wb_err_info_d = cheri_wb_err_info_q; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cheri_wb_err_q <= 1'b0; + cheri_wb_err_info_q <= 'h0; + end else begin + // Simple flop here works since + // -- cheri_wb_err is gated by cheri_exec_id/ex_valid + // -- all non-load/store cheriot instructions that can generate exceptions + // only takes 1 cycle in ID/EX stage + // -- faulted non-load/store instruction can only stay 1 cycle in wb_stage + cheri_wb_err_q <= cheri_wb_err_d; + cheri_wb_err_info_q <= cheri_wb_err_info_d; + end + end + + // + // muxing in cheri LSU signals with the rv32 signals 
+ // + assign lsu_req_o = (instr_is_cheri_i ? cheri_lsu_req : rv32_lsu_req_i); + assign cpu_lsu_dec_o = ((instr_is_cheri_i && is_cap) | instr_is_rv32lsu_i); + + + assign cpu_lsu_cheri_err = instr_is_cheri_i ? cheri_lsu_err : rv32_lsu_err; + assign cpu_lsu_addr = instr_is_cheri_i ? cheri_lsu_addr : rv32_lsu_addr_i; + assign cpu_lsu_we = instr_is_cheri_i ? cheri_lsu_we : rv32_lsu_we_i; + assign cpu_lsu_wdata = instr_is_cheri_i ? cheri_lsu_wdata : {1'b0, rv32_lsu_wdata_i}; + assign cpu_lsu_is_cap = instr_is_cheri_i & cheri_lsu_is_cap; + + // muxing tbre ctrl inputs and CPU ctrl inputs + + assign lsu_cheri_err_o = ~lsu_tbre_sel_i ? cpu_lsu_cheri_err : 1'b0; + assign lsu_we_o = ~lsu_tbre_sel_i ? cpu_lsu_we : tbre_lsu_we_i; + assign lsu_addr_o = ~lsu_tbre_sel_i ? cpu_lsu_addr : tbre_lsu_addr_i; + assign lsu_wdata_o = ~lsu_tbre_sel_i ? cpu_lsu_wdata : tbre_lsu_wdata_i; + assign lsu_is_cap_o = ~lsu_tbre_sel_i ? cpu_lsu_is_cap : tbre_lsu_is_cap_i; + + assign lsu_lc_clrperm_o = (~lsu_tbre_sel_i & instr_is_cheri_i) ? cheri_lsu_lc_clrperm : 0; + assign lsu_type_o = (~lsu_tbre_sel_i & ~instr_is_cheri_i) ? rv32_lsu_type_i : 2'b00; + assign lsu_wcap_o = (~lsu_tbre_sel_i & instr_is_cheri_i) ? cheri_lsu_wcap : NULL_REG_CAP; + assign lsu_sign_ext_o = (~lsu_tbre_sel_i & ~instr_is_cheri_i) ? rv32_lsu_sign_ext_i : 1'b0; + + + // rv32 core side signals + // request phase: be nice and mux using the current EX instruction to select + + // addr_incr: + // -- must qualify addr_incr otherwise it goes to ALU and mess up non-LSU instructions + // -- however for LEC to gate this with cheri_pmode, otherwise illegal_insn will feed into addr logic + // since illegal_insn goes into instr_is_rv32lsu + // assign rv32_addr_incr_req_o = instr_is_rv32lsu_i ? addr_incr_req_i : 1'b0; // original + assign rv32_addr_incr_req_o = (~cheri_pmode_i | instr_is_rv32lsu_i) ? 
addr_incr_req_i : 1'b0; + + assign rv32_addr_last_o = addr_last_i; + + // req_done, resp_valid, load/store_err will be directly from LSU + + // + // Stack high watermark CSR update + // + + // Notes, + // - this should also take care of unaligned access (which increases addr only) + // (although stack access should not have any) + // - it's also ok if the prev instr gets faulted in WB, since stall_mem/data_req_allowed logic ensures + // that lsu_req won't be issued till memory response/error comes back + // - what if the instruction gets faulted later in WB stage? Also fine since worst case even if HM is + // too aggressive we will just have to spend more time zeroing out more stack area. + + assign csr_mshwm_set_o = lsu_req_o & ~lsu_cheri_err_o & lsu_we_o & + (lsu_addr_o[31:4] >= csr_mshwmb_i[31:4]) & (lsu_addr_o[31:4] < csr_mshwm_i[31:4]); + assign csr_mshwm_new_o = {lsu_addr_o[31:4], 4'h0}; + + + // + // Stack fast clearing support + // + + if (CheriStkZ) begin + logic lsu_addr_in_stkz_range, stkz_stall_q; + + assign lsu_addr_in_stkz_range = cpu_lsu_dec_o && (cpu_lsu_addr[31:4] >= stkz_base_i[31:4]) && + (cpu_lsu_addr[31:2] < stkz_ptr_i[31:2]); + + // cpu_lsu_dec_o is meant to be an early hint to help LSU to generate mux selects for + // address/ctrl/wdata (eventually to help timing on those output ports) + // - we always suppress lsu_req if stkclr active and address-in-range (to be cleared) + // - however in the first cycle we speculatively still assert cpu_lsu_dec_o to let LSU choose + // the address from cpu core (and hold back stkz/tbre_req). In the next cycle we can deassert + // cpu_lsu_dec_o to let stkz/tbre_req go through + // - we also require that lsu_req (after gated by cpu_stkz_stall0) can only go from 0 to 1 + // once in an instruction cycle. It's satisfied b/c, + // -- Note stkz_active_i is asserted synchronously by writing to the new stkz_ptr CSR. 
+ // As such it is not possible for active to go from '0' to '1' in the middle of an + // load/store instruction when we want to keep lsu_req high while waiting for lsu_req_done + // -- Also, since the cpu_lsu_addr only increments (clc/csc/unaligned) and stkz address + // only decrements, if lsu_addr_in_range = 0 for the 1st word, it will stay 0 for 2nd + // -- Need to ensure stkz design meet those requirements + assign cpu_stall_by_stkz_o = stkz_active_i & lsu_addr_in_stkz_range; + assign cpu_grant_to_stkz_o = ~instr_first_cycle_i & stkz_stall_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + stkz_stall_q <= 1'b0; + end else begin + stkz_stall_q <= stkz_active_i & lsu_addr_in_stkz_range; + end + end + + end else begin + assign cpu_stall_by_stkz_o = 1'b0; + assign cpu_grant_to_stkz_o = 1'b0; + end + + // + // debug signal for FPGA only + // + logic [15:0] dbg_status; + logic [66:0] dbg_cs1_vec, dbg_cs2_vec, dbg_cd_vec; + + assign dbg_status = {4'h0, + instr_is_rv32lsu_i, rv32_lsu_req_i, rv32_lsu_we_i, rv32_lsu_err, + cheri_exec_id_i, cheri_lsu_err, rf_fullcap_a.valid, result_cap_o.valid, + addr_bound_vio, perm_vio, addr_bound_vio_rv32, perm_vio_rv32}; + + assign dbg_cs1_vec = {rf_fullcap_a.top_cor, rf_fullcap_a.base_cor, // 66:64 + rf_fullcap_a.exp, // 63:59 + rf_fullcap_a.top, rf_fullcap_a.base, // 58:41 + rf_fullcap_a.otype, rf_fullcap_a.cperms, // 40:32 + rf_rdata_a}; // 31:0 + + assign dbg_cs2_vec = {rf_fullcap_b.top_cor, rf_fullcap_b.base_cor, // 66:64 + rf_fullcap_b.exp, // 63:59 + rf_fullcap_b.top, rf_fullcap_b.base, // 58:41 + rf_fullcap_b.otype, rf_fullcap_b.cperms, // 40:32 + rf_rdata_b}; // 31:0 + + assign dbg_cd_vec = {result_cap_o.top_cor, result_cap_o.base_cor, // 66:64 + result_cap_o.exp, // 63:59 + result_cap_o.top, result_cap_o.base, // 58:41 + result_cap_o.otype, result_cap_o.cperms, // 40:32 + result_data_o}; // 31:0 + + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv new file mode 100644 index 0000000..186ce55 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_pkg.sv
@@ -0,0 +1,1247 @@
+// Copyright Microsoft Corporation
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Capability type definitions and compression/expansion helpers for the
+// CHERIoT Ibex core (register-file, PCC and memory capability formats).
+package cheri_pkg;
+
+  // bit field widths
+  parameter int unsigned ADDR_W   = 32;
+  parameter int unsigned TOP_W    = 9;
+  parameter int unsigned TOP8_W   = 8;    // IT8 encoding only
+  parameter int unsigned BOT_W    = 9;
+  parameter int unsigned CEXP_W   = 4;
+  parameter int unsigned CEXP5_W  = 5;    // IT8 encoding only
+  parameter int unsigned EXP_W    = 5;
+  parameter int unsigned OTYPE_W  = 3;
+  parameter int unsigned CPERMS_W = 6;
+  parameter int unsigned PERMS_W  = 12;
+
+  parameter int unsigned REGCAP_W = 37;
+
+  // exponent value representing the full 32-bit address space (2^24 * 2^9-range top)
+  parameter bit [4:0]    RESETEXP  = 24;
+  parameter int unsigned UPPER_W   = 24;
+  parameter bit [4:0]    RESETCEXP = 15;  // only used in non-IT8 encoding
+
+  // bit index of PERMS field
+  // U0 SE US EX SR MC LD SL LM SD LG GL
+  parameter int unsigned PERM_GL = 0;     // global flag
+  parameter int unsigned PERM_LG = 1;     // load global
+  parameter int unsigned PERM_SD = 2;     // store
+  parameter int unsigned PERM_LM = 3;     // load mutable
+  parameter int unsigned PERM_SL = 4;     // store local
+  parameter int unsigned PERM_LD = 5;     // load
+  parameter int unsigned PERM_MC = 6;     // capability load/store
+  parameter int unsigned PERM_SR = 7;     // access system registers
+  parameter int unsigned PERM_EX = 8;     // execution
+  parameter int unsigned PERM_US = 9;     // unseal
+  parameter int unsigned PERM_SE = 10;    // seal
+  parameter int unsigned PERM_U0 = 11;    //
+
+  // object types: forward/backward, interrupt-enable/-disable sentries
+  parameter logic [2:0] OTYPE_SENTRY_IE_BKWD = 3'd5;
+  parameter logic [2:0] OTYPE_SENTRY_ID_BKWD = 3'd4;
+  parameter logic [2:0] OTYPE_SENTRY_IE_FWD  = 3'd3;
+  parameter logic [2:0] OTYPE_SENTRY_ID_FWD  = 3'd2;
+  parameter logic [2:0] OTYPE_SENTRY         = 3'd1;
+  parameter logic [2:0] OTYPE_UNSEALED       = 3'd0;
+
+  // Compressed (regFile) capability type
+  typedef struct packed {
+    logic                valid;
+    logic [1:0]          top_cor;
+    logic                base_cor;
+    logic [EXP_W-1   :0] exp;       // expanded
+    logic 
[TOP_W-1 :0] top; + logic [BOT_W-1 :0] base; + logic [OTYPE_W-1 :0] otype; + logic [CPERMS_W-1:0] cperms; + logic rsvd; + } reg_cap_t; + + typedef struct packed { + logic valid; + logic [EXP_W-1 :0] exp; // expanded + logic [ADDR_W :0] top33; + logic [ADDR_W-1 :0] base32; + logic [OTYPE_W-1 :0] otype; + logic [PERMS_W-1: 0] perms; + logic [1:0] top_cor; + logic base_cor; + logic [TOP_W-1 :0] top; + logic [BOT_W-1 :0] base; + logic [CPERMS_W-1:0] cperms; + logic [31:0] maska; + logic rsvd; + logic [31:0] rlen; + } full_cap_t; + + typedef struct packed { + logic valid; + logic [EXP_W-1 :0] exp; // expanded + logic [ADDR_W :0] top33; + logic [ADDR_W-1 :0] base32; + logic [OTYPE_W-1 :0] otype; + logic [PERMS_W-1: 0] perms; + logic [CPERMS_W-1:0] cperms; + logic rsvd; + } pcc_cap_t; + + typedef struct packed { + logic [32:0] top33req; + logic [EXP_W-1:0] exp1; + logic [EXP_W-1:0] exp2; + logic [EXP_W:0] explen; + logic [EXP_W:0] expb; // this can be 32 so must be 6-bit + logic in_bound; + } bound_req_t; + + parameter reg_cap_t NULL_REG_CAP = '{0, 0, 0, 0, 0, 0, 0, 0, 0}; + parameter full_cap_t NULL_FULL_CAP = '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + parameter pcc_cap_t NULL_PCC_CAP = '{0, 0, 0, 0, 0, 0, 0, 0}; + + parameter logic [5:0] CPERMS_TX = 6'b101111; // Tx (execution root) + parameter logic [5:0] CPERMS_TM = 6'b111111; // Tm (memory data root) + parameter logic [5:0] CPERMS_TS = 6'b100111; // Tx (seal root) + + parameter pcc_cap_t PCC_RESET_CAP = '{1'b1, RESETEXP, 33'h10000_0000, 0, OTYPE_UNSEALED, 13'h1eb, CPERMS_TX, 1'b0}; // Tx (execution root) + + parameter reg_cap_t MTVEC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TX, 1'b0}; // Tx (execution root) + parameter reg_cap_t MTDC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TM, 1'b0}; // Tm + parameter reg_cap_t MEPC_RESET_CAP = '{1'b1, 0, 0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TX, 1'b0}; // Tx + parameter reg_cap_t MSCRATCHC_RESET_CAP = '{1'b1, 0, 
0, RESETEXP, 9'h100, 0, OTYPE_UNSEALED, CPERMS_TS, 1'b0};  // Ts
+
+
+  // implicit permission bits carried by each compressed-permission encoding family
+  parameter logic [PERMS_W-1: 0] PERM_MC_IMSK = (1<<PERM_LD) | (1<<PERM_MC) | (1<<PERM_SD);
+  parameter logic [PERMS_W-1: 0] PERM_LC_IMSK = (1<<PERM_LD) | (1<<PERM_MC);
+  parameter logic [PERMS_W-1: 0] PERM_SC_IMSK = (1<<PERM_SD) | (1<<PERM_MC);
+  parameter logic [PERMS_W-1: 0] PERM_DD_IMSK = 0;
+  parameter logic [PERMS_W-1: 0] PERM_EX_IMSK = (1<<PERM_EX) | (1<<PERM_MC) | (1<<PERM_LD);
+  parameter logic [PERMS_W-1: 0] PERM_SE_IMSK = 0;
+
+  // expand the perms from memory representation
+  // Decodes the 6-bit compressed permission field into the 12-bit expanded
+  // vector; cperms[4:2]/[4:3] select the encoding family, cperms[5] is GL.
+  function automatic logic [PERMS_W-1:0] expand_perms(logic [CPERMS_W-1:0] cperms);
+    logic [PERMS_W-1:0] perms;
+
+    perms = 0;
+    perms[PERM_GL] = cperms[5];
+
+    if (cperms[4:3] == 2'b11) begin                 // memory-cap family (LD+MC+SD implicit)
+      perms[PERM_LG] = cperms[0];
+      perms[PERM_LM] = cperms[1];
+      perms[PERM_SL] = cperms[2];
+      perms = perms | PERM_MC_IMSK;
+    end else if (cperms[4:2] == 3'b101) begin       // load-cap family (LD+MC implicit)
+      perms[PERM_LG] = cperms[0];
+      perms[PERM_LM] = cperms[1];
+      perms = perms | PERM_LC_IMSK;
+    end else if (cperms[4:0] == 5'b10000) begin     // store-cap-only encoding (SD+MC implicit)
+      perms = perms | PERM_SC_IMSK;
+    end else if (cperms[4:2] == 3'b100) begin       // data-only family (explicit SD/LD)
+      perms[PERM_SD] = cperms[0];
+      perms[PERM_LD] = cperms[1];
+      perms = perms | PERM_DD_IMSK;
+    end else if (cperms[4:3] == 2'b01) begin        // execution family (EX+MC+LD implicit)
+      perms[PERM_LG] = cperms[0];
+      perms[PERM_LM] = cperms[1];
+      perms[PERM_SR] = cperms[2];
+      perms = perms | PERM_EX_IMSK;
+    end else if (cperms[4:3] == 2'b00) begin        // sealing family (explicit US/SE/U0)
+      perms[PERM_US] = cperms[0];
+      perms[PERM_SE] = cperms[1];
+      perms[PERM_U0] = cperms[2];
+      perms = perms | PERM_SE_IMSK;
+    end
+
+    return perms;
+  endfunction
+
+  // test the implicit permission mask (any bits not 1?)
+  `define TEST_IMSK(P, M) (&((P) | ~(M)))
+
+  // compress perms field to memory representation
+  function automatic logic [CPERMS_W-1:0] compress_perms (logic [PERMS_W-1:0] perms, logic [1:0] unused_qqq);  // unused_qqq is a place holder, just to stay compatible with the old encoding for now.
+ logic [CPERMS_W-1:0] cperms; + + // test all types encoding and determine encoding (Robert's priority order) + // Encoding explicit bits based on type + cperms = 0; + cperms[5] = perms[PERM_GL]; + + if (`TEST_IMSK(perms, PERM_EX_IMSK)) begin + cperms[0] = perms[PERM_LG]; + cperms[1] = perms[PERM_LM]; + cperms[2] = perms[PERM_SR]; + cperms[4:3] = 2'b01; + end else if (`TEST_IMSK(perms, PERM_MC_IMSK)) begin + cperms[0] = perms[PERM_LG]; + cperms[1] = perms[PERM_LM]; + cperms[2] = perms[PERM_SL]; + cperms[4:3] = 2'b11; + end else if (`TEST_IMSK(perms, PERM_LC_IMSK)) begin + cperms[0] = perms[PERM_LG]; + cperms[1] = perms[PERM_LM]; + cperms[4:2] = 3'b101; + end else if (`TEST_IMSK(perms, PERM_SC_IMSK)) begin + cperms[4:0] = 5'b10000; + end else if (perms[PERM_SD] | perms[PERM_LD]) begin + cperms[0] = perms[PERM_SD]; + cperms[1] = perms[PERM_LD]; + cperms[4:2] = 3'b100; + end else begin + cperms[0] = perms[PERM_US]; + cperms[1] = perms[PERM_SE]; + cperms[2] = perms[PERM_U0]; + cperms[4:3] = 2'b00; + end + + //$display("-------compress_perms:%t: %x - %x", $time, perms, cperms); + return cperms; + endfunction + + // handling cperms in loaded cap based on the loading cap requirment + function automatic logic [CPERMS_W-1:0] mask_clcperms (logic [CPERMS_W-1:0] cperms_in, logic [3:0] clrperm, + logic valid_in, logic sealed); + logic [CPERMS_W-1:0] cperms_out; + logic clr_gl, clr_lg, clr_sdlm; + + clr_gl = clrperm[0] & valid_in; + clr_lg = clrperm[0] & valid_in & ~sealed; + clr_sdlm = clrperm[1] & valid_in & ~sealed; // only clear SD/LM if not sealed + + cperms_out = cperms_in; + cperms_out[5] = cperms_in[5] & ~clr_gl; // GL + + if (cperms_in[4:3] == 2'b11) begin + cperms_out[0] = cperms_in[0] & ~clr_lg; // LG + cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM + cperms_out[4:2] = clr_sdlm ? 
3'b101 : cperms_in[4:2]; + end else if (cperms_in[4:2] == 3'b101) begin + cperms_out[0] = cperms_in[0] & ~clr_lg; // LG + cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM + end else if (cperms_in[4:0] == 5'b10000) begin + cperms_out[4:0] = clr_sdlm? 5'h0 : cperms_in[4:0]; // clear SD will results in NULL permission + end else if (cperms_in[4:2] == 3'b100) begin + cperms_out[4] = ~(clr_sdlm & ~cperms_in[1]); // must decode to 5'h0 if both ld/sd are 0. + cperms_out[0] = cperms_in[0] & ~clr_sdlm; + end else if (cperms_in[4:3] == 2'b01) begin + cperms_out[0] = cperms_in[0] & ~clr_lg; // LG + cperms_out[1] = cperms_in[1] & ~clr_sdlm; // LM + end + + return cperms_out; + endfunction + + // caculate length (mem size) in bytes of a capability + function automatic logic[31:0] get_cap_len (full_cap_t full_cap); + logic [32:0] tmp33; + logic [31:0] result; + + tmp33 = full_cap.top33 - full_cap.base32; + result = tmp33[32]? 32'hffff_ffff: tmp33[31:0]; + + return result; + endfunction + + // obtain 32-bit representation of top + function automatic logic[32:0] get_bound33(logic [TOP_W-1:0] top, logic [1:0] cor, + logic [EXP_W-1:0] exp, logic [31:0] addr); + logic [32:0] t1, t2, mask, cor_val; + + if (cor[1]) + cor_val = {33{cor[1]}}; // negative sign extension + else + cor_val = {32'h0, (~cor[1]) & cor[0]}; + + cor_val = (cor_val << exp) << TOP_W; + mask = (33'h1_ffff_ffff << exp) << TOP_W; + + t1 = ({1'b0, addr} & mask) + cor_val; // apply correction and truncate +//$display("gb33: corval=%09x, mask=%09x, t1=%09x", cor_val, mask, t1); + t2 = {24'h0, top}; // extend to 32 bit + t1 = t1 | (t2 << exp); + + return t1; + + endfunction + + // this implementation give slightly better timing/area results + function automatic logic[32:0] get_bound33_trial(logic [TOP_W-1:0] top, logic [1:0] cor, + logic [EXP_W-1:0] exp, logic [31:0] addr); + logic [32:0] t33a, t33b, result; + logic [23:0] t24a, t24b, mask24, cor24; + + if (cor[1]) + cor24 = {24{cor[1]}}; // negative sign extension + else 
+ cor24 = {23'h0, (~cor[1]) & cor[0]}; + + cor24 = (cor24 << exp); + mask24 = {24{1'b1}} << exp; + + t24a = ({1'b0, addr[31:9]} & mask24) + cor24; // apply correction and truncate +//$display("gb33: corval=%09x, mask=%09x, t1=%09x", cor_val, mask, t1); + t33a = {24'h0, top}; + result = {t24a, 9'h0} | (t33a << exp); + + return result; + + endfunction + + // update the top/base correction for a cap + function automatic logic [2:0] update_temp_fields(logic [TOP_W-1:0] top, logic [BOT_W-1:0] base, + logic [BOT_W-1:0] addrmi); + logic top_hi, addr_hi; + logic [2:0] res3; + + top_hi = (top < base); + addr_hi = (addrmi < base); + + // top_cor + res3[2:1] = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11); + + // base_cor + res3[0] = (addr_hi) ? 1 : 0; + + return res3; + endfunction + + // set address of a capability + // by default we check for representability only. + // use checktop/checkbase to check explicitly against top33/base32 bounds (pcc updates) + // * note, representability check in most cases (other than exp=24) covers the base32 check + + function automatic full_cap_t set_address (full_cap_t in_cap, logic [31:0] newptr, logic chktop, logic chkbase); + full_cap_t out_cap; + logic [32:0] tmp33; + logic [32-TOP_W:0] tmp24, mask24; + logic [2:0] tmp3; + logic [BOT_W-1:0] ptrmi9; + logic top_lt; + + out_cap = in_cap; + mask24 = {(33-TOP_W){1'b1}} << in_cap.exp; // mask24 = 0 if exp == 24 + + tmp33 = {1'b0, newptr} - {1'b0, in_cap.base32}; // extend to make sure we can see carry from MSB + tmp24 = tmp33[32:TOP_W] & mask24; + top_lt = ({1'b0, newptr} < {in_cap.top33[32:1], 1'b0}); + + if ((tmp24 != 0) || (chktop & ~top_lt) || (chkbase & tmp33[32])) + out_cap.valid = 1'b0; + + ptrmi9 = BOT_W'(newptr >> in_cap.exp); + tmp3 = update_temp_fields(out_cap.top, out_cap.base, ptrmi9); + out_cap.top_cor = tmp3[2:1]; + out_cap.base_cor = tmp3[0]; + + return out_cap; + endfunction + + // + // utility functions + // + + // return the size (bit length) of 
input number without leading zeros
+  function automatic logic [5:0] get_size(logic [31:0] din);
+    logic [5:0]  count;
+    logic [31:0] a32;
+    int i;
+
+    // propagate the MSB-most 1 downwards, producing a thermometer vector
+    a32 = {din[31], 31'h0};
+    for (i = 30; i >= 0; i--) a32[i] = a32[i+1] | din[i];
+    count = thermo_dec32(a32);
+
+    return count;
+  endfunction
+
+  // return the exp of a 32-bit input (by count trailing zeros)
+  function automatic logic [5:0] count_tz (logic [31:0] din);
+    logic [5:0]  count;
+    logic [31:0] a32;
+    int i;
+
+    // propagate the LSB-most 1 upwards; inverting gives a thermometer vector
+    // whose 1-count equals the number of trailing zeros
+    a32 = {31'h0, din[0]};
+    for (i = 1; i < 32; i++) a32[i] = a32[i-1] | din[i];
+    // count = a32[31] ? thermo_dec32(~a32) : 0;    // if input all zero, return 0
+    count = thermo_dec32(~a32);     // if input all zero, return 32
+
+    return count;
+  endfunction
+
+  // this simply counts the number of 1's in a thermometer-encoded input vector
+  // (32-N zeros followed by N ones)
+  //
+  function automatic logic [5:0] thermo_dec32(logic [31:0] a32);
+    logic [5:0] count;
+    logic [31:0] b32;
+
+    if (a32[31]) count = 32;
+    else begin
+      // binary-search style: halve the window, accumulating one count bit per step
+      count[5] = 1'b0;
+      count[4] = a32[15];
+      b32[15:0] = count[4] ? a32[31:16] : a32[15:0];
+      count[3] = b32[7];
+      b32[ 7:0] = count[3] ? b32[15:8] : b32[7:0];
+      count[2] = b32[3];
+      b32[ 3:0] = count[2] ? b32[7:4] : b32[3:0];
+      count[1] = b32[1];
+      b32[ 1:0] = count[1] ? b32[3:2] : b32[1:0];
+      count[0] = b32[0];
+    end
+
+    return count;
+  endfunction
+
+  // set bounds (top/base/exp/addr) of a capability
+
+  // break up into 2 parts to enable 2-cycle option
+  function automatic bound_req_t prep_bound_req (full_cap_t in_cap, logic [31:0] addr, logic [31:0] length);
+    bound_req_t result;
+    logic [5:0] size_result;
+
+    result.top33req = {1'b0, addr} + {1'b0, length};   // "requested" 33-bit top
+    result.expb     = count_tz(addr);
+    result.explen   = get_size({9'h0, length[31:9]});  // length exp without saturation
+
+    size_result = result.explen;
+    result.exp1 = (size_result >= 6'(RESETCEXP)) ? 
EXP_W'(RESETEXP) : EXP_W'(size_result); + + size_result += 1; + result.exp2 = (size_result >= 6'(RESETCEXP)) ? EXP_W'(RESETEXP) : EXP_W'(size_result); + + // move this to prep_bound_req to share with set_bounds_rndown + // should be ok to fit this in cycle 1 since it is a straight compare + result.in_bound = ~((result.top33req > in_cap.top33) || (addr < in_cap.base32)); + + return result; + endfunction + + function automatic bound_req_t prep_bound_req_it8 (full_cap_t in_cap, logic [31:0] addr, logic [31:0] length); // IT8 encoding + bound_req_t result; + logic [4:0] size_result; + logic gt24; + logic [4:0] limit24_mask; + + result.top33req = {1'b0, addr} + {1'b0, length}; // "requested" 33-bit top + result.expb = count_tz(addr); + result.explen = get_size({9'h0, length[31:9]}); // length exp without saturation, max 23 + + // since explen <= 23, exp1 and exp must be <= 24. No need for saturation logic + result.exp1 = result.explen; + result.exp2 = result.explen + 1; + + // move this to prep_bound_req to share with set_bounds_rndown + // should be ok to fit this in cycle 1 since it is a straight compare + result.in_bound = ~((result.top33req > in_cap.top33) || (addr < in_cap.base32)); + + return result; + endfunction + + function automatic full_cap_t set_bounds (full_cap_t in_cap, logic[31:0] addr, + bound_req_t bound_req, logic req_exact); + full_cap_t out_cap; + + logic [EXP_W-1:0] exp1, exp2; + logic [32:0] top33req; + logic [BOT_W:0] base1, base2, top1, top2, len1, len2; + logic [32:0] mask1, mask2; + logic ovrflw, topoff1, topoff2, topoff; + logic baseoff1, baseoff2, baseoff; + logic tophi1, tophi2, tophi; + logic in_bound; + + out_cap = in_cap; + + top33req = bound_req.top33req; + exp1 = bound_req.exp1; + exp2 = bound_req.exp2; + in_bound = bound_req.in_bound; + + // 1st path + mask1 = {33{1'b1}} << exp1; + base1 = (BOT_W+1)'(addr >> exp1); + topoff1 = |(top33req & ~mask1); + baseoff1 = |({1'b0, addr} & ~mask1); + top1 = (BOT_W+1)'(top33req >> exp1) + 
(BOT_W+1)'(topoff1); + len1 = top1 - base1; + tophi1 = (top1[8:0] >= base1[8:0]); + + // overflow detection based on 1st path + ovrflw = len1[9]; + + // 2nd path in parallel + mask2 = {33{1'b1}} << exp2; + base2 = (BOT_W+1)'(addr >> exp2); + topoff2 = |(top33req & ~mask2); + baseoff2 = |({1'b0, addr} & ~mask2); + top2 = (BOT_W+1)'(top33req >> exp2) + (BOT_W+1)'(topoff2); + len2 = top2 - base2; + tophi2 = (top2[8:0] >= base2[8:0]); + + // select results + if (~ovrflw) begin + out_cap.exp = exp1; + out_cap.top = top1[TOP_W-1:0]; + out_cap.base = base1[BOT_W-1:0]; + out_cap.maska = mask1[31:0]; + out_cap.rlen = {22'h0, len1} << exp1; + topoff = topoff1; + baseoff = baseoff1; + tophi = tophi1; + end else begin + out_cap.exp = exp2; + out_cap.top = top2[TOP_W-1:0]; + out_cap.base = base2[BOT_W-1:0]; + out_cap.maska = mask2[31:0]; + out_cap.rlen = {22'h0, len2} << exp2; + topoff = topoff2; + baseoff = baseoff2; + tophi = tophi2; + end + +`ifdef CHERI_PKG_DEBUG + +$display("--- set_bounds: exact = %x, ovrflw = %x, exp1 = %x, exp2 = %x, exp = %x, len = %x", ~(topoff|baseoff), ovrflw, exp1, exp2, out_cap.exp, out_cap.rlen); +$display("--- set_bounds: b1 = %x, t1 = %x, b2 = %x, t2 = %x", base1, top1, base2, top2); +`endif + + // top/base correction values + // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0 + // as such, top_cor can only be either either 0 or +1; + out_cap.top_cor = tophi ? 
2'b00 : 2'b01; + out_cap.base_cor = 1'b0; + + if (req_exact & (topoff | baseoff)) out_cap.valid = 1'b0; + + // we used the "requested top" to verify the results against original bounds + // also compare address >= old base 32 to handle exp=24 case + // exp = 24 case: can have addr < base (not covered by representibility checking); + // other exp cases: always addr >= base when out_cap.tag == 1 + if (~in_bound) + out_cap.valid = 1'b0; + + return out_cap; + endfunction + + function automatic full_cap_t set_bounds_rndn (full_cap_t in_cap, logic[31:0] addr, + bound_req_t bound_req); + full_cap_t out_cap; + + logic [EXP_W:0] explen, expb, exp_final; + logic [32:0] top33req; + logic in_bound; + logic el_gt_eb, el_gt_14, eb_gt_14; + logic tophi; + + out_cap = in_cap; + + top33req = bound_req.top33req; + explen = bound_req.explen; + expb = bound_req.expb; + in_bound = bound_req.in_bound; + + el_gt_eb = (explen > expb); + el_gt_14 = (explen > 14); + eb_gt_14 = (expb > 14); + + // final exp = min(14, e_l, e_b) + exp_final = (el_gt_eb & !eb_gt_14) ? expb : (el_gt_14 ? 14 : explen); + + // if (el_gt_eb & eb_gt_14) exp_final = 14; // min(14, min(e_l, e_b)), el > eb, eb > 14 + // else if (el_gt_eb) exp_final = expb; // min(14, min(e_l, e_b)), el > eb, eb <= 14 + // else if (el_gt_14) exp_final = 14; // min(14, min(e_l, e_b)), el <= eb, el > 14 + // else exp_final = explen; // e_l, el <= eb, el <= 14 + + out_cap.exp = exp_final; + out_cap.base = (BOT_W)'(addr >> exp_final); + + out_cap.top = (el_gt_eb | el_gt_14) ? ((BOT_W)'(out_cap.base-1)) : + ((BOT_W)'(top33req >> exp_final)); + + if (~in_bound) out_cap.valid = 1'b0; + + // top/base correction values + // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0 + // as such, top_cor can only be either either 0 or +1; + tophi = (out_cap.top >= out_cap.base); + out_cap.top_cor = tophi ? 
2'b00 : 2'b01;
+    // base_cor is a 1-bit field; use a 1-bit literal (was 2'b00, a width-mismatch
+    // lint warning; truncated value is identical, matching set_bounds)
+    out_cap.base_cor = 1'b0;
+
+    return out_cap;
+  endfunction
+
+
+  // CSetBoundsRoundDown, IT8 encoding: exponent is min(explen, expb) with no
+  // 14-bit clamp; top rounds down to the representable boundary.
+  function automatic full_cap_t set_bounds_rndn_it8 (full_cap_t in_cap, logic[31:0] addr,    // IT8 encoding
+                                                 bound_req_t bound_req);
+    full_cap_t out_cap;
+
+    logic [EXP_W:0]   explen, expb, exp_final;
+    logic [32:0]      top33req;
+    logic             in_bound;
+    logic             el_gt_eb;
+    logic             tophi;
+
+    out_cap  = in_cap;
+
+    top33req = bound_req.top33req;
+    explen   = bound_req.explen;
+    expb     = bound_req.expb;
+    in_bound = bound_req.in_bound;
+
+    el_gt_eb = (explen > expb);
+
+    // final exp = min(e_l, e_b)
+    exp_final = (el_gt_eb) ? expb : explen;
+
+    out_cap.exp  = exp_final;
+    out_cap.base = (BOT_W)'(addr >> exp_final);
+
+    // when the base exponent limits precision, round top down to base-1 (wrapped)
+    out_cap.top = (el_gt_eb) ? ((BOT_W)'(out_cap.base-1)) : ((BOT_W)'(top33req >> exp_final));
+
+    if (~in_bound) out_cap.valid = 1'b0;
+
+    // top/base correction values
+    // Note the new base == addr >> exp, so addr_hi == FALSE, thus base_cor == 0
+    // as such, top_cor can only be either 0 or +1;
+    tophi = (out_cap.top >= out_cap.base);
+    out_cap.top_cor  = tophi ? 
  // NOTE(review): the next four lines are the tail of a bound-correction
  // function whose opening lines fall before this chunk; tokens kept as-is.
                                2'b00 : 2'b01;
    out_cap.base_cor = 2'b00;

    return out_cap;
  endfunction



  // seal/unseal related functions

  // Return a copy of in_cap with its object type (otype) overwritten,
  // i.e. mark the capability as sealed with new_otype.
  function automatic full_cap_t seal_cap (full_cap_t in_cap, logic [OTYPE_W-1:0] new_otype);
    full_cap_t out_cap;

    out_cap = in_cap;
    out_cap.otype = new_otype;
    return out_cap;
  endfunction

  // Return a copy of in_cap with otype reset to OTYPE_UNSEALED.
  function automatic full_cap_t unseal_cap (full_cap_t in_cap);
    full_cap_t out_cap;
    out_cap = in_cap;
    out_cap.otype = OTYPE_UNSEALED;
    return out_cap;
  endfunction

  // A capability is considered sealed when its otype is anything other
  // than OTYPE_UNSEALED.
  function automatic logic is_cap_sealed (full_cap_t in_cap);
    logic result;

    result = (in_cap.otype != OTYPE_UNSEALED);
    return result;
  endfunction

  //function automatic logic is_cap_sentry (full_cap_t in_cap);
  //  logic result;

  //  result = (in_cap.perms[PERM_EX]) && ((in_cap.otype == OTYPE_SENTRY) || (in_cap.otype == OTYPE_SENTRY_ID) ||
  //           (in_cap.otype == OTYPE_SENTRY_IE));
  //  return result;
  //endfunction

  // Expand the 3-bit memory-format otype to the 4-bit internal form.
  // The extra MSB is set only for a non-zero otype on a non-executable cap.
  function automatic logic [3:0] decode_otype (logic [2:0] otype3, logic perm_ex);
    logic [3:0] otype4;

    otype4 = {~perm_ex & (otype3 != 0), otype3};
    return otype4;
  endfunction

  // reg_cap decompression (to full_cap).
  // Expands the compressed permissions and recomputes the 33/32-bit
  // absolute bounds from the compressed top/base fields and the address.
  function automatic full_cap_t reg2fullcap (reg_cap_t reg_cap, logic [31:0] addr);
    full_cap_t full_cap;

    full_cap.perms    = expand_perms(reg_cap.cperms);
    full_cap.valid    = reg_cap.valid;
    full_cap.exp      = reg_cap.exp;
    full_cap.otype    = reg_cap.otype;
    full_cap.top_cor  = reg_cap.top_cor;
    full_cap.base_cor = reg_cap.base_cor;
    full_cap.top      = reg_cap.top;
    full_cap.base     = reg_cap.base;
    full_cap.cperms   = reg_cap.cperms;
    full_cap.rsvd     = reg_cap.rsvd;

    full_cap.top33  = get_bound33(reg_cap.top, reg_cap.top_cor, reg_cap.exp, addr);
    // base_cor is 1 bit; sign-extend it to the 2-bit correction input.
    full_cap.base32 = get_bound33(reg_cap.base, {2{reg_cap.base_cor}}, reg_cap.exp, addr);
    // full_cap = update_bounds(full_cap, addr);      // for some reason this increases area

    full_cap.maska = 0;
    full_cap.rlen  = 0;

    return full_cap;
  endfunction

  // full_cap compression (to reg_cap).
  // note we don't recalculate top/base_cor here since the address/bounds of a capability
  // won't change without an explicit instruction (only exception is PCC)
  function automatic reg_cap_t full2regcap (full_cap_t full_cap);
    reg_cap_t reg_cap;

    reg_cap          = NULL_REG_CAP;
    reg_cap.valid    = full_cap.valid;
    reg_cap.top_cor  = full_cap.top_cor;
    reg_cap.base_cor = full_cap.base_cor;
    reg_cap.exp      = full_cap.exp;
    reg_cap.top      = full_cap.top;
    reg_cap.base     = full_cap.base;
    reg_cap.cperms   = full_cap.cperms;
    reg_cap.rsvd     = full_cap.rsvd;
    reg_cap.otype    = full_cap.otype;

    return reg_cap;
  endfunction

  // pcc_cap expansion (to full_cap).
  //  -- pcc is a special case since the address (PC) moves around..
  //     so have to adjust correction factors and validate bounds here
  // function automatic full_cap_t pcc2fullcap (pcc_cap_t pcc_cap, logic [31:0] pc_addr);
  function automatic full_cap_t pcc2fullcap (pcc_cap_t in_pcap);
    full_cap_t pcc_fullcap;

    pcc_fullcap.valid    = in_pcap.valid;
    pcc_fullcap.exp      = in_pcap.exp;
    pcc_fullcap.top33    = in_pcap.top33;
    pcc_fullcap.base32   = in_pcap.base32;
    pcc_fullcap.otype    = in_pcap.otype;
    pcc_fullcap.perms    = in_pcap.perms;
    pcc_fullcap.top_cor  = 2'b0;     // will be updated by set_address()
    pcc_fullcap.base_cor = 1'b0;
    // Recover the compressed top/base fields by shifting the absolute
    // bounds right by the exponent.
    pcc_fullcap.top      = TOP_W'(in_pcap.top33 >> (in_pcap.exp));
    pcc_fullcap.base     = BOT_W'(in_pcap.base32 >> (in_pcap.exp));
    pcc_fullcap.cperms   = in_pcap.cperms;
    pcc_fullcap.maska    = 0;        // not used in pcc_cap
    pcc_fullcap.rsvd     = in_pcap.rsvd;
    pcc_fullcap.rlen     = 0;        // not used in pcc_cap

    return pcc_fullcap;
  endfunction

  // compress full_cap to pcc_cap
  function automatic pcc_cap_t full2pcap (full_cap_t full_cap);
    pcc_cap_t pcc_cap;

    pcc_cap.valid  = full_cap.valid;
    pcc_cap.exp    = full_cap.exp;
    pcc_cap.top33  = full_cap.top33;
    pcc_cap.base32 = full_cap.base32;
    pcc_cap.otype  = full_cap.otype;
    pcc_cap.perms  = full_cap.perms;
    pcc_cap.cperms = full_cap.cperms;
    pcc_cap.rsvd   = full_cap.rsvd;

    return pcc_cap;
  endfunction

  // Convert the current PCC (plus a PC value) into a reg_cap suitable for
  // saving to MEPCC; optionally clears the tag (clrtag).
  function automatic reg_cap_t pcc2mepcc (pcc_cap_t pcc_cap, logic [31:0] address, logic clrtag);
    reg_cap_t  reg_cap;
    full_cap_t tfcap0, tfcap1;

    tfcap0 = pcc2fullcap(pcc_cap);
    // Still need representability check to cover save_pc_if and save_pc_wb cases
    tfcap1 = set_address(tfcap0, address, 0, 0);

    reg_cap = full2regcap(tfcap1);
    if (clrtag) reg_cap.valid = 1'b0;

    return reg_cap;
  endfunction

  //
  // pack/unpack the cap+addr between reg and memory
  //   format 0: lsw32 = addr, msw33 = cap fields
  //
  //  p’7 otype’3 E’4 B’9 T’9
  localparam integer RSVD_LO   = 31;
  localparam integer CPERMS_LO = 25;
  localparam integer OTYPE_LO  = 22;
  localparam integer CEXP_LO   = 18;
  localparam integer CEXP5_LO  = 17;     // IT8 encoding only
  localparam integer TOP_LO    = 9;
  localparam integer BASE_LO   = 0;

  // mem2reg, cap meta data, original cap bound encoding, memfmt0.
  // msw carries tag + packed metadata; addr33 carries tag + 32-bit address;
  // clrperm[3] forces the tag clear, remaining bits feed mask_clcperms().
  function automatic reg_cap_t mem2regcap_fmt0 (logic [32:0] msw, logic [32:0] addr33, logic [3:0] clrperm);
    reg_cap_t regcap;
    logic [EXP_W-1:0] tmp5;
    logic [2:0] tmp3;
    logic [CPERMS_W-1:0] cperms_mem;
    logic [BOT_W-1:0] addrmi9;
    logic sealed;
    logic valid_in;

    // Result is only valid when both memory words carried a valid tag.
    valid_in = msw[32] & addr33[32];
    regcap.valid = valid_in & ~clrperm[3];

    // Expand the 4-bit stored exponent; the reserved max encoding maps
    // to the full reset exponent.
    tmp5 = {1'b0, msw[CEXP_LO+:CEXP_W]};
    if (tmp5 == EXP_W'(RESETCEXP)) tmp5 = RESETEXP;
    regcap.exp = tmp5;

    regcap.top   = msw[TOP_LO+:TOP_W];
    regcap.base  = msw[BASE_LO+:BOT_W];
    regcap.otype = msw[OTYPE_LO+:OTYPE_W];

    sealed = (regcap.otype != OTYPE_UNSEALED);
    cperms_mem = msw[CPERMS_LO+:CPERMS_W];
    regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
    addrmi9 = BOT_W'({1'b0, addr33[31:0]} >> regcap.exp);    // ignore the tag valid bit
    tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
    regcap.top_cor  = tmp3[2:1];
    regcap.base_cor = tmp3[0];

    regcap.rsvd = msw[RSVD_LO];

    return regcap;

  endfunction

  // mem2reg, cap meta data, IT8 encoding, memfmt0
  function automatic reg_cap_t mem2regcap_it8_fmt0 (logic [32:0] msw, logic [32:0] addr33, logic [3:0] clrperm);   // IT8
    reg_cap_t regcap;
    logic [EXP_W-1:0] cexp;
    logic [TOP_W-2:0] top8, base8;
    logic [TOP_W-1:0] top9, base9;
    logic denorm, ltop, btop, ttop, tcin;
    logic top_hi, addr_hi;
    logic [2:0] res3;

    logic [CPERMS_W-1:0] cperms_mem;
    logic [BOT_W-1:0] addrmi9;
    logic sealed;
    logic valid_in;

    valid_in = msw[32] & addr33[32];
    regcap.valid = valid_in & ~clrperm[3];

    // IT8 stores the exponent inverted unless the cap is "denormalized"
    // (cexp == 0).
    cexp = msw[CEXP5_LO+:CEXP5_W];
    denorm = (cexp == 0);

    btop  = msw[BASE_LO+BOT_W-1];
    base8 = msw[BASE_LO+:(BOT_W-1)];
    top8  = msw[TOP_LO+:(TOP_W-1)];

    // Reconstruct the implied MSB of top from base MSB, length-top flag
    // and the 8-bit compare carry.
    tcin = (top8 < base8);       // can actually merge it with t_hi in update_temp_fields QQQ
    ltop = ~denorm;
    ttop = ltop ^ tcin ^ btop;

    regcap.exp = cexp ^ {5{~denorm}};    // this is the ^0b11111 part
    top9  = {ttop, top8};
    base9 = {btop, base8};
    regcap.top  = top9;
    regcap.base = base9;

    regcap.otype = msw[OTYPE_LO+:OTYPE_W];

    sealed = (regcap.otype != OTYPE_UNSEALED);
    cperms_mem = msw[CPERMS_LO+:CPERMS_W];
    regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
    addrmi9 = BOT_W'({1'b0, addr33[31:0]} >> regcap.exp);    // ignore the tag valid bit

    // update temp fields
    // tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
    // top_hi = (top < base);
    top_hi  = (btop ^ ttop) ? ~ttop : tcin;
    addr_hi = (addrmi9 < base9);

    regcap.top_cor  = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11);
    regcap.base_cor = (addr_hi) ? 1'b1 : 1'b0;

    regcap.rsvd = msw[RSVD_LO];

    return regcap;

  endfunction

  // reg to mem, meta data, original cap bound encoding, memfmt0
  function automatic logic[32:0] reg2memcap_fmt0 (reg_cap_t regcap);

    logic [32:0] msw;

    msw[32] = regcap.valid ;

    // The full reset exponent is stored as the reserved RESETCEXP encoding.
    msw[CEXP_LO+:CEXP_W]     = (regcap.exp == RESETEXP) ? RESETCEXP : regcap.exp[CEXP_W-1:0];
    msw[TOP_LO+:TOP_W]       = regcap.top ;
    msw[BASE_LO+:BOT_W]      = regcap.base ;
    msw[OTYPE_LO+:OTYPE_W]   = regcap.otype ;
    msw[CPERMS_LO+:CPERMS_W] = regcap.cperms;
    msw[RSVD_LO]             = regcap.rsvd;

    return msw;

  endfunction

  // reg to mem, meta data, IT8 encoding, memfmt0
  function automatic logic[32:0] reg2memcap_it8_fmt0 (reg_cap_t regcap);   // IT8

    logic [32:0] msw;
    logic denorm, ltop, cor;
    logic [9:0] top10, base10, len10;

    // Derive the denormalized-form flag from the 9-bit length so the
    // exponent can be stored inverted (see mem2regcap_it8_fmt0).
    cor    = (regcap.top_cor == {2{regcap.base_cor}});
    top10  = {~cor, regcap.top};
    base10 = {1'b0, regcap.base};
    len10  = top10 - base10;
    ltop   = len10[9] | len10[8];

    denorm = (regcap.exp == 0) && ~ltop;

    msw[32] = regcap.valid;

    msw[CEXP5_LO+:CEXP5_W]   = regcap.exp ^ {5{~denorm}};
    msw[TOP_LO+:(TOP_W-1)]   = regcap.top[7:0];
    msw[BASE_LO+:BOT_W]      = regcap.base ;
    msw[OTYPE_LO+:OTYPE_W]   = regcap.otype ;
    msw[CPERMS_LO+:CPERMS_W] = regcap.cperms;
    msw[RSVD_LO]             = regcap.rsvd;

    return msw;

  endfunction

  //
  // pack/unpack the cap+addr between reg and memory
  //   format 1: lsw32 = RSVD+EXP+T+B+A9, msw32 = CPERMS+OTYPE+A23
  //

  // mem to reg, meta data, original cap bound encoding, memfmt1
  function automatic reg_cap_t mem2regcap_fmt1 (logic [32:0] msw, logic [32:0] lsw, logic [3:0] clrperm);
    reg_cap_t regcap;
    logic [2:0] tmp3;
    logic sealed;
    logic [8:0] addrmi9;
    logic [CPERMS_W-1:0] cperms_mem;
    logic valid_in;

    // lsw is now EXP+B+T+A
    valid_in = msw[32] & lsw[32];
    regcap.valid = valid_in & ~clrperm[3];
    regcap.exp  = (lsw[30:27] == RESETCEXP) ? RESETEXP : {1'b0, lsw[30:27]};
    regcap.base = lsw[26:18];
    regcap.top  = lsw[17:9];
    // At the reset exponent the address mid-field is shifted by one bit.
    addrmi9 = (lsw[30:27] == RESETCEXP) ? {1'b0, lsw[8:1]} : lsw[8:0];

    regcap.otype = msw[25:23];
    sealed = (regcap.otype != OTYPE_UNSEALED);

    // cperms_mem = {lsw[31], msw[31:26]};
    cperms_mem = msw[31:26];
    regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
    regcap.rsvd = lsw[31];

    tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
    regcap.top_cor  = tmp3[2:1];
    regcap.base_cor = tmp3[0];

    return regcap;

  endfunction


  // mem to reg, meta data, IT8 encoding, memfmt1
  function automatic reg_cap_t mem2regcap_it8_fmt1 (logic [32:0] msw, logic [32:0] lsw, logic [3:0] clrperm);   // xyz
    reg_cap_t regcap;
    logic [EXP_W-1:0] cexp;
    logic [TOP_W-2:0] top8, base8;
    logic [TOP_W-1:0] top9, base9;
    logic denorm, ltop, btop, ttop, tcin;
    logic top_hi, addr_hi;
    logic [2:0] res3;

    logic sealed;
    logic [8:0] addrmi9;
    logic [CPERMS_W-1:0] cperms_mem;
    logic valid_in;


    // lsw is now EXP+T+B+A
    valid_in = msw[32] & lsw[32];
    regcap.valid = valid_in & ~clrperm[3];

    cexp = lsw[30:26];
    denorm = (cexp == 0);

    btop  = lsw[17];
    base8 = lsw[16:9];
    top8  = lsw[25:18];

    tcin = (top8 < base8);       // can actually merge it with t_hi in update_temp_fields QQQ
    ltop = ~denorm;
    ttop = ltop ^ tcin ^ btop;

    regcap.exp = cexp ^ {5{~denorm}};    // this is the ^0b11111 part
    top9  = {ttop, top8};
    base9 = {btop, base8};
    regcap.top  = top9;
    regcap.base = base9;

    // (regcap.exp >= RESETEXP);
    addrmi9 = (regcap.exp[4] & regcap.exp[3]) ? {1'b0, lsw[8:1]} : lsw[8:0];

    regcap.otype = msw[25:23];
    sealed = (regcap.otype != OTYPE_UNSEALED);

    // cperms_mem = {lsw[31], msw[31:26]};
    cperms_mem = msw[31:26];
    regcap.cperms = mask_clcperms(cperms_mem, clrperm, regcap.valid, sealed);
    regcap.rsvd = lsw[31];

    // tmp3 = update_temp_fields(regcap.top, regcap.base, addrmi9);
    top_hi  = (btop ^ ttop) ? ~ttop : tcin;
    addr_hi = (addrmi9 < base9);

    regcap.top_cor  = (top_hi == addr_hi)? 2'b00 : ((top_hi && (!addr_hi))? 2'b01 : 2'b11);
    regcap.base_cor = (addr_hi) ? 1'b1 : 1'b0;

    return regcap;

  endfunction

  // mem to reg, addr32, both original and IT8 encoding, memfmt1.
  // Reassembles the 32-bit address from the A9 field (lsw) and the A23
  // field (msw), which are interleaved according to the exponent.
  function automatic logic[32:0] mem2regaddr_fmt1 (logic [32:0] msw, logic [32:0] lsw, reg_cap_t regcap);   // xyz
    logic [32:0] addr33;
    logic [31:0] addrmi, addrhi, addrlo;
    logic [31:0] mask1, mask2;

    // (regcap.exp >= RESETEXP)
    if (regcap.exp[4] & regcap.exp[3]) begin
      addrhi = 32'h0;
      addrmi = {lsw[8:0], 23'h0};
      addrlo = {9'h0, msw[22:0]};
    end else begin
      addrmi = {23'h0, lsw[8:0]} << regcap.exp;
      mask1  = {32{1'b1}} << regcap.exp;
      mask2  = mask1 << 9;
      addrhi = ({9'h0, msw[22:0]} << 9) & mask2;
      addrlo = {9'h0, msw[22:0]} & (~mask1);
    end

    addr33 = {lsw[32], addrhi | addrmi | addrlo};

    return addr33;
  endfunction

  // reg to mem, original cap bound encoding, memfmt1.
  // Returns {msw[32:0], lsw[32:0]} (66 bits, both halves carrying the tag).
  function automatic logic[65:0] reg2mem_fmt1 (reg_cap_t reg_cap, logic[31:0] addr);

    logic [32:0] msw, lsw;
    logic [31:0] mask1, mask2;

    msw[32]    = reg_cap.valid;
    msw[31:26] = reg_cap.cperms[5:0];
    msw[25:23] = reg_cap.otype;
    lsw[32]    = reg_cap.valid ;
    lsw[31]    = reg_cap.rsvd;
    lsw[26:18] = reg_cap.base;
    lsw[17:9]  = reg_cap.top;

    if (reg_cap.exp == RESETEXP) begin
      msw[22:0]  = addr[22:0];
      lsw[30:27] = RESETCEXP;
      lsw[8:0]   = addr[31:23];
    end else begin
      // Split the address around the 9-bit mid-field selected by exp.
      mask1 = {32{1'b1}} << reg_cap.exp;
      mask2 = mask1 << 9;

      msw[22:0]  = 23'((addr & ~mask1) | ((addr & mask2) >> 9));
      lsw[30:27] = reg_cap.exp[CEXP_W-1:0];
      lsw[8:0]   = 9'(addr >> reg_cap.exp);
    end

    return {msw, lsw};

  endfunction

  // reg to mem, IT8 encoding, memfmt1
  function automatic logic[65:0] reg2mem_it8_fmt1 (reg_cap_t regcap, logic[31:0] addr);   // xyz

    logic [32:0] msw, lsw;
    logic [31:0] mask1, mask2;
    logic denorm, ltop, cor;
    logic [9:0] top10, base10, len10;

    cor    = (regcap.top_cor == {2{regcap.base_cor}});
    top10  = {~cor, regcap.top};
    base10 = {1'b0, regcap.base};
    len10  = top10-base10;
    ltop   = len10[9] | len10[8];

    denorm = (regcap.exp == 0) && ~ltop;

    msw[32]    = regcap.valid;
    msw[31:26] = regcap.cperms[5:0];
    msw[25:23] = regcap.otype;
    lsw[32]    = regcap.valid ;
    lsw[31]    = regcap.rsvd;
    lsw[30:26] = regcap.exp ^ {5{~denorm}} ;
    lsw[25:18] = regcap.top[7:0];
    lsw[17:9]  = regcap.base;

    // (regcap.exp >= RESETEXP)
    if (regcap.exp[4] & regcap.exp[3]) begin
      msw[22:0] = addr[22:0];
      lsw[8:0]  = addr[31:23];
    end else begin
      mask1 = {32{1'b1}} << regcap.exp;
      mask2 = mask1 << 9;
      msw[22:0] = 23'((addr & ~mask1) | ((addr & mask2) >> 9));
      lsw[8:0]  = 9'(addr >> regcap.exp);
    end

    return {msw, lsw};

  endfunction

  // simply cast regcap to a 38-bit vector.
  // we can do this with systemverilog casting but let's be explicit here
  function automatic logic [REGCAP_W-1:0] reg2vec (reg_cap_t regcap);

    logic [REGCAP_W-1:0] vec_out;

    vec_out[REGCAP_W-1]  = regcap.valid ;
    vec_out[34+:2]       = regcap.top_cor;
    vec_out[33+:1]       = regcap.base_cor;
    vec_out[28+:EXP_W]   = regcap.exp;
    vec_out[19+:TOP_W]   = regcap.top ;
    vec_out[10+:BOT_W]   = regcap.base ;
    vec_out[7+:OTYPE_W]  = regcap.otype ;
    vec_out[6+:1]        = regcap.rsvd;
    vec_out[0+:CPERMS_W] = regcap.cperms;

    return vec_out;
  endfunction

  // Inverse of reg2vec: unpack a 38-bit vector back into a reg_cap_t.
  function automatic reg_cap_t vec2reg (logic [REGCAP_W-1:0] vec_in);

    reg_cap_t regcap;

    regcap.valid    = vec_in[REGCAP_W-1];
    regcap.top_cor  = vec_in[34+:2];
    regcap.base_cor = vec_in[33+:1];
    regcap.exp      = vec_in[28+:EXP_W];
    regcap.top      = vec_in[19+:TOP_W];
    regcap.base     = vec_in[10+:BOT_W];
    regcap.otype    = vec_in[7+:OTYPE_W];
    regcap.rsvd     = vec_in[6+:1];
    regcap.cperms   = vec_in[0+:CPERMS_W];

    return regcap;
  endfunction

  // test whether 2 caps are equal
  function automatic logic is_equal (full_cap_t cap_a, full_cap_t cap_b,
                                     logic [31:0] addra, logic[31:0] addrb);

    is_equal = (cap_a.valid == cap_b.valid) &&
               (cap_a.top == cap_b.top) && (cap_a.base == cap_b.base) &&
               (cap_a.cperms == cap_b.cperms) && (cap_a.rsvd == cap_b.rsvd) &&
               (cap_a.exp == cap_b.exp) && (cap_a.otype == cap_b.otype) &&
               (addra == addrb);
    return is_equal;

  endfunction

  // clear tag of a regcap if needed, otherwise simply pass through
  function automatic reg_cap_t and_regcap_tag (reg_cap_t in_cap, logic tag_mask);
    reg_cap_t out_cap;

    out_cap = in_cap;
    out_cap.valid = in_cap.valid & tag_mask;
    return out_cap;

  endfunction

  // parameters and constants

  parameter logic[6:0] CHERI_INSTR_OPCODE = 7'h5b;
  parameter int OPDW = 36;     // Must >= number of cheri operator/instructions we support

  // Decoded CHERI operation identifiers (one-hot index into an OPDW-wide
  // operator vector elsewhere in the design).
  typedef enum logic [5:0] {
    CGET_PERM        = 6'h00,
    CGET_TYPE        = 6'h01,
    CGET_BASE        = 6'h02,
    CGET_LEN         = 6'h03,
    CGET_TAG         = 6'h04,
    CGET_TOP         = 6'h05,
    CGET_HIGH        = 6'h06,
    CGET_ADDR        = 6'h07,
    CSEAL            = 6'h08,
    CUNSEAL          = 6'h09,
    CAND_PERM        = 6'h0a,
    CSET_ADDR        = 6'h0b,
    CINC_ADDR        = 6'h0c,
    CINC_ADDR_IMM    = 6'h0d,
    CSET_BOUNDS      = 6'h0e,
    CSET_BOUNDS_EX   = 6'h0f,
    CSET_BOUNDS_IMM  = 6'h10,
    CIS_SUBSET       = 6'h11,
    CIS_EQUAL        = 6'h12,
    CMOVE_CAP        = 6'h13,
    CSUB_CAP         = 6'h14,
    CCLEAR_TAG       = 6'h15,
    CLOAD_CAP        = 6'h16,
    CSET_HIGH        = 6'h17,
    CSTORE_CAP       = 6'h18,
    CCSR_RW          = 6'h19,
    CJALR            = 6'h1a,
    CJAL             = 6'h1b,
    CAUIPCC          = 6'h1c,
    CAUICGP          = 6'h1d,
    CRRL             = 6'h1e,
    CRAM             = 6'h1f,
    CSET_BOUNDS_RNDN = 6'h20
  } cheri_op_e;

  typedef enum logic [4:0] {
    CHERI_CSR_NULL,
    CHERI_CSR_RW
  } cheri_csr_op_e;

  // Special capability register (SCR) addresses.
  parameter logic [4:0] CHERI_SCR_MEPCC      = 5'd31;
  parameter logic [4:0] CHERI_SCR_MSCRATCHC  = 5'd30;
  parameter logic [4:0] CHERI_SCR_MTDC       = 5'd29;
  parameter logic [4:0] CHERI_SCR_MTCC       = 5'd28;
  parameter logic [4:0] CHERI_SCR_ZTOPC      = 5'd27;
  parameter logic [4:0] CHERI_SCR_DSCRATCHC1 = 5'd26;
  parameter logic [4:0] CHERI_SCR_DSCRATCHC0 = 5'd25;
  parameter logic [4:0] CHERI_SCR_DEPCC      = 5'd24;

  // permission violations
  parameter int unsigned W_PVIO = 8;

  // Bit indices into the perm_vio_vec permission-violation vector.
  parameter logic [2:0] PVIO_TAG   = 3'h0;
  parameter logic [2:0] PVIO_SEAL  = 3'h1;
  parameter logic [2:0] PVIO_EX    = 3'h2;
  parameter logic [2:0] PVIO_LD    = 3'h3;
  parameter logic [2:0] PVIO_SD    = 3'h4;
  parameter logic [2:0] PVIO_SC    = 3'h5;
  parameter logic [2:0] PVIO_ASR   = 3'h6;
  parameter logic [2:0] PVIO_ALIGN = 3'h7;


  // Priority-encode a violation vector (plus a bounds-violation flag) into
  // a 5-bit exception cause code; permission violations take precedence
  // over the bounds violation, which takes precedence over "no cause" (0).
  function automatic logic [4:0] vio_cause_enc (logic bound_vio, logic[W_PVIO-1:0] perm_vio_vec);
    logic [4:0] vio_cause;

    if (perm_vio_vec[PVIO_TAG])
      vio_cause = 5'h2;
    else if (perm_vio_vec[PVIO_SEAL])
      vio_cause = 5'h3;
    else if (perm_vio_vec[PVIO_EX])
      vio_cause = 5'h11;
    else if (perm_vio_vec[PVIO_LD])
      vio_cause = 5'h12;
    else if (perm_vio_vec[PVIO_SD])
      vio_cause = 5'h13;
    else if (perm_vio_vec[PVIO_SC])
      vio_cause = 5'h15;
    else if (perm_vio_vec[PVIO_ASR])
      vio_cause = 5'h18;
    else if (bound_vio)
      vio_cause = 5'h1;
    else
      vio_cause = 5'h0;

    return vio_cause;
  endfunction

endpackage
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv b/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv new file mode 100644 index 0000000..27c636a --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_regfile.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Register file for the CHERIoT core: two read ports, one write port,
// each register holding a 32-bit data word plus capability metadata
// (reg_cap_t). Optional features:
//   - RegFileECC:  7-bit parity per register plus shadow/lockstep checks
//                  that drive alert_o on mismatch (fault protection);
//   - CheriPPLBC:  per-register "ready" bits managed through the tag
//                  reservation (trsv) / revocation (trvk) port;
//   - TRVKBypass:  forward a same-cycle revocation directly to the read
//                  ports instead of waiting one register-update cycle.
module cheri_regfile import cheri_pkg::*; #(
  parameter int unsigned NREGS = 32,
  parameter int unsigned NCAPS = 32,
  parameter bit          RegFileECC = 1'b0,
  parameter int unsigned DataWidth  = 32,
  parameter bit          CheriPPLBC = 1'b0,
  parameter bit          TRVKBypass = 1'b1
) (
  // Clock and Reset
  input  logic                 clk_i,
  input  logic                 rst_ni,
  input  logic                 par_rst_ni,

  //Read port R1
  input  logic [4:0]           raddr_a_i,
  output logic [DataWidth-1:0] rdata_a_o,
  output reg_cap_t             rcap_a_o,

  //Read port R2
  input  logic [4:0]           raddr_b_i,
  output logic [DataWidth-1:0] rdata_b_o,
  output reg_cap_t             rcap_b_o,

  // Write port W1
  input  logic [4:0]           waddr_a_i,
  input  logic [DataWidth-1:0] wdata_a_i,
  input  reg_cap_t             wcap_a_i,
  input  logic                 we_a_i,     // we always write both cap & data in parallel

  // Tag reservation and revocation port
  output logic [31:0]          reg_rdy_o,
  input  logic [4:0]           trvk_addr_i,
  input  logic                 trvk_en_i,
  input  logic                 trvk_clrtag_i,
  input  logic [6:0]           trvk_par_i,   // make sure this is included in lockstep compare
  input  logic [4:0]           trsv_addr_i,
  input  logic                 trsv_en_i,
  input  logic [6:0]           trsv_par_i,

  output logic                 alert_o
);

  // Per-register default parity values (parity of the register index
  // pattern used at reset).
  localparam logic [6:0] DefParBits[0:31] = '{7'h27,7'h0d,7'h6b,7'h41,7'h62,7'h48,7'h2e,7'h04,
                                              7'h1f,7'h35,7'h53,7'h79,7'h5a,7'h70,7'h16,7'h3c,
                                              7'h6e,7'h44,7'h22,7'h08,7'h2b,7'h01,7'h67,7'h4d,
                                              7'h56,7'h7c,7'h1a,7'h30,7'h13,7'h39,7'h5f,7'h75};

  // Parity delta applied when a revocation clears a tag in place.
  localparam logic [6:0] TrvkParIncr = 7'h15;
  localparam logic [6:0] NullParBits = 7'h2a;    // 7-bit parity for 32'h0

  logic [31:0] rf_reg   [31:0];
  logic [31:0] rf_reg_q [NREGS-1:1];

  logic [6:0]  rf_reg_par   [31:0];
  logic [6:0]  rf_reg_par_q [NREGS-1:0];

  reg_cap_t    rf_cap   [31:0];
  reg_cap_t    rf_cap_q [NCAPS-1:1];

  reg_cap_t    rcap_a, rcap_b;

  logic [NREGS-1:1] we_a_dec;
  logic [NREGS-1:1] trvk_dec, trsv_dec;
  logic [31:0]      reg_rdy_vec;

  logic pplbc_alert;

  // One-hot decode of the write, revoke and reserve addresses.
  always_comb begin : we_a_decoder
    for (int unsigned i = 1; i < NREGS; i++) begin
      we_a_dec[i] = (waddr_a_i == 5'(i)) ? we_a_i : 1'b0;
      trvk_dec[i] = CheriPPLBC ? (trvk_addr_i == 5'(i)) : 1'b0;
      trsv_dec[i] = CheriPPLBC ? (trsv_addr_i == 5'(i)) : 1'b0;
    end
  end

  // No flops for R0 as it's hard-wired to 0
  for (genvar i = 1; i < NREGS; i++) begin : g_rf_flops


    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        rf_reg_q[i] <= 32'h0;
      end else if (we_a_dec[i]) begin
        rf_reg_q[i] <= wdata_a_i[31:0];
      end
    end

    if (RegFileECC) begin : g_reg_par
      logic [6:0] wdata_par;
      logic       trvk_clr_we;

      assign trvk_clr_we = CheriPPLBC & trvk_dec[i] & trvk_en_i & trvk_clrtag_i;
      assign wdata_par   = wdata_a_i[DataWidth-1:DataWidth-7];

      // split reset of data and parity to detect spurious reset (fault protection)
      always_ff @(posedge clk_i or negedge par_rst_ni) begin
        if (!par_rst_ni) begin
          rf_reg_par_q[i] <= DefParBits[i];
        end else if (trvk_clr_we && we_a_dec[i]) begin
          // Simultaneous write + tag-clearing revocation: parity of the
          // written word with the revocation delta folded in.
          rf_reg_par_q[i] <= wdata_par ^ TrvkParIncr;
        end else if (trvk_clr_we) begin
          // update parity bits
          rf_reg_par_q[i] <= rf_reg_par_q[i] ^ TrvkParIncr;
        end else if (we_a_dec[i]) begin
          rf_reg_par_q[i] <= wdata_par;
        end
      end
    end else begin : g_no_reg_par
      assign rf_reg_par_q[i] = 7'h0;
    end  // gen reg_par

  end  // g_rf_flops


  // Map the physical flops onto a full 32-entry view; registers above
  // NREGS/NCAPS read as constants.
  assign rf_reg[0]     = 32'h0;
  assign rf_reg_par[0] = DefParBits[0];
  for (genvar i=1; i<32 ; i++) begin
    if (i < NREGS) begin
      assign rf_reg[i]     = rf_reg_q[i];
      assign rf_reg_par[i] = rf_reg_par_q[i];
    end else begin
      assign rf_reg[i]     = 0;
      assign rf_reg_par[i] = DefParBits[i];
    end
  end

  // Read data = {parity, word} truncated/zero-padded to DataWidth.
  assign rdata_a_o = DataWidth'({rf_reg_par[raddr_a_i], rf_reg[raddr_a_i]});
  assign rdata_b_o = DataWidth'({rf_reg_par[raddr_b_i], rf_reg[raddr_b_i]});

  // capability meta data (MSW)
  for (genvar i = 1; i < NCAPS; i++) begin : g_cap_flops
    logic trvk_clr_we;

    assign trvk_clr_we = CheriPPLBC & trvk_dec[i] & trvk_en_i & trvk_clrtag_i;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        rf_cap_q[i] <= NULL_REG_CAP;
      end else if (trvk_clr_we && we_a_dec[i]) begin
        rf_cap_q[i] <= and_regcap_tag(wcap_a_i, 1'b0);
      end else if (trvk_clr_we) begin
        // prioritize revocation (later in pipeline)
        rf_cap_q[i] <= and_regcap_tag(rf_cap_q[i], 1'b0);
      end else if (we_a_dec[i]) begin
        rf_cap_q[i] <= wcap_a_i;
      end
    end
  end

  assign rf_cap[0] = NULL_REG_CAP;
  for (genvar i=1; i<32 ; i++) begin
    if (i < NCAPS) begin
      assign rf_cap[i] = rf_cap_q[i];
    end else begin
      assign rf_cap[i] = NULL_REG_CAP;
    end
  end

  assign rcap_a = rf_cap[raddr_a_i];
  assign rcap_b = rf_cap[raddr_b_i];

  // Per-register ready bits: cleared on tag reservation (trsv), set on
  // revocation completion (trvk). Only meaningful with CheriPPLBC.
  if (CheriPPLBC) begin : g_regrdy

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni)
        reg_rdy_vec[0] <= 1'b1;
    end

    for (genvar i=1; i<NCAPS; i++) begin
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni)
          reg_rdy_vec[i] <= 1'b1;
        else if (trsv_dec[i] & trsv_en_i)      // prioritize trsv t
          reg_rdy_vec[i] <= 1'b0;
        else if (trvk_dec[i] & trvk_en_i)
          reg_rdy_vec[i] <= 1'b1;
      end  // always_ff
    end

    // unused bits
    for (genvar i=NCAPS; i<32; i++) begin
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni)
          reg_rdy_vec[i] <= 1'b1;
      end
    end

    // build the shadow copy of reg_rdy_vec for fault protection
    if (RegFileECC) begin : gen_shdw
      logic [4:0] trvk_addr_q;
      logic       trvk_en_q;
      logic       trvk_clrtag_q;
      logic [6:0] trvk_par_q;
      logic [4:0] trsv_addr_q;
      logic       trsv_en_q;
      logic [6:0] trsv_par_q;

      logic [31:0]      reg_rdy_vec_shdw, reg_rdy_vec_q;
      logic [NREGS-1:1] trvk_dec_shdw, trsv_dec_shdw;
      logic             shdw_mismatch_err, cap_rvk_err;


      always_comb begin
        for (int unsigned i = 1; i < NREGS; i++) begin
          trvk_dec_shdw[i] = (trvk_addr_q == 5'(i));
          trsv_dec_shdw[i] = (trsv_addr_q == 5'(i));
        end
      end

      // Delay the trvk/trsv requests by one cycle so the shadow copy can
      // be compared against the delayed primary reg_rdy_vec.
      always_ff @(posedge clk_i or negedge par_rst_ni) begin
        if (!par_rst_ni) begin
          trvk_addr_q   <= 5'h0;
          trvk_en_q     <= 1'b0;
          trvk_clrtag_q <= 1'b0;
          trvk_par_q    <= NullParBits;
          trsv_addr_q   <= 5'h0;
          trsv_en_q     <= 1'b0;
          trsv_par_q    <= NullParBits;
          reg_rdy_vec_q <= {32{1'b1}};
        end else begin
          trvk_addr_q   <= trvk_addr_i;
          trvk_en_q     <= trvk_en_i;
          trvk_clrtag_q <= trvk_clrtag_i;
          trvk_par_q    <= trvk_par_i;
          trsv_addr_q   <= trsv_addr_i;
          trsv_en_q     <= trsv_en_i;
          trsv_par_q    <= trsv_par_i;
          reg_rdy_vec_q <= reg_rdy_vec;
        end
      end

      for (genvar i = 0; i < 32; i++) begin
        if ((i == 0) || (i >= NCAPS)) begin
          assign reg_rdy_vec_shdw[i] = 1'b1;
        end else begin
          always_ff @(posedge clk_i or negedge par_rst_ni) begin
            if (!par_rst_ni)
              reg_rdy_vec_shdw[i] <= 1'b1;
            else if (trsv_dec_shdw[i] & trsv_en_q)
              reg_rdy_vec_shdw[i] <= 1'b0;
            else if (trvk_dec_shdw[i] & trvk_en_q)
              reg_rdy_vec_shdw[i] <= 1'b1;
          end  // always_ff
        end
      end

      // generate alert
      assign shdw_mismatch_err = (reg_rdy_vec_shdw != reg_rdy_vec_q);

      // readback revoked cap to make sure the valid bit is actually cleared
      always_comb begin
        cap_rvk_err = 0;
        for (int unsigned i = 1; i < NCAPS; i++) begin
          cap_rvk_err = cap_rvk_err | (trvk_en_q & trvk_clrtag_q & trvk_dec_shdw[i] & rf_cap_q[i].valid);
        end
      end


      // check parity of trsv and trvk requests
      logic [1:0] trsv_ecc_err, trvk_ecc_err;

      prim_secded_inv_39_32_dec trsv_ecc_i (
        .data_i    ({trsv_par_q, 26'h0, trsv_en_q, trsv_addr_q}),
        .data_o    (),
        .syndrome_o(),
        .err_o     (trsv_ecc_err)
      );

      prim_secded_inv_39_32_dec trsk_ecc_i (
        .data_i    ({trvk_par_q, 25'h0, trvk_en_q, trvk_clrtag_q, trvk_addr_q}),
        .data_o    (),
        .syndrome_o(),
        .err_o     (trvk_ecc_err)
      );

      assign pplbc_alert = shdw_mismatch_err | cap_rvk_err | (|trsv_ecc_err) | (|trvk_ecc_err);

    end else begin : gen_no_shdw    // no ECC or shdw checking
      assign pplbc_alert = 1'b0;
    end

  end else begin : g_no_regrdy
    assign reg_rdy_vec = {32{1'b1}};
    assign pplbc_alert = 1'b0;
  end  // not pplbc

  //
  // read back last-written register for fault protection
  //
  logic reg_rdbk_err;

  if (RegFileECC) begin : gen_fault_rdbk
    logic [NREGS-1:1] we_a_dec_shdw;
    logic [4:0]       waddr_a_q;
    logic [31:0]      wdata_a_q;
    logic [6:0]       wpar_a_q;
    logic [37:0]      wcap_vec_q;
    logic             we_a_q;
    logic [31:0]      wdata_tmp;
    logic [6:0]       rpar_tmp;
    logic [1:0]       wreq_ecc_err;
    logic             rdbk_cmp_err;

    // flop the write request and check parity
    // need all fields to compute parity bits
    always_ff @(posedge clk_i or negedge par_rst_ni) begin
      if (!par_rst_ni) begin
        waddr_a_q  <= 5'h0;
        wdata_a_q  <= 32'h0;
        wpar_a_q   <= NullParBits;
        wcap_vec_q <= 38'h0;
        we_a_q     <= 1'b0;
      end else begin
        waddr_a_q  <= waddr_a_i;
        wdata_a_q  <= wdata_a_i[31:0];
        wpar_a_q   <= wdata_a_i[DataWidth-1:DataWidth-7];
        wcap_vec_q <= reg2vec(wcap_a_i);
        we_a_q     <= we_a_i;
      end
    end

    // Fold data, cap vector, write-enable and address into one word so a
    // single SECDED decode covers the whole write request.
    assign wdata_tmp = wdata_a_q ^ wcap_vec_q[31:0] ^ {20'h0, we_a_q, waddr_a_q, wcap_vec_q[37:32]};

    prim_secded_inv_39_32_dec wdata_ecc_i (
      .data_i    ({wpar_a_q, wdata_tmp}),
      .data_o    (),
      .syndrome_o(),
      .err_o     (wreq_ecc_err)
    );

    // decode and read back to verify (only parity bits)
    always_comb begin
      for (int unsigned i = 1; i < NREGS; i++) begin
        we_a_dec_shdw[i] = (waddr_a_q == 5'(i)) ? we_a_q : 1'b0;
      end
    end

    assign rpar_tmp = rf_reg_par[waddr_a_q];

    assign rdbk_cmp_err = (rpar_tmp != wpar_a_q) && (waddr_a_q != 0) && we_a_q;

    assign reg_rdbk_err = (|wreq_ecc_err) | rdbk_cmp_err;

  end else begin : gen_no_fault_rdbk
    assign reg_rdbk_err = 1'b0;
  end

  assign alert_o = pplbc_alert | reg_rdbk_err;

  reg_cap_t rcap_a_rvkd, rcap_b_rvkd;

  if (TRVKBypass) begin
    // Bypass the register update cycle and directly update the read ports
    always_comb begin
      reg_rdy_o = reg_rdy_vec | ({NREGS{trvk_en_i}} & {trvk_dec, 1'b0});

      rcap_a_rvkd = rcap_a;
      if (trvk_en_i && trvk_clrtag_i && (trvk_addr_i == raddr_a_i))
        rcap_a_rvkd.valid = 1'b0;
      rcap_a_o = rcap_a_rvkd;

      rcap_b_rvkd = rcap_b;
      if (trvk_en_i && trvk_clrtag_i && (trvk_addr_i == raddr_b_i))
        rcap_b_rvkd.valid = 1'b0;
      rcap_b_o = rcap_b_rvkd;

    end
  end else begin
    assign reg_rdy_o = reg_rdy_vec;

    assign rcap_a_rvkd = rcap_a;
    assign rcap_a_o    = rcap_a_rvkd;
    assign rcap_b_rvkd = rcap_b;
    assign rcap_b_o    = rcap_b_rvkd;
  end



endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv b/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv new file mode 100644 index 0000000..ba6ce15 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_stkz.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0


// Stack zeroization engine (ZTOP SCR backend).
// Writing a valid capability to ZTOP starts a hardware walk that stores
// 33'h0 to each word from (address - 4) down to the capability base via
// the LSU request port; the walk stops early on an unmasked interrupt or
// when NULL is written to ZTOP.
module cheri_stkz import cheri_pkg::*; (
  // Clock and Reset
  input  logic        clk_i,
  input  logic        rst_ni,

  // CSR register interface
  input  logic        ztop_wr_i,
  input  logic [31:0] ztop_wdata_i,
  input  full_cap_t   ztop_wfcap_i,
  output logic [31:0] ztop_rdata_o,
  output reg_cap_t    ztop_rcap_o,

  input  logic        unmasked_intr_i,

  output logic        stkz_active_o,
  output logic        stkz_abort_o,
  output logic [31:0] stkz_ptr_o,
  output logic [31:0] stkz_base_o,
  output logic        stkz_err_o,

  // LSU req/resp interface (to be multiplexed/qualified)
  input  logic        lsu_stkz_resp_valid_i,
  input  logic        lsu_stkz_resp_err_i,
  input  logic        lsu_stkz_req_done_i,
  output logic        stkz_lsu_req_o,
  output logic        stkz_lsu_we_o,
  output logic        stkz_lsu_is_cap_o,
  output logic [31:0] stkz_lsu_addr_o,
  output logic [32:0] stkz_lsu_wdata_o
);

  typedef enum logic [1:0] {STKZ_IDLE, STKZ_ACTIVE, STKZ_ABORT} stkz_fsm_t;

  stkz_fsm_t stkz_fsm_d, stkz_fsm_q;

  // Word-granular (address[31:2]) pointers for the zeroization walk.
  logic [29:0] stkz_ptrw, stkz_ptrw_nxt;
  logic [29:0] stkz_basew;
  logic        stkz_start, stkz_done, stkz_stop, stkz_active;
  reg_cap_t    ztop_rcap, ztop_rcap_nxt;
  logic [32:0] ztop_wtop33;
  logic [31:0] ztop_wbase32;
  logic        waddr_eq_base;
  logic        cmd_cap_good;
  reg_cap_t    cmd_wcap;
  logic        cmd_new_cap, cmd_new_null;
  logic        cmd_is_n2z;

  // Always a word store of zero while active.
  assign stkz_lsu_wdata_o  = 33'h0;
  assign stkz_lsu_is_cap_o = 1'b0;     // this means we are really writing 33'h0 to memory
  assign stkz_lsu_we_o     = 1'b1;
  assign stkz_lsu_req_o    = stkz_active;
  assign stkz_lsu_addr_o   = {stkz_ptrw_nxt, 2'h0};

  assign stkz_active_o = stkz_active;
  assign stkz_active   = (stkz_fsm_q != STKZ_IDLE);
  assign stkz_abort_o  = (stkz_fsm_q == STKZ_ABORT);

  assign stkz_ptr_o  = {stkz_ptrw, 2'h0};
  assign stkz_base_o = {stkz_basew, 2'h0};

  // ZTOP readback: current pointer plus the (progress-adjusted) cap.
  assign ztop_rdata_o = {stkz_ptrw, 2'h0};
  assign ztop_rcap_o  = ztop_rcap;

  assign ztop_wbase32 = ztop_wfcap_i.base32;
  assign ztop_wtop33  = ztop_wfcap_i.top33;

  // A usable command cap must be tagged, have store-data permission, and
  // have its write address at or below the cap top (word granularity).
  assign cmd_cap_good = ztop_wfcap_i.valid && (ztop_wtop33[32:2] >= ztop_wdata_i[31:2]) &&
                        ztop_wfcap_i.perms[PERM_SD];
  // "nothing to zeroize": good cap but address already equals base.
  assign cmd_is_n2z   = cmd_cap_good && (ztop_wdata_i[31:2] == ztop_wbase32[31:2]);

  assign cmd_new_null = ztop_wr_i && (ztop_wfcap_i == NULL_FULL_CAP) && (ztop_wdata_i == 32'h0);
  assign cmd_new_cap  = ztop_wr_i && ~cmd_new_null;

  assign stkz_start = cmd_new_cap && cmd_cap_good && (ztop_wdata_i[31:2] > ztop_wbase32[31:2]);
  assign stkz_done  = (stkz_ptrw_nxt <= stkz_basew);
  assign stkz_stop  = unmasked_intr_i | cmd_new_null;


  always_comb begin
    logic [2:0] tmp3;
    logic [8:0] addrmi9;

    // Next-state logic for the zeroization FSM.
    if ((stkz_fsm_q == STKZ_IDLE) && stkz_start)
      stkz_fsm_d = STKZ_ACTIVE;
    else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_done & lsu_stkz_req_done_i)    // "normal" completion
      stkz_fsm_d = STKZ_IDLE;
    else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_stop & lsu_stkz_req_done_i)    // abort
      stkz_fsm_d = STKZ_IDLE;
    else if ((stkz_fsm_q == STKZ_ACTIVE) & stkz_stop)    // pending abort, wait till lsu_req_done
      stkz_fsm_d = STKZ_ABORT;
    else if ((stkz_fsm_q == STKZ_ABORT) & lsu_stkz_req_done_i)
      stkz_fsm_d = STKZ_IDLE;    // self clear by any further load/store activity
    else
      stkz_fsm_d = stkz_fsm_q;

    // clear tag if writing a ztop value with address == base
    cmd_wcap = full2regcap(ztop_wfcap_i);
    if (cmd_is_n2z) cmd_wcap.valid = 1'b0;

    // we are doing this in lieu of a full set_address.
    // note we only start a zeroization if addr > base32 so no need for representability check
    ztop_rcap_nxt          = ztop_rcap;
    addrmi9                = {stkz_ptrw_nxt, 2'b00} >> ztop_rcap.exp;
    tmp3                   = update_temp_fields(ztop_rcap.top, ztop_rcap.base, addrmi9);
    ztop_rcap_nxt.top_cor  = tmp3[2:1];
    ztop_rcap_nxt.base_cor = tmp3[0];
    // Cap reads back untagged once the walk has reached base.
    ztop_rcap_nxt.valid    = ztop_rcap.valid & ~stkz_done;
  end

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      stkz_fsm_q    <= STKZ_IDLE;
      stkz_ptrw     <= 30'h0;
      stkz_ptrw_nxt <= 30'h0;
      stkz_basew    <= 30'h0;
      stkz_err_o    <= 1'b0;
      ztop_rcap     <= NULL_REG_CAP;
    end else begin

      stkz_fsm_q <= stkz_fsm_d;

      // zcap is a WARL SCR
      //  -  if active
      //     - Readback return current progress
      //     - allow writing NULL to stop (readback NULL in this case)
      //
      //  - if not active,
      //    - only allow writing tagged cap (legalized), which starts zeroization, however
      //    - special case: write a tagged cap with addr == base will NOT start zeroization but
      //      will clear tag on read
      //
      if (ztop_wr_i) begin
        stkz_ptrw <= ztop_wdata_i[31:2];
        ztop_rcap <= cmd_wcap;
      end else if (stkz_active && lsu_stkz_req_done_i) begin
        stkz_ptrw <= stkz_ptrw_nxt;
        ztop_rcap <= ztop_rcap_nxt;
      end

      // this is the captured hardware zeroization context, only updated for valid zeroization runs
      if (stkz_start) begin
        stkz_ptrw_nxt <= ztop_wdata_i[31:2] - 1;
        stkz_basew    <= ztop_wbase32[31:2];
      end else if (stkz_active && lsu_stkz_req_done_i && ~(stkz_done | stkz_stop)) begin
        stkz_ptrw_nxt <= stkz_ptrw_nxt - 1;
      end

      // Sticky error flag for the current run: cleared when a new run
      // starts, set on any errored LSU response.
      if (~stkz_active && stkz_start)
        stkz_err_o <= 1'b0;
      else if (lsu_stkz_resp_valid_i && lsu_stkz_resp_err_i)
        stkz_err_o <= 1'b1;

    end
  end

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv b/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv new file mode 100644 index 0000000..eb5df17 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_tbre.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Temporal-safety Background Revocation Engine (TBRE).
 *
 * Walks the memory region [start_addr, end_addr] in 8-byte (capability-sized)
 * steps, loading each capability via the LSU, checking its revocation shadow
 * bit (delivered through the trvk interface), and issuing an invalidating
 * store for any capability whose base points into a revoked region.
 *
 * Loads and stores share one LSU port; in-flight loads are tracked in a set
 * of parallel FIFOs (request address, returned capability, shadow bit) that
 * share a single read pointer.
 *
 * Parameters:
 *   FifoSize - depth of the outstanding-load FIFOs; must be a power of 2
 *              (pointer wrap relies on it).
 *   AddrHi   - MSB of the address range walked; bits above AddrHi are taken
 *              from start_addr to save area.
 */
module cheri_tbre #(
  parameter int unsigned FifoSize = 4,   // must be power-of-2
  parameter int unsigned AddrHi   = 31
) (
  // Clock and Reset
  input  logic          clk_i,
  input  logic          rst_ni,

  // MMIO register interface (packed control vector: see tbre_ctrl unpacking below)
  input  logic [65:0]   tbre_ctrl_vec_i,
  output logic          tbre_stat_o,          // 1 = walk in progress
  output logic          tbre_err_o,           // sticky error flag for firmware

  // LSU req/resp interface (to be multiplexed/qualified by the wrapper)
  input  logic          lsu_tbre_resp_valid_i,
  input  logic          lsu_tbre_resp_err_i,
  input  logic          lsu_tbre_resp_is_wr_i,
  input  logic [32:0]   lsu_tbre_raw_lsw_i,   // {tag, data[31:0]} of the loaded capability LSW
  input  logic          lsu_tbre_req_done_i,
  input  logic          lsu_tbre_addr_incr_i,
  output logic          tbre_lsu_req_o,
  output logic          tbre_lsu_is_cap_o,
  output logic          tbre_lsu_we_o,
  output logic [31:0]   tbre_lsu_addr_o,
  output logic [32:0]   tbre_lsu_wdata_o,

  // LSU snoop interface: observe CPU-side writes to detect collisions with queued entries
  input  logic          snoop_lsu_req_done_i,
  input  logic          snoop_lsu_req_i,
  input  logic          snoop_lsu_is_cap_i,
  input  logic          snoop_lsu_we_i,
  input  logic          snoop_lsu_cheri_err_i,
  input  logic [31:0]   snoop_lsu_addr_i,

  // trvk interface: shadow-map lookup result for each completed load
  input  logic          trvk_en_i,
  input  logic          trvk_clrtag_i
);

  localparam FifoPtrW  = $clog2(FifoSize);
  localparam CapFifoDW = 33+1;       // {err, tag, data[31:0]}
  localparam ReqFifoDW = AddrHi-1;   // {valid, addr8[AddrHi-3:0]}

  logic          load_stop_cond, load_gnt;
  logic          store_gnt;
  logic          store_req_valid;
  logic [31:0]   load_addr, store_addr;
  logic          wait_resp_q;

  logic          req_fifo_wr_en, cap_fifo_wr_en, shdw_fifo_wr_en, fifo_rd_en;

  logic [AddrHi-3:0]    cur_load_addr8, load_addr8_p1;    // addresses in 8-byte units
  logic [FifoPtrW:0]    req_fifo_ext_wr_ptr, cap_fifo_ext_wr_ptr, shdw_fifo_ext_wr_ptr;
  logic [FifoPtrW:0]    os_req_cnt;
  logic [FifoPtrW:0]    fifo_ext_rd_ptr;
  logic [FifoPtrW-1:0]  req_fifo_wr_ptr, cap_fifo_wr_ptr, shdw_fifo_wr_ptr;
  logic [FifoPtrW-1:0]  fifo_rd_ptr;
  logic                 shdw_fifo_wr_data;
  logic [CapFifoDW-1:0] cap_fifo_wr_data;
  logic [ReqFifoDW-1:0] req_fifo_wr_data;
  logic                 fifo_rd_shdw, fifo_rd_tag, fifo_rd_valid, fifo_rd_err;
  logic [31:0]          fifo_rd_data;
  logic [AddrHi-3:0]    fifo_rd_addr8;
  logic                 fifo_not_empty;

  // Memory-walk progress FSM
  typedef enum logic [1:0] {TBRE_IDLE, TBRE_LOAD, TBRE_WAIT} tbre_fsm_t;
  tbre_fsm_t tbre_fsm_q, tbre_fsm_d;

  // Load/store scheduler (which request is presented to the LSU)
  typedef enum logic [1:0] {SCH_NONE, SCH_LOAD, SCH_STORE} tbre_sch_t;
  tbre_sch_t tbre_sch_q, tbre_sch_d;

  typedef struct packed {
    logic        go;          // start a new walk (level, sampled in IDLE)
    logic        add1wait;    // insert one extra wait state between req and next req
    logic [31:0] start_addr;
    logic [31:0] end_addr;    // inclusive upper bound of the walk
  } tbre_ctrl_t;

  tbre_ctrl_t tbre_ctrl;

  // register interface: unpack the MMIO control vector
  assign tbre_ctrl.go         = tbre_ctrl_vec_i[64];
  assign tbre_ctrl.add1wait   = tbre_ctrl_vec_i[65];
  assign tbre_ctrl.start_addr = tbre_ctrl_vec_i[31:0];
  assign tbre_ctrl.end_addr   = tbre_ctrl_vec_i[63:32];
  assign tbre_stat_o          = (tbre_fsm_q != TBRE_IDLE);

  // note having resp_valid here improves performance but makes timing a bit worse
  // (data_rvalid --> tbre_lsu_req --> core/tbre mux select --> data_wdata_o)
  assign tbre_lsu_req_o = ((tbre_sch_q == SCH_LOAD) |
                           ((tbre_sch_q == SCH_STORE) && store_req_valid)) &
                          (~wait_resp_q | (lsu_tbre_resp_valid_i & ~tbre_ctrl.add1wait));
  assign tbre_lsu_is_cap_o = (tbre_sch_q == SCH_LOAD);
  assign tbre_lsu_we_o     = (tbre_sch_q == SCH_STORE);
  // addr_incr handles the LSU's second beat of a capability access (+4 bytes)
  assign tbre_lsu_addr_o   = (tbre_sch_q == SCH_LOAD) ?
                             load_addr + {lsu_tbre_addr_incr_i, 2'b00} : store_addr;
  // invalidating store: write back the data word with the tag bit cleared
  assign tbre_lsu_wdata_o  = {1'b0, fifo_rd_data};

  assign load_addr8_p1 = cur_load_addr8 + 1;

  assign load_stop_cond = (load_addr8_p1 > tbre_ctrl.end_addr[AddrHi:3]);
  assign load_gnt       = (tbre_sch_q == SCH_LOAD) & lsu_tbre_req_done_i;
  assign store_gnt      = (tbre_sch_q == SCH_STORE) & lsu_tbre_req_done_i;

  // expand load/store address by concatenating the MSBs from start_address (saves some area)
  assign load_addr  = (AddrHi >= 31) ? {cur_load_addr8, 3'b000} :
                      {tbre_ctrl.start_addr[31:AddrHi+1], cur_load_addr8, 3'b000};
  assign store_addr = (AddrHi >= 31) ? {fifo_rd_addr8, 3'b000} :
                      {tbre_ctrl.start_addr[31:AddrHi+1], fifo_rd_addr8, 3'b000};

  always_comb begin
    logic load_stall, req_fifo_full;

    // state machine tracking the progress of the memory walk
    if ((tbre_fsm_q == TBRE_IDLE) && tbre_ctrl.go)
      tbre_fsm_d = TBRE_LOAD;
    else if ((tbre_fsm_q == TBRE_LOAD) && load_gnt & load_stop_cond)
      tbre_fsm_d = TBRE_WAIT;                       // all loads issued; drain outstanding
    else if ((tbre_fsm_q == TBRE_WAIT) && (os_req_cnt == 0))
      tbre_fsm_d = TBRE_IDLE;
    else
      tbre_fsm_d = tbre_fsm_q;

    // arbitration between load/store requests, throttle if too many outstanding load requests
    // TBRE assumes a non-buffered memory model (new req won't be gnt'd if the prev response
    // is still outstanding). If not, we have to change this to throttle on resp as well since
    // the load_store_unit can't handle multiple outstanding requests.
    load_stall    = (os_req_cnt >= FifoSize-1);
    req_fifo_full = (os_req_cnt >= FifoSize);

    tbre_sch_d = tbre_sch_q;  // default
    case (tbre_sch_q)
      SCH_NONE:
        if ((tbre_fsm_q == TBRE_LOAD) && !req_fifo_full)
          tbre_sch_d = SCH_LOAD;
        else if (store_req_valid)
          tbre_sch_d = SCH_STORE;
      SCH_LOAD:
        if (load_gnt & (load_stall || (tbre_fsm_d == TBRE_WAIT)) & store_req_valid)
          tbre_sch_d = SCH_STORE;
        else if (load_gnt & (load_stall || (tbre_fsm_d == TBRE_WAIT)))
          tbre_sch_d = SCH_NONE;
      SCH_STORE:
        if ((store_gnt | ~store_req_valid) & (tbre_fsm_q == TBRE_LOAD))
          tbre_sch_d = SCH_LOAD;  // no need to check req_fifo_full, since we are dequeuing from it
        else if (store_gnt|~store_req_valid)  // go back to NONE to allow reading fifo further
          tbre_sch_d = SCH_NONE;  // no bandwidth loss here since the load req will move ahead anyway
      default:;
    endcase
  end

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      tbre_fsm_q     <= TBRE_IDLE;
      tbre_sch_q     <= SCH_NONE;
      cur_load_addr8 <= 'h0;
      wait_resp_q    <= 1'b0;
      tbre_err_o     <= 1'b0;
    end else begin

      tbre_fsm_q <= tbre_fsm_d;
      tbre_sch_q <= tbre_sch_d;

      if (tbre_ctrl.go & (tbre_fsm_q == TBRE_IDLE))
        cur_load_addr8 <= tbre_ctrl.start_addr[AddrHi:3];
      else if (load_gnt)
        cur_load_addr8 <= load_addr8_p1;

      // one request in flight at a time: block new reqs until the response arrives
      if (load_gnt | store_gnt)
        wait_resp_q <= 1'b1;
      else if (lsu_tbre_resp_valid_i)
        wait_resp_q <= 1'b0;

      // for now just capture/latch errors and flag it to firmware (cleared on new walk)
      if ((tbre_fsm_q == TBRE_IDLE) && tbre_ctrl.go)
        tbre_err_o <= 1'b0;
      else if (lsu_tbre_resp_valid_i && lsu_tbre_resp_err_i)
        tbre_err_o <= 1'b1;
    end
  end

  // FIFOs to buffer caps read from the data memory and shadow bits from the shadow map

  // count of outstanding load requests in the pipeline
  assign os_req_cnt = req_fifo_ext_wr_ptr - fifo_ext_rd_ptr;

  // extended (wrap-bit) pointers truncated to index the FIFO memories
  assign req_fifo_wr_ptr  = req_fifo_ext_wr_ptr[FifoPtrW-1:0];
  assign cap_fifo_wr_ptr  = cap_fifo_ext_wr_ptr[FifoPtrW-1:0];
  assign shdw_fifo_wr_ptr = shdw_fifo_ext_wr_ptr[FifoPtrW-1:0];
  assign fifo_rd_ptr      = fifo_ext_rd_ptr[FifoPtrW-1:0];

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      fifo_ext_rd_ptr      <= 'h0;
      req_fifo_ext_wr_ptr  <= 'h0;
      cap_fifo_ext_wr_ptr  <= 'h0;
      shdw_fifo_ext_wr_ptr <= 'h0;
    end else begin
      // FIFO size is power-of-2, so pointer arithmetic wraps naturally
      if (fifo_rd_en) fifo_ext_rd_ptr <= fifo_ext_rd_ptr + 1;

      if (req_fifo_wr_en)  req_fifo_ext_wr_ptr  <= req_fifo_ext_wr_ptr + 1;
      if (cap_fifo_wr_en)  cap_fifo_ext_wr_ptr  <= cap_fifo_ext_wr_ptr + 1;
      if (shdw_fifo_wr_en) shdw_fifo_ext_wr_ptr <= shdw_fifo_ext_wr_ptr + 1;
    end
  end

  logic [FifoSize-1:0][ReqFifoDW-1:0] req_fifo_mem;   // packed entry: {valid, addr8}
  logic [FifoSize-1:0][CapFifoDW-1:0] cap_fifo_mem;   // packed entry: {err, tag, data[31:0]}
  logic [FifoSize-1:0]                shdw_fifo_mem;  // single shadow bit per entry

  for (genvar i = 0; i < FifoSize; i++) begin : gen_fifo_mem
    logic [28:0] req_fifo_item_addr8;
    // reconstruct the full 8-byte-granule address of this entry for snoop comparison
    assign req_fifo_item_addr8 = (AddrHi >= 31) ? req_fifo_mem[i][AddrHi-3:0] :
                                 {tbre_ctrl.start_addr[31:AddrHi+1], req_fifo_mem[i][AddrHi-3:0]};
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        req_fifo_mem[i]  <= 0;
        cap_fifo_mem[i]  <= 0;
        shdw_fifo_mem[i] <= 1'b0;
      end else begin
        // monitoring the ongoing writes to LSU to detect collisions
        // (a CPU write to a queued address clears the entry's valid bit so we
        //  won't issue a stale invalidating store)
        // also what about a collision between write request and head of the FIFO?
        if (req_fifo_wr_en && (i == req_fifo_wr_ptr))
          req_fifo_mem[i] <= req_fifo_wr_data;
        else if ((req_fifo_item_addr8 == snoop_lsu_addr_i[31:3]) && snoop_lsu_req_done_i && snoop_lsu_we_i)
          req_fifo_mem[i] <= req_fifo_mem[i] & {1'b0, {(AddrHi-2){1'b1}}};  // clear valid (MSB)

        if (cap_fifo_wr_en && (i == cap_fifo_wr_ptr))   cap_fifo_mem[i]  <= cap_fifo_wr_data;
        if (shdw_fifo_wr_en && (i == shdw_fifo_wr_ptr)) shdw_fifo_mem[i] <= shdw_fifo_wr_data;
      end
    end  // always
  end  // generate

  // peek into the current FIFO head
  assign fifo_rd_addr8 = req_fifo_mem[fifo_rd_ptr][AddrHi-3:0];
  assign fifo_rd_valid = req_fifo_mem[fifo_rd_ptr][AddrHi-2];
  assign fifo_rd_data  = cap_fifo_mem[fifo_rd_ptr][31:0];
  assign fifo_rd_tag   = cap_fifo_mem[fifo_rd_ptr][32];
  assign fifo_rd_err   = cap_fifo_mem[fifo_rd_ptr][33];
  assign fifo_rd_shdw  = shdw_fifo_mem[fifo_rd_ptr];

  // only issue invalidation store requests if
  // valid cap returned && no write collision on the address && shadow_bit == 1
  assign store_req_valid = fifo_not_empty & fifo_rd_tag & fifo_rd_shdw & fifo_rd_valid & ~fifo_rd_err;

  // head entry is complete only when all three FIFOs have advanced past it
  assign fifo_not_empty = (req_fifo_ext_wr_ptr != fifo_ext_rd_ptr) &&
                          (cap_fifo_ext_wr_ptr != fifo_ext_rd_ptr) &&
                          (shdw_fifo_ext_wr_ptr != fifo_ext_rd_ptr);

  // dequeue when the store is granted, or immediately when no store is needed
  assign fifo_rd_en = fifo_not_empty & (((tbre_sch_q == SCH_STORE) & store_gnt) | ~store_req_valid);

  assign req_fifo_wr_en   = (tbre_sch_q == SCH_LOAD) & load_gnt;
  assign req_fifo_wr_data = {1'b1, cur_load_addr8};

  assign cap_fifo_wr_en   = lsu_tbre_resp_valid_i & ~lsu_tbre_resp_is_wr_i;
  assign cap_fifo_wr_data = {lsu_tbre_resp_err_i, lsu_tbre_raw_lsw_i};

  assign shdw_fifo_wr_en   = trvk_en_i;
  assign shdw_fifo_wr_data = trvk_clrtag_i;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv b/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv new file mode 100644 index 0000000..de1693f --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_tbre_wrapper.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Wrapper bundling the two background engines that share the core's second
 * LSU port: the TBRE (temporal-safety background revocation engine) and the
 * stack-zeroization engine (stkz). Provides the MMIO register mapping and a
 * strict-priority arbiter (stkz = master 0 = highest priority) between the
 * two engines, plus response routing back to whichever engine issued the
 * granted request.
 */
module cheri_tbre_wrapper import cheri_pkg::*; #(
  parameter bit          CHERIoTEn  = 1'b1,
  parameter bit          CheriTBRE  = 1'b1,
  parameter bit          CheriStkZ  = 1'b1,
  parameter bit          StkZIntrOK = 1'b0,   // allow interrupts to abort stack zeroization
  parameter int unsigned MMRegDinW  = 128,
  parameter int unsigned MMRegDoutW = 64
) (
  // Clock and Reset
  input  logic                    clk_i,
  input  logic                    rst_ni,

  // MMIO register interface
  input  logic [MMRegDinW-1:0]    mmreg_corein_i,
  output logic [MMRegDoutW-1:0]   mmreg_coreout_o,

  // LSU req/resp interface (to be multiplexed/qualified)
  input  logic                    lsu_tbre_resp_valid_i,
  input  logic                    lsu_tbre_resp_err_i,
  input  logic                    lsu_tbre_resp_is_wr_i,
  input  logic [32:0]             lsu_tbre_raw_lsw_i,
  input  logic                    lsu_tbre_req_done_i,
  input  logic                    lsu_tbre_addr_incr_i,
  input  logic                    lsu_tbre_sel_i,
  output logic                    tbre_lsu_req_o,
  output logic                    tbre_lsu_is_cap_o,
  output logic                    tbre_lsu_we_o,
  output logic [31:0]             tbre_lsu_addr_o,
  output logic [32:0]             tbre_lsu_wdata_o,

  // LSU snoop interface
  input  logic                    snoop_lsu_req_done_i,
  input  logic                    snoop_lsu_req_i,
  input  logic                    snoop_lsu_is_cap_i,
  input  logic                    snoop_lsu_we_i,
  input  logic                    snoop_lsu_cheri_err_i,
  input  logic [31:0]             snoop_lsu_addr_i,

  // trvk interface
  input  logic                    trvk_en_i,
  input  logic                    trvk_clrtag_i,

  // Stack fast-clearing signals (ztop SCR access from the core)
  input  logic                    ztop_wr_i,
  input  logic [31:0]             ztop_wdata_i,
  input  full_cap_t               ztop_wfcap_i,
  output logic [31:0]             ztop_rdata_o,
  output reg_cap_t                ztop_rcap_o,

  input  logic                    unmasked_intr_i,

  output logic                    stkz_active_o,
  output logic                    stkz_abort_o,
  output logic [31:0]             stkz_ptr_o,
  output logic [31:0]             stkz_base_o
);

  localparam nMSTR = 2;   // master 0 = stkz (blk0), master 1 = tbre (blk1)

  // per-engine LSU request/response bundles: blk1 = TBRE, blk0 = StkZ
  logic        lsu_blk1_resp_valid;
  logic        lsu_blk1_req_done;
  logic        blk1_lsu_req;
  logic        blk1_lsu_is_cap;
  logic        blk1_lsu_we;
  logic [31:0] blk1_lsu_addr;
  logic [32:0] blk1_lsu_wdata;

  logic        lsu_blk0_resp_valid;
  logic        lsu_blk0_req_done;
  logic        blk0_lsu_req;
  logic        blk0_lsu_is_cap;
  logic        blk0_lsu_we;
  logic [31:0] blk0_lsu_addr;
  logic [32:0] blk0_lsu_wdata;

  logic tbre_stat, tbre_err, stkz_err;

  // status readback: {..0.., 2'b00, 2'b00, stkz_err, stkz_active, 2'b00, tbre_err, tbre_stat}
  assign mmreg_coreout_o = {{(MMRegDoutW-10){1'b0}}, 2'b00, 2'b00, stkz_err, stkz_active_o,
                            2'b00, tbre_err, tbre_stat};

  if (CHERIoTEn & CheriTBRE) begin : g_tbre
    logic [65:0] tbre_ctrl_vec;

    assign tbre_ctrl_vec = mmreg_corein_i[65:0];

    cheri_tbre #(
      .FifoSize (4),
      .AddrHi   (23)
    ) cheri_tbre_i (
      // Clock and Reset
      .clk_i                 (clk_i),
      .rst_ni                (rst_ni),
      .tbre_ctrl_vec_i       (tbre_ctrl_vec),
      .tbre_stat_o           (tbre_stat),
      .tbre_err_o            (tbre_err),
      .lsu_tbre_resp_valid_i (lsu_blk1_resp_valid),
      .lsu_tbre_resp_err_i   (lsu_tbre_resp_err_i),
      .lsu_tbre_resp_is_wr_i (lsu_tbre_resp_is_wr_i),
      .lsu_tbre_raw_lsw_i    (lsu_tbre_raw_lsw_i),
      .lsu_tbre_req_done_i   (lsu_blk1_req_done),
      .lsu_tbre_addr_incr_i  (lsu_tbre_addr_incr_i),
      .tbre_lsu_req_o        (blk1_lsu_req),
      .tbre_lsu_is_cap_o     (blk1_lsu_is_cap),
      .tbre_lsu_we_o         (blk1_lsu_we),
      .tbre_lsu_addr_o       (blk1_lsu_addr),
      .tbre_lsu_wdata_o      (blk1_lsu_wdata),
      .snoop_lsu_req_done_i  (snoop_lsu_req_done_i),
      .snoop_lsu_req_i       (snoop_lsu_req_i),
      .snoop_lsu_is_cap_i    (snoop_lsu_is_cap_i),
      .snoop_lsu_we_i        (snoop_lsu_we_i),
      .snoop_lsu_cheri_err_i (snoop_lsu_cheri_err_i),
      .snoop_lsu_addr_i      (snoop_lsu_addr_i),
      .trvk_en_i             (trvk_en_i),
      .trvk_clrtag_i         (trvk_clrtag_i)
    );
  end else begin : g_no_tbre
    // TBRE disabled: tie off status and request outputs
    assign tbre_stat       = 1'b0;
    assign tbre_err        = 1'b0;
    assign blk1_lsu_req    = 1'b0;
    assign blk1_lsu_is_cap = 1'b0;
    assign blk1_lsu_we     = 1'b0;
    assign blk1_lsu_addr   = 32'h0;
    assign blk1_lsu_wdata  = 33'h0;
  end

  if (CHERIoTEn & CheriStkZ) begin : g_stkz
    logic unmasked_intr;
    // only let interrupts abort zeroization when the parameter allows it
    assign unmasked_intr = StkZIntrOK & unmasked_intr_i;

    cheri_stkz cheri_stkz_i (
      .clk_i                 (clk_i ),
      .rst_ni                (rst_ni ),
      .ztop_wr_i             (ztop_wr_i),
      .ztop_wdata_i          (ztop_wdata_i),
      .ztop_wfcap_i          (ztop_wfcap_i),
      .ztop_rdata_o          (ztop_rdata_o),
      .ztop_rcap_o           (ztop_rcap_o),
      .unmasked_intr_i       (unmasked_intr ),
      .stkz_active_o         (stkz_active_o ),
      .stkz_abort_o          (stkz_abort_o ),
      .stkz_ptr_o            (stkz_ptr_o ),
      .stkz_base_o           (stkz_base_o ),
      .stkz_err_o            (stkz_err ),
      .lsu_stkz_resp_valid_i (lsu_blk0_resp_valid ),
      .lsu_stkz_resp_err_i   (lsu_tbre_resp_err_i ),
      .lsu_stkz_req_done_i   (lsu_blk0_req_done ),
      .stkz_lsu_req_o        (blk0_lsu_req ),
      .stkz_lsu_we_o         (blk0_lsu_we ),
      .stkz_lsu_is_cap_o     (blk0_lsu_is_cap ),
      .stkz_lsu_addr_o       (blk0_lsu_addr ),
      .stkz_lsu_wdata_o      (blk0_lsu_wdata )
    );

  end else begin : g_no_stkz
    assign stkz_active_o = 1'b0;
    assign stkz_abort_o  = 1'b0;
    assign stkz_ptr_o    = 32'h3;  // use this to flag stkz feature doesn't exist
    assign stkz_base_o   = 32'h0;
    assign stkz_err      = 1'b0;

    assign ztop_rcap_o  = NULL_REG_CAP;
    assign ztop_rdata_o = 32'h0000_aa55;

    assign blk0_lsu_req    = 1'b0;
    assign blk0_lsu_is_cap = 1'b0;
    assign blk0_lsu_we     = 1'b0;
    assign blk0_lsu_addr   = 32'h0;
    assign blk0_lsu_wdata  = 33'h0;
  end

  //
  // Arbitration for LSU interface between tbre and stkz engines
  // (reuses the obimux logic)
  //
  logic [nMSTR-1:0] mstr_arbit, mstr_arbit_q, mstr_arbit_comb;
  logic [nMSTR-1:0] mstr_req;
  logic             req_pending, req_pending_q;
  logic             slv_req, slv_gnt;

  assign slv_req = |mstr_req;

  // arbitration by strict priority assignment - mstr_req[0] == highest priority
  for (genvar i = 0; i < nMSTR; i++) begin : g_pri_arb
    logic [7:0] pri_mask;
    assign pri_mask = 8'hff >> (8-i);   // max 8 masters, should be enough
    // grant master i only if no higher-priority (lower-index) master requests
    assign mstr_arbit[i] = mstr_req[i] & ~(|(mstr_req & pri_mask[nMSTR-1:0]));
  end

  // Handling the delayed-gnt case.
  // Make the next arbitration decision immediately if any master_req is active.
  // If slv_gnt doesn't happen in the same cycle, register the decision till
  // slv_gnt so that the address/wdata/ctrl can be held steady when presenting
  // the next request to the slave.
  // Corner case:
  // -- adding the lsu_tbre_sel term to req_pending (allow the arbitration to
  //    change when LSU is handling CPU requests).
  //    This is needed since TBRE could cancel write requests in the case of
  //    a pipeline hazard (cpu write to the same location TBRE is working on)

  assign mstr_arbit_comb = req_pending_q ? mstr_arbit_q : mstr_arbit;
  assign req_pending     = |mstr_req & ~slv_gnt & ~req_pending_q & lsu_tbre_sel_i;

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      req_pending_q <= 1'b0;
      mstr_arbit_q  <= 0;
    end else begin
      if (slv_gnt)          req_pending_q <= 1'b0;
      else if (req_pending) req_pending_q <= 1'b1;
      if (req_pending) mstr_arbit_q <= mstr_arbit;
    end
  end

  // muxing the outgoing control signals
  assign slv_gnt  = lsu_tbre_req_done_i;
  assign mstr_req = {blk1_lsu_req, blk0_lsu_req};

  assign tbre_lsu_req_o    = slv_req;
  assign tbre_lsu_is_cap_o = mstr_arbit_comb[1] ? blk1_lsu_is_cap : blk0_lsu_is_cap;
  assign tbre_lsu_we_o     = mstr_arbit_comb[1] ? blk1_lsu_we    : blk0_lsu_we;
  assign tbre_lsu_addr_o   = mstr_arbit_comb[1] ? blk1_lsu_addr  : blk0_lsu_addr;
  assign tbre_lsu_wdata_o  = mstr_arbit_comb[1] ? blk1_lsu_wdata : blk0_lsu_wdata;

  assign lsu_blk1_req_done = mstr_arbit_comb[1] & lsu_tbre_req_done_i;
  assign lsu_blk0_req_done = mstr_arbit_comb[0] & lsu_tbre_req_done_i;

  // route the (single outstanding) response back to whichever engine was granted
  logic resp_sel_q;
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      resp_sel_q <= 1'b0;
    end else if (lsu_tbre_req_done_i) begin
      resp_sel_q <= (mstr_arbit_comb[1]);
    end
  end

  assign lsu_blk0_resp_valid = ~resp_sel_q & lsu_tbre_resp_valid_i;
  assign lsu_blk1_resp_valid =  resp_sel_q & lsu_tbre_resp_valid_i;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv b/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv new file mode 100644 index 0000000..b22ce70 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheri_trvk_stage.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Tag revocation check (trvk) stage.
 *
 * 3-cycle pipeline that, for every capability loaded from memory (by either
 * the CPU or the TBRE), looks up the revocation shadow map (tsmap) bit
 * corresponding to the capability's base address and reports whether the
 * loaded capability's tag must be cleared.
 *
 * Pipeline alignment (stage index = position in the *_q shift registers):
 *   stage 0: response captured (in_cap_q/in_data_q), shadow-map chip-select
 *   stage 1: bit position and range check registered (bitpos_q, range_ok_q[1])
 *   stage 2: map read data indexed (trvk_status), outputs driven
 *
 * Parameters:
 *   HeapBase  - base address of the revocable heap region
 *   TSMapSize - size of the shadow map; compared against the 32-bit word
 *               index into the map (see range_ok note below)
 */
module cheri_trvk_stage #(
  parameter int unsigned HeapBase  = 32'h2001_0000,
  parameter int unsigned TSMapSize = 1024
) (
  // Clock and Reset
  input  logic          clk_i,
  input  logic          rst_ni,

  // register-file reservation: a CPU capability load has been issued to regfile addr
  input  logic          rf_trsv_en_i,
  input  logic [4:0]    rf_trsv_addr_i,

  // from LSU
  input  logic          lsu_resp_valid_i,
  input  logic          lsu_load_err_i,
  input  logic [31:0]   rf_wdata_lsu_i,
  input  cheri_pkg::reg_cap_t rf_wcap_lsu_i,

  input  logic          lsu_resp_is_wr_i,

  input  logic          lsu_tbre_resp_valid_i,
  input  logic          lsu_tbre_resp_err_i,

  // revocation result to the register file (CPU loads)
  output logic [4:0]    rf_trvk_addr_o,
  output logic          rf_trvk_en_o,
  output logic          rf_trvk_clrtag_o,

  // revocation result to the TBRE (background loads)
  output logic          tbre_trvk_en_o,
  output logic          tbre_trvk_clrtag_o,

  // shadow-map (tsmap) read port
  output logic          tsmap_cs_o,
  output logic [15:0]   tsmap_addr_o,
  input  logic [31:0]   tsmap_rdata_i
);

  import cheri_pkg::*;

  reg_cap_t    in_cap_q;      // capability captured from the LSU response (stage 0)
  logic [31:0] in_data_q;     // address word captured with it

  logic        cpu_op_active;
  logic [2:0]  cpu_op_valid_q, tbre_op_valid_q, cap_good_q;   // per-stage valid shift registers
  logic        cpu_op_valid, tbre_op_valid, cap_good;
  logic [4:0]  trsv_addr;
  logic [4:0]  trsv_addr_q[2:0];   // regfile address pipelined alongside the valids
  logic        trvk_status;        // shadow bit read from the map (stage 2)

  logic [31:0] base32;
  logic [31:0] tsmap_ptr;
  logic [4:0]  bitpos_q;       // bit index in a 32-bit word
  logic        range_ok;
  logic [2:1]  range_ok_q;


  // base address of the loaded capability
  // NOTE(review): get_bound33 presumably yields a 33-bit bound; assignment to the
  // 32-bit base32 drops the MSB - confirm this is intentional (wrap not possible here)
  assign base32    = get_bound33(in_cap_q.base, {2{in_cap_q.base_cor}}, in_cap_q.exp, in_data_q);
  // one shadow bit per 8 bytes of heap
  assign tsmap_ptr = (base32 - HeapBase) >> 3;

  // word address into the shadow map (bit index handled separately via bitpos_q)
  assign tsmap_addr_o = tsmap_ptr[15:5];

  // not a sealing cap and pointing to valid TSMAP range
  // NOTE(review): `<=` permits word index == TSMapSize (one past the last word if
  // TSMapSize is in 32-bit words) - confirm whether this should be `<`
  assign range_ok = (tsmap_ptr[31:5] <= TSMapSize) &&
                    ~((in_cap_q.cperms[4:3]==2'b00) && (|in_cap_q.cperms[2:0]));
  // only read the map for responses that carried a valid (tagged, error-free) cap
  assign tsmap_cs_o = (cpu_op_valid_q[0] | tbre_op_valid_q[0]) & cap_good_q[0];

  // stage-2 outputs: clear tag only if the map bit is set and the lookup was in range
  assign rf_trvk_en_o     = cpu_op_valid_q[2];
  assign rf_trvk_clrtag_o = trvk_status & cap_good_q[2] & range_ok_q[2];
  assign rf_trvk_addr_o   = trsv_addr_q[2];

  assign tbre_trvk_en_o     = tbre_op_valid_q[2];
  assign tbre_trvk_clrtag_o = trvk_status & cap_good_q[2] & range_ok_q[2];

  // track whether an LSU response belongs to a CPU capability load
  // (reservation set at issue, cleared when the response arrives)
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      cpu_op_active <= 1'b0;
      trsv_addr     <= 5'h0;
    end else begin
      if (rf_trsv_en_i) cpu_op_active <= 1'b1;
      else if (lsu_resp_valid_i) cpu_op_active <= 1'b0;

      if (rf_trsv_en_i) trsv_addr <= rf_trsv_addr_i;
    end
  end


  assign cpu_op_valid  = cpu_op_active & lsu_resp_valid_i;   // CPU op only active when Load cap
  // NOTE(review): tbre_op_valid masks with lsu_resp_is_wr_i (CPU-side write flag),
  // not a TBRE-specific is_wr - confirm the two responses share this qualifier
  assign tbre_op_valid = lsu_tbre_resp_valid_i & ~lsu_resp_is_wr_i;  // TBRE Load
  assign cap_good = (cpu_op_active & lsu_resp_valid_i & ~lsu_load_err_i & rf_wcap_lsu_i.valid) |
                    (lsu_tbre_resp_valid_i & ~lsu_tbre_resp_err_i & rf_wcap_lsu_i.valid);

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      cpu_op_valid_q  <= 0;
      tbre_op_valid_q <= 0;
      cap_good_q      <= 0;
      in_cap_q        <= NULL_REG_CAP;
      in_data_q       <= 32'h0;
      bitpos_q        <= 0;
      trvk_status     <= 1'b0;
      range_ok_q      <= 0;
      trsv_addr_q[0]  <= 5'b0;
      trsv_addr_q[1]  <= 5'b0;
      trsv_addr_q[2]  <= 5'b0;
    end else begin
      // control signal per stage (shift one position each cycle)
      cpu_op_valid_q  <= {cpu_op_valid_q[1:0], cpu_op_valid};
      tbre_op_valid_q <= {tbre_op_valid_q[1:0], tbre_op_valid};
      cap_good_q      <= {cap_good_q[1:0], cap_good};
      trsv_addr_q[0]  <= trsv_addr;
      trsv_addr_q[1]  <= trsv_addr_q[0];
      trsv_addr_q[2]  <= trsv_addr_q[1];

      // stage 0 status: register loaded cap
      if ((cpu_op_valid & ~lsu_load_err_i) | (tbre_op_valid & ~lsu_tbre_resp_err_i)) begin
        in_cap_q  <= rf_wcap_lsu_i;
        in_data_q <= rf_wdata_lsu_i;
      end

      // stage 1 status: latch bit position within the map word and range check
      bitpos_q      <= tsmap_ptr[4:0];
      range_ok_q[1] <= range_ok;

      // stage 2: index map data
      range_ok_q[2] <= range_ok_q[1];
      trvk_status   <= tsmap_rdata_i[bitpos_q];
    end
  end

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv b/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv new file mode 100644 index 0000000..32d2fe7 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_alu.sv
@@ -0,0 +1,1400 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Arithmetic logic unit + */ +module cheriot_alu #( + parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone +) ( + input cheriot_pkg::alu_op_e operator_i, + input logic [31:0] operand_a_i, + input logic [31:0] operand_b_i, + + input logic instr_first_cycle_i, + + input logic [32:0] multdiv_operand_a_i, + input logic [32:0] multdiv_operand_b_i, + + input logic multdiv_sel_i, + + input logic [31:0] imd_val_q_i[2], + output logic [31:0] imd_val_d_o[2], + output logic [1:0] imd_val_we_o, + + output logic [31:0] adder_result_o, + output logic [33:0] adder_result_ext_o, + + output logic [31:0] result_o, + output logic comparison_result_o, + output logic is_equal_result_o +); + import cheriot_pkg::*; + + logic [31:0] operand_a_rev; + logic [32:0] operand_b_neg; + + // bit reverse operand_a for left shifts and bit counting + for (genvar k = 0; k < 32; k++) begin : gen_rev_operand_a + assign operand_a_rev[k] = operand_a_i[31-k]; + end + + /////////// + // Adder // + /////////// + + logic adder_op_a_shift1; + logic adder_op_a_shift2; + logic adder_op_a_shift3; + logic adder_op_b_negate; + logic [32:0] adder_in_a, adder_in_b; + logic [31:0] adder_result; + + always_comb begin + adder_op_a_shift1 = 1'b0; + adder_op_a_shift2 = 1'b0; + adder_op_a_shift3 = 1'b0; + adder_op_b_negate = 1'b0; + unique case (operator_i) + // Adder OPs + ALU_SUB, + + // Comparator OPs + ALU_EQ, ALU_NE, + ALU_GE, ALU_GEU, + ALU_LT, ALU_LTU, + ALU_SLT, ALU_SLTU, + + // MinMax OPs (RV32B Ops) + ALU_MIN, ALU_MINU, + ALU_MAX, ALU_MAXU: adder_op_b_negate = 1'b1; + + // Address Calculation OPs (RV32B Ops) + ALU_SH1ADD: if (RV32B != RV32BNone) adder_op_a_shift1 = 1'b1; + ALU_SH2ADD: if (RV32B != RV32BNone) adder_op_a_shift2 = 1'b1; + ALU_SH3ADD: 
if (RV32B != RV32BNone) adder_op_a_shift3 = 1'b1; + + default:; + endcase + end + + // prepare operand a + always_comb begin + unique case(1'b1) + multdiv_sel_i: adder_in_a = multdiv_operand_a_i; + adder_op_a_shift1: adder_in_a = {operand_a_i[30:0],2'b01}; + adder_op_a_shift2: adder_in_a = {operand_a_i[29:0],3'b001}; + adder_op_a_shift3: adder_in_a = {operand_a_i[28:0],4'b0001}; + default: adder_in_a = {operand_a_i,1'b1}; + endcase + end + + // prepare operand b + assign operand_b_neg = {operand_b_i,1'b0} ^ {33{1'b1}}; + always_comb begin + unique case (1'b1) + multdiv_sel_i: adder_in_b = multdiv_operand_b_i; + adder_op_b_negate: adder_in_b = operand_b_neg; + default: adder_in_b = {operand_b_i, 1'b0}; + endcase + end + + // actual adder + assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); + + assign adder_result = adder_result_ext_o[32:1]; + + assign adder_result_o = adder_result; + + //////////////// + // Comparison // + //////////////// + + logic is_equal; + logic is_greater_equal; // handles both signed and unsigned forms + logic cmp_signed; + + always_comb begin + unique case (operator_i) + ALU_GE, + ALU_LT, + ALU_SLT, + // RV32B only + ALU_MIN, + ALU_MAX: cmp_signed = 1'b1; + + default: cmp_signed = 1'b0; + endcase + end + + assign is_equal = (adder_result == 32'b0); + assign is_equal_result_o = is_equal; + + // Is greater equal + always_comb begin + if ((operand_a_i[31] ^ operand_b_i[31]) == 1'b0) begin + is_greater_equal = (adder_result[31] == 1'b0); + end else begin + is_greater_equal = operand_a_i[31] ^ (cmp_signed); + end + end + + // GTE unsigned: + // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0 + // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0 + // (a[31] == 1 && b[31] == 0) => 1 + // (a[31] == 0 && b[31] == 1) => 0 + + // GTE signed: + // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0 + // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0 + // (a[31] == 1 && b[31] == 0) => 0 + // (a[31] == 0 && b[31] == 1) 
=> 1 + + // generate comparison result + logic cmp_result; + + always_comb begin + unique case (operator_i) + ALU_EQ: cmp_result = is_equal; + ALU_NE: cmp_result = ~is_equal; + ALU_GE, ALU_GEU, + ALU_MAX, ALU_MAXU: cmp_result = is_greater_equal; // RV32B only + ALU_LT, ALU_LTU, + ALU_MIN, ALU_MINU, //RV32B only + ALU_SLT, ALU_SLTU: cmp_result = ~is_greater_equal; + + default: cmp_result = is_equal; + endcase + end + + assign comparison_result_o = cmp_result; + + /////////// + // Shift // + /////////// + + // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for + // arithmetic shifts and one-shift support. + // Rotations and funnel shifts are implemented as multi-cycle instructions. + // The shifter is also used for single-bit instructions and bit-field place as detailed below. + // + // Standard Shifts + // =============== + // For standard shift instructions, the direction of the shift is to the right by default. For + // left shifts, the signal shift_left signal is set. If so, the operand is initially reversed, + // shifted to the right by the specified amount and shifted back again. For arithmetic- and + // one-shifts the 33rd bit of the shifter operand can is set accordingly. + // + // Multicycle Shifts + // ================= + // + // Rotation + // -------- + // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and + // rs2 respectively. + // + // Rotation pseudocode: + // shift_amt = rs2 & 31; + // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt)); + // ^-- cycle 0 -----^ ^-- cycle 1 --------------^ + // + // Funnel Shifts + // ------------- + // For funnel shifs, operand_a_i is tied to rs1 in the first cycle and rs3 in the + // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or + // its complement is determined by bit [5] of shift_amt. 
+ // + // Funnel shift Pseudocode: (fsl) + // shift_amt = rs2 & 63; + // shift_amt_compl = 32 - shift_amt[4:0] + // if (shift_amt >=33): + // multicycle_result = (rs1 >> shift_amt_compl[4:0]) | (rs3 << shift_amt[4:0]); + // ^-- cycle 0 ----------------^ ^-- cycle 1 ------------^ + // else if (shift_amt <= 31 && shift_amt > 0): + // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]); + // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^ + // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0. + // these cases need to be handled separately outside the shifting structure: + // else if (shift_amt == 32): + // multicycle_result = rs3 + // else if (shift_amt == 0): + // multicycle_result = rs1. + // + // Single-Bit Instructions + // ======================= + // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i. + + // The operations bset, bclr and binv are implemented by generation of a bit-mask using the + // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount. + // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left. + // Further processing is taken care of by a separate structure. + // + // For bext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply + // shifting operand_a_i to the right by the required amount and returning bit [0] of the result. + // + // Bit-Field Place + // =============== + // The shifter structure is shared to compute bfp_mask << bfp_off. 
+ + logic shift_left; + logic shift_ones; + logic shift_arith; + logic shift_funnel; + logic shift_sbmode; + logic [5:0] shift_amt; + logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt) + + logic [31:0] shift_operand; + logic signed [32:0] shift_result_ext_signed; + logic [32:0] shift_result_ext; + logic unused_shift_result_ext; + logic [31:0] shift_result; + logic [31:0] shift_result_rev; + + // zbf + logic bfp_op; + logic [4:0] bfp_len; + logic [4:0] bfp_off; + logic [31:0] bfp_mask; + logic [31:0] bfp_mask_rev; + logic [31:0] bfp_result; + + // bfp: shares the shifter structure to compute bfp_mask << bfp_off + assign bfp_op = (RV32B != RV32BNone) ? (operator_i == ALU_BFP) : 1'b0; + assign bfp_len = {~(|operand_b_i[27:24]), operand_b_i[27:24]}; // len = 0 encodes for len = 16 + assign bfp_off = operand_b_i[20:16]; + assign bfp_mask = (RV32B != RV32BNone) ? ~(32'hffff_ffff << bfp_len) : '0; + for (genvar i = 0; i < 32; i++) begin : gen_rev_bfp_mask + assign bfp_mask_rev[i] = bfp_mask[31-i]; + end + + assign bfp_result =(RV32B != RV32BNone) ? + (~shift_result & operand_a_i) | ((operand_b_i & bfp_mask) << bfp_off) : '0; + + // bit shift_amt[5]: word swap bit: only considered for FSL/FSR. + // if set, reverse operations in first and second cycle. + assign shift_amt[5] = operand_b_i[5] & shift_funnel; + assign shift_amt_compl = 32 - operand_b_i[4:0]; + + always_comb begin + if (bfp_op) begin + shift_amt[4:0] = bfp_off; // length field of bfp control word + end else begin + shift_amt[4:0] = instr_first_cycle_i ? + (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) : + (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]); + end + end + + // single-bit mode: shift + assign shift_sbmode = (RV32B != RV32BNone) ? 
+ (operator_i == ALU_BSET) | (operator_i == ALU_BCLR) | (operator_i == ALU_BINV) : 1'b0; + + // left shift if this is: + // * a standard left shift (slo, sll) + // * a rol in the first cycle + // * a ror in the second cycle + // * fsl: without word-swap bit: first cycle, else: second cycle + // * fsr: without word-swap bit: second cycle, else: first cycle + // * a single-bit instruction: bclr, bset, binv (excluding bext) + // * bfp: bfp_mask << bfp_off + always_comb begin + unique case (operator_i) + ALU_SLL: shift_left = 1'b1; + ALU_SLO: shift_left = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b1 : 1'b0; + ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0; + ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 0; + ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 0; + ALU_FSL: shift_left = (RV32B != RV32BNone) ? + (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0; + ALU_FSR: shift_left = (RV32B != RV32BNone) ? + (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0; + default: shift_left = 1'b0; + endcase + if (shift_sbmode) begin + shift_left = 1'b1; + end + end + + assign shift_arith = (operator_i == ALU_SRA); + assign shift_ones = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? + (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0; + assign shift_funnel = (RV32B != RV32BNone) ? + (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0; + + // shifter structure. + always_comb begin + // select shifter input + // for bfp, sbmode and shift_left the corresponding bit-reversed input is chosen. + if (RV32B == RV32BNone) begin + shift_operand = shift_left ? operand_a_rev : operand_a_i; + end else begin + unique case (1'b1) + bfp_op: shift_operand = bfp_mask_rev; + shift_sbmode: shift_operand = 32'h8000_0000; + default: shift_operand = shift_left ? 
operand_a_rev : operand_a_i; + endcase + end + + shift_result_ext_signed = + $signed({shift_ones | (shift_arith & shift_operand[31]), shift_operand}) >>> shift_amt[4:0]; + shift_result_ext = $unsigned(shift_result_ext_signed); + + shift_result = shift_result_ext[31:0]; + unused_shift_result_ext = shift_result_ext[32]; + + for (int unsigned i = 0; i < 32; i++) begin + shift_result_rev[i] = shift_result[31-i]; + end + + shift_result = shift_left ? shift_result_rev : shift_result; + + end + + /////////////////// + // Bitwise Logic // + /////////////////// + + logic bwlogic_or; + logic bwlogic_and; + logic [31:0] bwlogic_operand_b; + logic [31:0] bwlogic_or_result; + logic [31:0] bwlogic_and_result; + logic [31:0] bwlogic_xor_result; + logic [31:0] bwlogic_result; + + logic bwlogic_op_b_negate; + + always_comb begin + unique case (operator_i) + // Logic-with-negate OPs (RV32B Ops) + ALU_XNOR, + ALU_ORN, + ALU_ANDN: bwlogic_op_b_negate = (RV32B != RV32BNone) ? 1'b1 : 1'b0; + ALU_CMIX: bwlogic_op_b_negate = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 1'b0; + default: bwlogic_op_b_negate = 1'b0; + endcase + end + + assign bwlogic_operand_b = bwlogic_op_b_negate ? 
operand_b_neg[32:1] : operand_b_i; + + assign bwlogic_or_result = operand_a_i | bwlogic_operand_b; + assign bwlogic_and_result = operand_a_i & bwlogic_operand_b; + assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b; + + assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN); + assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN); + + always_comb begin + unique case (1'b1) + bwlogic_or: bwlogic_result = bwlogic_or_result; + bwlogic_and: bwlogic_result = bwlogic_and_result; + default: bwlogic_result = bwlogic_xor_result; + endcase + end + + logic [5:0] bitcnt_result; + logic [31:0] minmax_result; + logic [31:0] pack_result; + logic [31:0] sext_result; + logic [31:0] singlebit_result; + logic [31:0] rev_result; + logic [31:0] shuffle_result; + logic [31:0] xperm_result; + logic [31:0] butterfly_result; + logic [31:0] invbutterfly_result; + logic [31:0] clmul_result; + logic [31:0] multicycle_result; + + if (RV32B != RV32BNone) begin : g_alu_rvb + + ///////////////// + // Bitcounting // + ///////////////// + + // The bit-counter structure computes the number of set bits in its operand. Partial results + // (from left to right) are needed to compute the control masks for computation of + // bcompress/bdecompress by the butterfly network, if implemented. + // For cpop, clz and ctz, only the end result is used. + + logic zbe_op; + logic bitcnt_ctz; + logic bitcnt_clz; + logic bitcnt_cz; + logic [31:0] bitcnt_bits; + logic [31:0] bitcnt_mask_op; + logic [31:0] bitcnt_bit_mask; + logic [ 5:0] bitcnt_partial [32]; + logic [31:0] bitcnt_partial_lsb_d; + logic [31:0] bitcnt_partial_msb_d; + + + assign bitcnt_ctz = operator_i == ALU_CTZ; + assign bitcnt_clz = operator_i == ALU_CLZ; + assign bitcnt_cz = bitcnt_ctz | bitcnt_clz; + assign bitcnt_result = bitcnt_partial[31]; + + // Bit-mask generation for clz and ctz: + // The bit mask is generated by spreading the lowest-order set bit in the operand to all + // higher order bits. 
The resulting mask is inverted to cover the lowest order zeros. In order + // to create the bit mask for leading zeros, the input operand needs to be reversed. + assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i; + + always_comb begin + bitcnt_bit_mask = bitcnt_mask_op; + bitcnt_bit_mask |= bitcnt_bit_mask << 1; + bitcnt_bit_mask |= bitcnt_bit_mask << 2; + bitcnt_bit_mask |= bitcnt_bit_mask << 4; + bitcnt_bit_mask |= bitcnt_bit_mask << 8; + bitcnt_bit_mask |= bitcnt_bit_mask << 16; + bitcnt_bit_mask = ~bitcnt_bit_mask; + end + + assign zbe_op = (operator_i == ALU_BCOMPRESS) | (operator_i == ALU_BDECOMPRESS); + + always_comb begin + case (1'b1) + zbe_op: bitcnt_bits = operand_b_i; + bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz + default: bitcnt_bits = operand_a_i; // cpop + endcase + end + + // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first + // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at + // positions 2**n-1 (power-of-two positions) where n denotes the current stage. + // In stage n=log2(width), the count for position width-1 (the MSB) is finished. + // For the intermediate values, an inverse adder tree then computes the bit counts for the bit + // lines at positions + // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2]. + // Thus, at every subsequent stage the result of two previously unconnected sub-trees is + // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2] + // and moving to iteratively sum up all the sub-trees. + // The inverse adder tree thus features log2(width) - 1 stages the first of these stages is a + // single addition at position 3*width/4 - 1. It does not interfere with the last + // stage of the primary adder tree. These stages can thus be folded together, resulting in a + // total of 2*log2(width)-2 stages. + // For more details refer to R. 
Brent, H. T. Kung, "A Regular Layout for Parallel Adders", + // (1982). + // For a bitline at position p, only bits + // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the + // butterfly network control signals. The adders in the intermediate value adder tree thus need + // not be full 5-bit adders. We leave the optimization to the synthesis tools. + // + // Consider the following 8-bit example for illustraton. + // + // let bitcnt_bits = 8'babcdefgh. + // + // a b c d e f g h + // | /: | /: | /: | /: + // |/ : |/ : |/ : |/ : + // stage 1: + : + : + : + : + // | : /: : | : /: : + // |,--+ : : |,--+ : : + // stage 2: + : : : + : : : + // | : | : /: : : : + // |,-----,--+ : : : : ^-primary adder tree + // stage 3: + : + : : : : : ------------------------- + // : | /| /| /| /| /| : ,-intermediate adder tree + // : |/ |/ |/ |/ |/ : : + // stage 4 : + + + + + : : + // : : : : : : : : + // bitcnt_partial[i] 7 6 5 4 3 2 1 0 + + always_comb begin + bitcnt_partial = '{default: '0}; + // stage 1 + for (int unsigned i = 1; i < 32; i += 2) begin + bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]}; + end + // stage 2 + for (int unsigned i = 3; i < 32; i += 4) begin + bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i]; + end + // stage 3 + for (int unsigned i = 7; i < 32; i += 8) begin + bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i]; + end + // stage 4 + for (int unsigned i = 15; i < 32; i += 16) begin + bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i]; + end + // stage 5 + bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31]; + // ^- primary adder tree + // ------------------------------- + // ,-intermediate value adder tree + bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23]; + + // stage 6 + for (int unsigned i = 11; i < 32; i += 8) begin + bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i]; + end + + // stage 7 + for (int unsigned i = 5; i < 32; i += 4) 
begin + bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i]; + end + // stage 8 + bitcnt_partial[0] = {5'h0, bitcnt_bits[0]}; + for (int unsigned i = 2; i < 32; i += 2) begin + bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]}; + end + end + + /////////////// + // Min / Max // + /////////////// + + assign minmax_result = cmp_result ? operand_a_i : operand_b_i; + + ////////// + // Pack // + ////////// + + logic packu; + logic packh; + assign packu = operator_i == ALU_PACKU; + assign packh = operator_i == ALU_PACKH; + + always_comb begin + unique case (1'b1) + packu: pack_result = {operand_b_i[31:16], operand_a_i[31:16]}; + packh: pack_result = {16'h0, operand_b_i[7:0], operand_a_i[7:0]}; + default: pack_result = {operand_b_i[15:0], operand_a_i[15:0]}; + endcase + end + + ////////// + // Sext // + ////////// + + assign sext_result = (operator_i == ALU_SEXTB) ? + { {24{operand_a_i[7]}}, operand_a_i[7:0]} : { {16{operand_a_i[15]}}, operand_a_i[15:0]}; + + ///////////////////////////// + // Single-bit Instructions // + ///////////////////////////// + + always_comb begin + unique case (operator_i) + ALU_BSET: singlebit_result = operand_a_i | shift_result; + ALU_BCLR: singlebit_result = operand_a_i & ~shift_result; + ALU_BINV: singlebit_result = operand_a_i ^ shift_result; + default: singlebit_result = {31'h0, shift_result[0]}; // ALU_BEXT + endcase + end + + //////////////////////////////////// + // General Reverse and Or-combine // + //////////////////////////////////// + + // Only a subset of the general reverse and or-combine instructions are implemented in the + // balanced version of the B extension. Currently rev8 (shift_amt = 5'b11000) and orc.b + // (shift_amt = 5'b00111) are supported in the base extension. + + logic [4:0] zbp_shift_amt; + logic gorc_op; + + assign gorc_op = (operator_i == ALU_GORC); + assign zbp_shift_amt[2:0] = + (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 
shift_amt[2:0] : {3{shift_amt[0]}}; + assign zbp_shift_amt[4:3] = + (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? shift_amt[4:3] : {2{shift_amt[3]}}; + + always_comb begin + rev_result = operand_a_i; + + if (zbp_shift_amt[0]) begin + rev_result = (gorc_op ? rev_result : 32'h0) | + ((rev_result & 32'h5555_5555) << 1) | + ((rev_result & 32'haaaa_aaaa) >> 1); + end + + if (zbp_shift_amt[1]) begin + rev_result = (gorc_op ? rev_result : 32'h0) | + ((rev_result & 32'h3333_3333) << 2) | + ((rev_result & 32'hcccc_cccc) >> 2); + end + + if (zbp_shift_amt[2]) begin + rev_result = (gorc_op ? rev_result : 32'h0) | + ((rev_result & 32'h0f0f_0f0f) << 4) | + ((rev_result & 32'hf0f0_f0f0) >> 4); + end + + if (zbp_shift_amt[3]) begin + rev_result = ((RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) && + gorc_op ? rev_result : 32'h0) | + ((rev_result & 32'h00ff_00ff) << 8) | + ((rev_result & 32'hff00_ff00) >> 8); + end + + if (zbp_shift_amt[4]) begin + rev_result = ((RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) && + gorc_op ? 
rev_result : 32'h0) | + ((rev_result & 32'h0000_ffff) << 16) | + ((rev_result & 32'hffff_0000) >> 16); + end + end + + logic crc_hmode; + logic crc_bmode; + logic [31:0] clmul_result_rev; + + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin : gen_alu_rvb_otearlgrey_full + + ///////////////////////// + // Shuffle / Unshuffle // + ///////////////////////// + + localparam logic [31:0] SHUFFLE_MASK_L [4] = + '{32'h00ff_0000, 32'h0f00_0f00, 32'h3030_3030, 32'h4444_4444}; + localparam logic [31:0] SHUFFLE_MASK_R [4] = + '{32'h0000_ff00, 32'h00f0_00f0, 32'h0c0c_0c0c, 32'h2222_2222}; + + localparam logic [31:0] FLIP_MASK_L [4] = + '{32'h2200_1100, 32'h0044_0000, 32'h4411_0000, 32'h1100_0000}; + localparam logic [31:0] FLIP_MASK_R [4] = + '{32'h0088_0044, 32'h0000_2200, 32'h0000_8822, 32'h0000_0088}; + + logic [31:0] SHUFFLE_MASK_NOT [4]; + for(genvar i = 0; i < 4; i++) begin : gen_shuffle_mask_not + assign SHUFFLE_MASK_NOT[i] = ~(SHUFFLE_MASK_L[i] | SHUFFLE_MASK_R[i]); + end + + logic shuffle_flip; + assign shuffle_flip = operator_i == ALU_UNSHFL; + + logic [3:0] shuffle_mode; + + always_comb begin + shuffle_result = operand_a_i; + + if (shuffle_flip) begin + shuffle_mode[3] = shift_amt[0]; + shuffle_mode[2] = shift_amt[1]; + shuffle_mode[1] = shift_amt[2]; + shuffle_mode[0] = shift_amt[3]; + end else begin + shuffle_mode = shift_amt[3:0]; + end + + if (shuffle_flip) begin + shuffle_result = (shuffle_result & 32'h8822_4411) | + ((shuffle_result << 6) & FLIP_MASK_L[0]) | + ((shuffle_result >> 6) & FLIP_MASK_R[0]) | + ((shuffle_result << 9) & FLIP_MASK_L[1]) | + ((shuffle_result >> 9) & FLIP_MASK_R[1]) | + ((shuffle_result << 15) & FLIP_MASK_L[2]) | + ((shuffle_result >> 15) & FLIP_MASK_R[2]) | + ((shuffle_result << 21) & FLIP_MASK_L[3]) | + ((shuffle_result >> 21) & FLIP_MASK_R[3]); + end + + if (shuffle_mode[3]) begin + shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[0]) | + (((shuffle_result << 8) & SHUFFLE_MASK_L[0]) | + ((shuffle_result >> 8) & 
SHUFFLE_MASK_R[0])); + end + if (shuffle_mode[2]) begin + shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[1]) | + (((shuffle_result << 4) & SHUFFLE_MASK_L[1]) | + ((shuffle_result >> 4) & SHUFFLE_MASK_R[1])); + end + if (shuffle_mode[1]) begin + shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[2]) | + (((shuffle_result << 2) & SHUFFLE_MASK_L[2]) | + ((shuffle_result >> 2) & SHUFFLE_MASK_R[2])); + end + if (shuffle_mode[0]) begin + shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[3]) | + (((shuffle_result << 1) & SHUFFLE_MASK_L[3]) | + ((shuffle_result >> 1) & SHUFFLE_MASK_R[3])); + end + + if (shuffle_flip) begin + shuffle_result = (shuffle_result & 32'h8822_4411) | + ((shuffle_result << 6) & FLIP_MASK_L[0]) | + ((shuffle_result >> 6) & FLIP_MASK_R[0]) | + ((shuffle_result << 9) & FLIP_MASK_L[1]) | + ((shuffle_result >> 9) & FLIP_MASK_R[1]) | + ((shuffle_result << 15) & FLIP_MASK_L[2]) | + ((shuffle_result >> 15) & FLIP_MASK_R[2]) | + ((shuffle_result << 21) & FLIP_MASK_L[3]) | + ((shuffle_result >> 21) & FLIP_MASK_R[3]); + end + end + + ////////////// + // Crossbar // + ////////////// + // The crossbar permutation instructions xperm.[nbh] (Zbp) can be implemented using 8 + // parallel 4-bit-wide, 8-input crossbars. Basically, we permute the 8 nibbles of operand_a_i + // based on operand_b_i. + + // Generate selector indices and valid signals. + // - sel_n[x] indicates which nibble of operand_a_i is selected for output nibble x. + // - vld_n[x] indicates if the selection is valid. + logic [7:0][2:0] sel_n; // nibbles + logic [7:0] vld_n; // nibbles + logic [3:0][1:0] sel_b; // bytes + logic [3:0] vld_b; // bytes + logic [1:0][0:0] sel_h; // half words + logic [1:0] vld_h; // half words + + // Per nibble, 3 bits are needed for the selection. Other bits must be zero. 
+ // sel_n bit mask: 32'b0111_0111_0111_0111_0111_0111_0111_0111 + // vld_n bit mask: 32'b1000_1000_1000_1000_1000_1000_1000_1000 + for (genvar i = 0; i < 8; i++) begin : gen_sel_vld_n + assign sel_n[i] = operand_b_i[i*4 +: 3]; + assign vld_n[i] = ~|operand_b_i[i*4 + 3 +: 1]; + end + + // Per byte, 2 bits are needed for the selection. Other bits must be zero. + // sel_b bit mask: 32'b0000_0011_0000_0011_0000_0011_0000_0011 + // vld_b bit mask: 32'b1111_1100_1111_1100_1111_1100_1111_1100 + for (genvar i = 0; i < 4; i++) begin : gen_sel_vld_b + assign sel_b[i] = operand_b_i[i*8 +: 2]; + assign vld_b[i] = ~|operand_b_i[i*8 + 2 +: 6]; + end + + // Per half word, 1 bit is needed for the selection only. All other bits must be zero. + // sel_h bit mask: 32'b0000_0000_0000_0001_0000_0000_0000_0001 + // vld_h bit mask: 32'b1111_1111_1111_1110_1111_1111_1111_1110 + for (genvar i = 0; i < 2; i++) begin : gen_sel_vld_h + assign sel_h[i] = operand_b_i[i*16 +: 1]; + assign vld_h[i] = ~|operand_b_i[i*16 + 1 +: 15]; + end + + // Convert selector indices and valid signals to control the nibble-based + // crossbar logic. + logic [7:0][2:0] sel; + logic [7:0] vld; + always_comb begin + unique case (operator_i) + ALU_XPERM_N: begin + // No conversion needed. + sel = sel_n; + vld = vld_n; + end + + ALU_XPERM_B: begin + // Convert byte to nibble indicies. + for (int b = 0; b < 4; b++) begin + sel[b*2 + 0] = {sel_b[b], 1'b0}; + sel[b*2 + 1] = {sel_b[b], 1'b1}; + vld[b*2 +: 2] = {2{vld_b[b]}}; + end + end + + ALU_XPERM_H: begin + // Convert half-word to nibble indices. + for (int h = 0; h < 2; h++) begin + sel[h*4 + 0] = {sel_h[h], 2'b00}; + sel[h*4 + 1] = {sel_h[h], 2'b01}; + sel[h*4 + 2] = {sel_h[h], 2'b10}; + sel[h*4 + 3] = {sel_h[h], 2'b11}; + vld[h*4 +: 4] = {4{vld_h[h]}}; + end + end + + default: begin + // Tie valid to zero to disable the crossbar unless we need it. + sel = sel_n; + vld = '0; + end + endcase + end + + // The actual nibble-based crossbar logic. 
+ logic [7:0][3:0] val_n; + logic [7:0][3:0] xperm_n; + assign val_n = operand_a_i; + for (genvar i = 0; i < 8; i++) begin : gen_xperm_n + assign xperm_n[i] = vld[i] ? val_n[sel[i]] : '0; + end + assign xperm_result = xperm_n; + + /////////////////////////////////////////////////// + // Carry-less Multiply + Cyclic Redundancy Check // + /////////////////////////////////////////////////// + + // Carry-less multiplication can be understood as multiplication based on + // the addition interpreted as the bit-wise xor operation. + // + // Example: 1101 X 1011 = 1111111: + // + // 1011 X 1101 + // ----------- + // 1101 + // xor 1101 + // --------- + // 10111 + // xor 0000 + // ---------- + // 010111 + // xor 1101 + // ----------- + // 1111111 + // + // Architectural details: + // A 32 x 32-bit array + // [ operand_b[i] ? (operand_a << i) : '0 for i in 0 ... 31 ] + // is generated. The entries of the array are pairwise 'xor-ed' + // together in a 5-stage binary tree. + // + // + // Cyclic Redundancy Check: + // + // CRC-32 (CRC-32/ISO-HDLC) and CRC-32C (CRC-32/ISCSI) are directly implemented. For + // documentation of the crc configuration (crc-polynomials, initialization, reflection, etc.) + // see http://reveng.sourceforge.net/crc-catalogue/all.htm + // A useful guide to crc arithmetic and algorithms is given here: + // http://www.piclist.com/techref/method/math/crcguide.html. + // + // The CRC operation solves the following equation using binary polynomial arithmetic: + // + // rev(rd)(x) = rev(rs1)(x) * x**n mod {1, P}(x) + // + // where P denotes lower 32 bits of the corresponding CRC polynomial, rev(a) the bit reversal + // of a, n = 8,16, or 32 for .b, .h, .w -variants. {a, b} denotes bit concatenation. + // + // Using barret reduction, one can show that + // + // M(x) mod P(x) = R(x) = + // (M(x) * x**n) & {deg(P(x)'{1'b1}}) ^ (M(x) x**-(deg(P(x) - n)) cx mu(x) cx P(x), + // + // Where mu(x) = polydiv(x**64, {1,P}) & 0xffffffff. 
Here, 'cx' refers to carry-less + // multiplication. Substituting rev(rd)(x) for R(x) and rev(rs1)(x) for M(x) and solving for + // rd(x) with P(x) a crc32 polynomial (deg(P(x)) = 32), we get + // + // rd = rev( (rev(rs1) << n) ^ ((rev(rs1) >> (32-n)) cx mu cx P) + // = (rs1 >> n) ^ rev(rev( (rs1 << (32-n)) cx rev(mu)) cx P) + // ^-- cycle 0--------------------^ + // ^- cycle 1 -------------------------------------------^ + // + // In the last step we used the fact that carry-less multiplication is bit-order agnostic: + // rev(a cx b) = rev(a) cx rev(b). + + logic clmul_rmode; + logic clmul_hmode; + logic [31:0] clmul_op_a; + logic [31:0] clmul_op_b; + logic [31:0] operand_b_rev; + logic [31:0] clmul_and_stage[32]; + logic [31:0] clmul_xor_stage1[16]; + logic [31:0] clmul_xor_stage2[8]; + logic [31:0] clmul_xor_stage3[4]; + logic [31:0] clmul_xor_stage4[2]; + + logic [31:0] clmul_result_raw; + + for (genvar i = 0; i < 32; i++) begin : gen_rev_operand_b + assign operand_b_rev[i] = operand_b_i[31-i]; + end + + assign clmul_rmode = operator_i == ALU_CLMULR; + assign clmul_hmode = operator_i == ALU_CLMULH; + + // CRC + localparam logic [31:0] CRC32_POLYNOMIAL = 32'h04c1_1db7; + localparam logic [31:0] CRC32_MU_REV = 32'hf701_1641; + + localparam logic [31:0] CRC32C_POLYNOMIAL = 32'h1edc_6f41; + localparam logic [31:0] CRC32C_MU_REV = 32'hdea7_13f1; + + logic crc_op; + + logic crc_cpoly; + + logic [31:0] crc_operand; + logic [31:0] crc_poly; + logic [31:0] crc_mu_rev; + + assign crc_op = (operator_i == ALU_CRC32C_W) | (operator_i == ALU_CRC32_W) | + (operator_i == ALU_CRC32C_H) | (operator_i == ALU_CRC32_H) | + (operator_i == ALU_CRC32C_B) | (operator_i == ALU_CRC32_B); + + assign crc_cpoly = (operator_i == ALU_CRC32C_W) | + (operator_i == ALU_CRC32C_H) | + (operator_i == ALU_CRC32C_B); + + assign crc_hmode = (operator_i == ALU_CRC32_H) | (operator_i == ALU_CRC32C_H); + assign crc_bmode = (operator_i == ALU_CRC32_B) | (operator_i == ALU_CRC32C_B); + + assign crc_poly = 
crc_cpoly ? CRC32C_POLYNOMIAL : CRC32_POLYNOMIAL; + assign crc_mu_rev = crc_cpoly ? CRC32C_MU_REV : CRC32_MU_REV; + + always_comb begin + unique case (1'b1) + crc_bmode: crc_operand = {operand_a_i[7:0], 24'h0}; + crc_hmode: crc_operand = {operand_a_i[15:0], 16'h0}; + default: crc_operand = operand_a_i; + endcase + end + + // Select clmul input + always_comb begin + if (crc_op) begin + clmul_op_a = instr_first_cycle_i ? crc_operand : imd_val_q_i[0]; + clmul_op_b = instr_first_cycle_i ? crc_mu_rev : crc_poly; + end else begin + clmul_op_a = clmul_rmode | clmul_hmode ? operand_a_rev : operand_a_i; + clmul_op_b = clmul_rmode | clmul_hmode ? operand_b_rev : operand_b_i; + end + end + + for (genvar i = 0; i < 32; i++) begin : gen_clmul_and_op + assign clmul_and_stage[i] = clmul_op_b[i] ? clmul_op_a << i : '0; + end + + for (genvar i = 0; i < 16; i++) begin : gen_clmul_xor_op_l1 + assign clmul_xor_stage1[i] = clmul_and_stage[2*i] ^ clmul_and_stage[2*i+1]; + end + + for (genvar i = 0; i < 8; i++) begin : gen_clmul_xor_op_l2 + assign clmul_xor_stage2[i] = clmul_xor_stage1[2*i] ^ clmul_xor_stage1[2*i+1]; + end + + for (genvar i = 0; i < 4; i++) begin : gen_clmul_xor_op_l3 + assign clmul_xor_stage3[i] = clmul_xor_stage2[2*i] ^ clmul_xor_stage2[2*i+1]; + end + + for (genvar i = 0; i < 2; i++) begin : gen_clmul_xor_op_l4 + assign clmul_xor_stage4[i] = clmul_xor_stage3[2*i] ^ clmul_xor_stage3[2*i+1]; + end + + assign clmul_result_raw = clmul_xor_stage4[0] ^ clmul_xor_stage4[1]; + + for (genvar i = 0; i < 32; i++) begin : gen_rev_clmul_result + assign clmul_result_rev[i] = clmul_result_raw[31-i]; + end + + // clmulr_result = rev(clmul(rev(a), rev(b))) + // clmulh_result = clmulr_result >> 1 + always_comb begin + case (1'b1) + clmul_rmode: clmul_result = clmul_result_rev; + clmul_hmode: clmul_result = {1'b0, clmul_result_rev[31:1]}; + default: clmul_result = clmul_result_raw; + endcase + end + end else begin : gen_alu_rvb_not_otearlgrey_full + assign shuffle_result = '0; + assign 
xperm_result = '0; + assign clmul_result = '0; + // support signals + assign clmul_result_rev = '0; + assign crc_bmode = '0; + assign crc_hmode = '0; + end + + if (RV32B == RV32BFull) begin : gen_alu_rvb_full + + /////////////// + // Butterfly // + /////////////// + + // The butterfly / inverse butterfly network executing bcompress/bdecompress (zbe) + // instructions. For bdecompress, the control bits mask of a local left region is generated + // by the inverse of a n-bit left rotate and complement upon wrap (LROTC) operation by the + // number of ones in the deposit bitmask to the right of the segment. n hereby denotes the + // width of the according segment. The bitmask for a pertaining local right region is equal + // to the corresponding local left region. Bcompress uses an analogue inverse process. + // Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather, + // Bit Scatter and Bit Permuation Instructions for Commodity Microprocessors", (2008). + // + // The bcompress/bdecompress instructions are completed in 2 cycles. In the first cycle, the + // control bitmask is prepared by executing the parallel prefix bit count. In the second + // cycle, the bit swapping is executed according to the control masks. + + // 8-bit example: (Hilewitz et al.) 
+ // Consider the instruction bdecompress operand_a_i deposit_mask + // Let operand_a_i = 8'babcd_efgh + // deposit_mask = 8'b1010_1101 + // + // control bitmask for stage 1: + // - number of ones in the right half of the deposit bitmask: 3 + // - width of the segment: 4 + // - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000 + // + // control bitmask: c3 c2 c1 c0 c3 c2 c1 c0 + // 1 0 0 0 1 0 0 0 + // <- L -----> <- R -----> + // operand_a_i a b c d e f g h + // :\ | | | /: | | | + // : +|---|--|-+ : | | | + // :/ | | | \: | | | + // stage 1 e b c d a f g h + // <L-> <R-> <L-> <R-> + // control bitmask: c3 c2 c3 c2 c1 c0 c1 c0 + // 1 1 1 1 1 0 1 0 + // :\ :\ /: /: :\ | /: | + // : +:-+-:+ : : +|-+ : | + // :/ :/ \: \: :/ | \: | + // stage 2 c d e b g f a h + // L R L R L R L R + // control bitmask: c3 c3 c2 c2 c1 c1 c0 c0 + // 1 1 0 0 1 1 0 0 + // :\/: | | :\/: | | + // : : | | : : | | + // :/\: | | :/\: | | + // stage 3 d c e b f g a h + // & deposit bitmask: 1 0 1 0 1 1 0 1 + // result: d 0 e 0 f g 0 h + + logic [ 5:0] bitcnt_partial_q [32]; + + // first cycle + // Store partial bitcnts + for (genvar i = 0; i < 32; i++) begin : gen_bitcnt_reg_in_lsb + assign bitcnt_partial_lsb_d[i] = bitcnt_partial[i][0]; + end + + for (genvar i = 0; i < 16; i++) begin : gen_bitcnt_reg_in_b1 + assign bitcnt_partial_msb_d[i] = bitcnt_partial[2*i+1][1]; + end + + for (genvar i = 0; i < 8; i++) begin : gen_bitcnt_reg_in_b2 + assign bitcnt_partial_msb_d[16+i] = bitcnt_partial[4*i+3][2]; + end + + for (genvar i = 0; i < 4; i++) begin : gen_bitcnt_reg_in_b3 + assign bitcnt_partial_msb_d[24+i] = bitcnt_partial[8*i+7][3]; + end + + for (genvar i = 0; i < 2; i++) begin : gen_bitcnt_reg_in_b4 + assign bitcnt_partial_msb_d[28+i] = bitcnt_partial[16*i+15][4]; + end + + assign bitcnt_partial_msb_d[30] = bitcnt_partial[31][5]; + assign bitcnt_partial_msb_d[31] = 1'b0; // unused + + // Second cycle + // Load partial bitcnts + always_comb begin + bitcnt_partial_q = '{default: '0}; + + for (int 
unsigned i = 0; i < 32; i++) begin : gen_bitcnt_reg_out_lsb + bitcnt_partial_q[i][0] = imd_val_q_i[0][i]; + end + + for (int unsigned i = 0; i < 16; i++) begin : gen_bitcnt_reg_out_b1 + bitcnt_partial_q[2*i+1][1] = imd_val_q_i[1][i]; + end + + for (int unsigned i = 0; i < 8; i++) begin : gen_bitcnt_reg_out_b2 + bitcnt_partial_q[4*i+3][2] = imd_val_q_i[1][16+i]; + end + + for (int unsigned i = 0; i < 4; i++) begin : gen_bitcnt_reg_out_b3 + bitcnt_partial_q[8*i+7][3] = imd_val_q_i[1][24+i]; + end + + for (int unsigned i = 0; i < 2; i++) begin : gen_bitcnt_reg_out_b4 + bitcnt_partial_q[16*i+15][4] = imd_val_q_i[1][28+i]; + end + + bitcnt_partial_q[31][5] = imd_val_q_i[1][30]; + end + + logic [31:0] butterfly_mask_l[5]; + logic [31:0] butterfly_mask_r[5]; + logic [31:0] butterfly_mask_not[5]; + logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap + + // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage + `define _N(stg) (16 >> stg) + + // bcompress / bdecompress control bit generation + for (genvar stg = 0; stg < 5; stg++) begin : gen_butterfly_ctrl_stage + // number of segs: 2** stg + for (genvar seg=0; seg<2**stg; seg++) begin : gen_butterfly_ctrl + + assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] = + {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} << + bitcnt_partial_q[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0]; + + assign butterfly_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] + = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]; + + assign butterfly_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] + = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]; + + assign butterfly_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0; + assign butterfly_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0; + end + end + `undef _N + + for (genvar stg = 0; stg < 5; stg++) begin : gen_butterfly_not + assign butterfly_mask_not[stg] = + ~(butterfly_mask_l[stg] | butterfly_mask_r[stg]); + end + + 
always_comb begin + butterfly_result = operand_a_i; + + butterfly_result = butterfly_result & butterfly_mask_not[0] | + ((butterfly_result & butterfly_mask_l[0]) >> 16)| + ((butterfly_result & butterfly_mask_r[0]) << 16); + + butterfly_result = butterfly_result & butterfly_mask_not[1] | + ((butterfly_result & butterfly_mask_l[1]) >> 8)| + ((butterfly_result & butterfly_mask_r[1]) << 8); + + butterfly_result = butterfly_result & butterfly_mask_not[2] | + ((butterfly_result & butterfly_mask_l[2]) >> 4)| + ((butterfly_result & butterfly_mask_r[2]) << 4); + + butterfly_result = butterfly_result & butterfly_mask_not[3] | + ((butterfly_result & butterfly_mask_l[3]) >> 2)| + ((butterfly_result & butterfly_mask_r[3]) << 2); + + butterfly_result = butterfly_result & butterfly_mask_not[4] | + ((butterfly_result & butterfly_mask_l[4]) >> 1)| + ((butterfly_result & butterfly_mask_r[4]) << 1); + + butterfly_result = butterfly_result & operand_b_i; + end + + always_comb begin + invbutterfly_result = operand_a_i & operand_b_i; + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] | + ((invbutterfly_result & butterfly_mask_l[4]) >> 1)| + ((invbutterfly_result & butterfly_mask_r[4]) << 1); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] | + ((invbutterfly_result & butterfly_mask_l[3]) >> 2)| + ((invbutterfly_result & butterfly_mask_r[3]) << 2); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] | + ((invbutterfly_result & butterfly_mask_l[2]) >> 4)| + ((invbutterfly_result & butterfly_mask_r[2]) << 4); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] | + ((invbutterfly_result & butterfly_mask_l[1]) >> 8)| + ((invbutterfly_result & butterfly_mask_r[1]) << 8); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] | + ((invbutterfly_result & butterfly_mask_l[0]) >> 16)| + ((invbutterfly_result & butterfly_mask_r[0]) << 16); + end + end else begin : gen_alu_rvb_not_full + logic [31:0] 
unused_imd_val_q_1; + assign unused_imd_val_q_1 = imd_val_q_i[1]; + assign butterfly_result = '0; + assign invbutterfly_result = '0; + // support signals + assign bitcnt_partial_lsb_d = '0; + assign bitcnt_partial_msb_d = '0; + end + + ////////////////////////////////////// + // Multicycle Bitmanip Instructions // + ////////////////////////////////////// + // Ternary instructions + Shift Rotations + Bit Compress/Decompress + CRC + // For ternary instructions (zbt), operand_a_i is tied to rs1 in the first cycle and rs3 in the + // second cycle. operand_b_i is always tied to rs2. + + always_comb begin + unique case (operator_i) + ALU_CMOV: begin + multicycle_result = (operand_b_i == 32'h0) ? operand_a_i : imd_val_q_i[0]; + imd_val_d_o = '{operand_a_i, 32'h0}; + if (instr_first_cycle_i) begin + imd_val_we_o = 2'b01; + end else begin + imd_val_we_o = 2'b00; + end + end + + ALU_CMIX: begin + multicycle_result = imd_val_q_i[0] | bwlogic_and_result; + imd_val_d_o = '{bwlogic_and_result, 32'h0}; + if (instr_first_cycle_i) begin + imd_val_we_o = 2'b01; + end else begin + imd_val_we_o = 2'b00; + end + end + + ALU_FSR, ALU_FSL, + ALU_ROL, ALU_ROR: begin + if (shift_amt[4:0] == 5'h0) begin + multicycle_result = shift_amt[5] ? 
operand_a_i : imd_val_q_i[0]; + end else begin + multicycle_result = imd_val_q_i[0] | shift_result; + end + imd_val_d_o = '{shift_result, 32'h0}; + if (instr_first_cycle_i) begin + imd_val_we_o = 2'b01; + end else begin + imd_val_we_o = 2'b00; + end + end + + ALU_CRC32_W, ALU_CRC32C_W, + ALU_CRC32_H, ALU_CRC32C_H, + ALU_CRC32_B, ALU_CRC32C_B: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + unique case (1'b1) + crc_bmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 8); + crc_hmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 16); + default: multicycle_result = clmul_result_rev; + endcase + imd_val_d_o = '{clmul_result_rev, 32'h0}; + if (instr_first_cycle_i) begin + imd_val_we_o = 2'b01; + end else begin + imd_val_we_o = 2'b00; + end + end else begin + imd_val_d_o = '{operand_a_i, 32'h0}; + imd_val_we_o = 2'b00; + multicycle_result = '0; + end + end + + ALU_BCOMPRESS, ALU_BDECOMPRESS: begin + if (RV32B == RV32BFull) begin + multicycle_result = (operator_i == ALU_BDECOMPRESS) ? 
butterfly_result : + invbutterfly_result; + imd_val_d_o = '{bitcnt_partial_lsb_d, bitcnt_partial_msb_d}; + if (instr_first_cycle_i) begin + imd_val_we_o = 2'b11; + end else begin + imd_val_we_o = 2'b00; + end + end else begin + imd_val_d_o = '{operand_a_i, 32'h0}; + imd_val_we_o = 2'b00; + multicycle_result = '0; + end + end + + default: begin + imd_val_d_o = '{operand_a_i, 32'h0}; + imd_val_we_o = 2'b00; + multicycle_result = '0; + end + endcase + end + + + end else begin : g_no_alu_rvb + logic [31:0] unused_imd_val_q[2]; + assign unused_imd_val_q = imd_val_q_i; + logic [31:0] unused_butterfly_result; + assign unused_butterfly_result = butterfly_result; + logic [31:0] unused_invbutterfly_result; + assign unused_invbutterfly_result = invbutterfly_result; + // RV32B result signals + assign bitcnt_result = '0; + assign minmax_result = '0; + assign pack_result = '0; + assign sext_result = '0; + assign singlebit_result = '0; + assign rev_result = '0; + assign shuffle_result = '0; + assign xperm_result = '0; + assign butterfly_result = '0; + assign invbutterfly_result = '0; + assign clmul_result = '0; + assign multicycle_result = '0; + // RV32B support signals + assign imd_val_d_o = '{default: '0}; + assign imd_val_we_o = '{default: '0}; + end + + //////////////// + // Result mux // + //////////////// + + always_comb begin + result_o = '0; + + unique case (operator_i) + // Bitwise Logic Operations (negate: RV32B) + ALU_XOR, ALU_XNOR, + ALU_OR, ALU_ORN, + ALU_AND, ALU_ANDN: result_o = bwlogic_result; + + // Adder Operations + ALU_ADD, ALU_SUB, + // RV32B + ALU_SH1ADD, ALU_SH2ADD, + ALU_SH3ADD: result_o = adder_result; + + // Shift Operations + ALU_SLL, ALU_SRL, + ALU_SRA, + // RV32B + ALU_SLO, ALU_SRO: result_o = shift_result; + + // Shuffle Operations (RV32B) + ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result; + + // Crossbar Permutation Operations (RV32B) + ALU_XPERM_N, ALU_XPERM_B, ALU_XPERM_H: result_o = xperm_result; + + // Comparison Operations + ALU_EQ, ALU_NE, + 
ALU_GE, ALU_GEU, + ALU_LT, ALU_LTU, + ALU_SLT, ALU_SLTU: result_o = {31'h0,cmp_result}; + + // MinMax Operations (RV32B) + ALU_MIN, ALU_MAX, + ALU_MINU, ALU_MAXU: result_o = minmax_result; + + // Bitcount Operations (RV32B) + ALU_CLZ, ALU_CTZ, + ALU_CPOP: result_o = {26'h0, bitcnt_result}; + + // Pack Operations (RV32B) + ALU_PACK, ALU_PACKH, + ALU_PACKU: result_o = pack_result; + + // Sign-Extend (RV32B) + ALU_SEXTB, ALU_SEXTH: result_o = sext_result; + + // Ternary Bitmanip Operations (RV32B) + ALU_CMIX, ALU_CMOV, + ALU_FSL, ALU_FSR, + // Rotate Shift (RV32B) + ALU_ROL, ALU_ROR, + // Cyclic Redundancy Checks (RV32B) + ALU_CRC32_W, ALU_CRC32C_W, + ALU_CRC32_H, ALU_CRC32C_H, + ALU_CRC32_B, ALU_CRC32C_B, + // Bit Compress / Decompress (RV32B) + ALU_BCOMPRESS, ALU_BDECOMPRESS: result_o = multicycle_result; + + // Single-Bit Bitmanip Operations (RV32B) + ALU_BSET, ALU_BCLR, + ALU_BINV, ALU_BEXT: result_o = singlebit_result; + + // General Reverse / Or-combine (RV32B) + ALU_GREV, ALU_GORC: result_o = rev_result; + + // Bit Field Place (RV32B) + ALU_BFP: result_o = bfp_result; + + // Carry-less Multiply Operations (RV32B) + ALU_CLMUL, ALU_CLMULR, + ALU_CLMULH: result_o = clmul_result; + + default: ; + endcase + end + + logic unused_shift_amt_compl; + assign unused_shift_amt_compl = shift_amt_compl[5]; + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv b/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv new file mode 100644 index 0000000..e99089b --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_branch_predict.sv
@@ -0,0 +1,100 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Static branch predictor
 *
 * Purely combinational prediction for the instruction currently presented by
 * the fetch stage: jumps (jal / c.j / c.jal) are always predicted taken,
 * conditional branches (branch / c.beqz / c.bnez) are predicted taken when
 * their offset is negative (backward branch heuristic). The predicted target
 * is PC + offset for whichever encoding matched.
 *
 * Handles both compressed and uncompressed encodings; a compressed
 * instruction must sit in the lower 16 bits of fetch_rdata_i. clk_i/rst_ni
 * exist only for the assertions.
 */

`include "prim_assert.sv"

module cheriot_branch_predict (
  input  logic clk_i,
  input  logic rst_ni,

  // Instruction from fetch stage
  input  logic [31:0] fetch_rdata_i,
  input  logic [31:0] fetch_pc_i,
  input  logic        fetch_valid_i,

  // Prediction for supplied instruction
  output logic        predict_branch_taken_o,
  output logic [31:0] predict_branch_pc_o
);
  import cheriot_pkg::*;

  // Short alias for the fetched instruction word to keep lines readable
  logic [31:0] insn;
  assign insn = fetch_rdata_i;

  // Sign-extended offsets for each candidate encoding
  logic [31:0] imm_j;
  logic [31:0] imm_b;
  logic [31:0] imm_cj;
  logic [31:0] imm_cb;

  // Instruction-class decode flags
  logic is_jal;
  logic is_branch;
  logic is_cjal;
  logic is_cbranch;

  // Offset selected for the target-address calculation
  logic [31:0] sel_imm;

  // Static taken-prediction for conditional branches
  logic branch_taken_pred;

  // Uncompressed J-type and B-type immediates, sign-extended to 32 bit
  assign imm_j = { {12{insn[31]}}, insn[19:12], insn[20], insn[30:21], 1'b0 };
  assign imm_b = { {19{insn[31]}}, insn[31], insn[7], insn[30:25], insn[11:8], 1'b0 };

  // Compressed CJ and CB immediates, sign-extended to 32 bit
  assign imm_cj = { {20{insn[12]}}, insn[12], insn[8], insn[10:9], insn[6], insn[7],
                    insn[2], insn[11], insn[5:3], 1'b0 };
  assign imm_cb = { {23{insn[12]}}, insn[12], insn[6:5], insn[2], insn[11:10],
                    insn[4:3], 1'b0 };

  // Classify the instruction: uncompressed branch / jal by opcode ...
  assign is_branch = opcode_e'(insn[6:0]) == OPCODE_BRANCH;
  assign is_jal    = opcode_e'(insn[6:0]) == OPCODE_JAL;

  // ... and, in quadrant C1, compressed c.beqz/c.bnez (funct3 110/111) and
  // c.j/c.jal (funct3 101/001)
  assign is_cbranch = (insn[1:0] == 2'b01) & ((insn[15:13] == 3'b110) | (insn[15:13] == 3'b111));
  assign is_cjal    = (insn[1:0] == 2'b01) & ((insn[15:13] == 3'b101) | (insn[15:13] == 3'b001));

  // Pick the offset that matches the decoded instruction class
  always_comb begin
    sel_imm = imm_b;

    unique case (1'b1)
      is_jal     : sel_imm = imm_j;
      is_branch  : sel_imm = imm_b;
      is_cjal    : sel_imm = imm_cj;
      is_cbranch : sel_imm = imm_cb;
      default    : ;
    endcase
  end

  `ASSERT_IF(BranchInsTypeOneHot, $onehot0({is_jal, is_branch, is_cjal, is_cbranch}),
             fetch_valid_i)

  // Backward-branch heuristic: predict taken when the offset is negative
  assign branch_taken_pred = (is_branch & imm_b[31]) | (is_cbranch & imm_cb[31]);

  // Jumps are always predicted taken; conditional branches use the heuristic
  assign predict_branch_taken_o = fetch_valid_i & (is_jal | is_cjal | branch_taken_pred);
  // Predicted target address
  assign predict_branch_pc_o = fetch_pc_i + sel_imm;
endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv new file mode 100644 index 0000000..1ebcf3c --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_compressed_decoder.sv
@@ -0,0 +1,362 @@
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Compressed instruction decoder
 *
 * Decodes RISC-V compressed instructions into their RV32 equivalent.
 * This module is fully combinatorial, clock and reset are used for
 * assertions only.
 *
 * When CHERIoTEn is set and cheri_pmode_i is asserted, several RV32C
 * encodings are remapped to CHERIoT capability instructions instead of
 * their base-ISA meaning (c.addi4spn, c.ld/c.sd slots, c.lui with rd=x2,
 * c.ldsp/c.sdsp slots). The exact semantics of OPCODE_CHERI and the
 * clc/csc/cincoffsetimm encodings come from cheriot_pkg / the CHERIoT ISA
 * and are not visible here.
 */

`include "prim_assert.sv"

module cheriot_compressed_decoder # (
  parameter bit CHERIoTEn = 1'b1          // compile-time enable for CHERI remaps
) (
  input  logic        clk_i,              // assertions only
  input  logic        rst_ni,             // assertions only
  input  logic        valid_i,            // instr_i is valid (assertions only)
  input  logic [31:0] instr_i,            // fetched instruction (compressed in [15:0])
  input  logic        cheri_pmode_i,      // runtime CHERI purecap-mode enable
  output logic [31:0] instr_o,            // decompressed 32-bit instruction
  output logic        is_compressed_o,    // instr_i[1:0] != 2'b11
  output logic        illegal_instr_o     // no legal expansion exists
);
  import cheriot_pkg::*;

  // valid_i indicates if instr_i is valid and is used for assertions only.
  // The following signal is used to avoid possible lint errors.
  logic unused_valid;
  assign unused_valid = valid_i;

  ////////////////////////
  // Compressed decoder //
  ////////////////////////

  // The quadrant (instr_i[1:0]) and funct3 (instr_i[15:13]) select the
  // expansion; each branch rebuilds the full 32-bit encoding by
  // concatenating immediate bits, register fields and the target opcode.
  always_comb begin
    // By default, forward incoming instruction, mark it as legal.
    instr_o = instr_i;
    illegal_instr_o = 1'b0;

    // Check if incoming instruction is compressed.
    unique case (instr_i[1:0])
      // C0
      2'b00: begin
        unique case (instr_i[15:13])
          3'b000: begin
            if (CHERIoTEn & cheri_pmode_i)
              // c.incaddr4cspn -> cincoffsetimm cd', csp, imm
              // Same immediate scrambling as c.addi4spn; funct3=001 and
              // OPCODE_CHERI select the capability form.
              instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
                         instr_i[6], 2'b00, 5'h02, 3'b001, 2'b01, instr_i[4:2], {OPCODE_CHERI}};
            else
              // c.addi4spn -> addi rd', x2, imm
              instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
                         instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], {OPCODE_OP_IMM}};
            // All-zero immediate is reserved (covers the all-zero instruction)
            if (instr_i[12:5] == 8'b0)  illegal_instr_o = 1'b1;
          end

          3'b010: begin
            // c.lw -> lw rd', imm(rs1')
            instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6],
                       2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
          end

          3'b011: begin
            // RV64 c.ld slot: repurposed as capability load in CHERI mode
            if (CHERIoTEn & cheri_pmode_i) begin
              // CHERI: c.clc -> clc rd', imm(rs1'); reuse c.ld
              instr_o = {4'b0, instr_i[6:5], instr_i[12:10],
                         3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
            end else begin
              instr_o = instr_i;
              illegal_instr_o = 1'b1;
            end
          end

          3'b110: begin
            // c.sw -> sw rs2', imm(rs1')
            instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2],
                       2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6],
                       2'b00, {OPCODE_STORE}};
          end

          // 001: c.fld (no FP support), 100: reserved, 101: c.fsd
          3'b001,
          3'b100,
          3'b101: begin
            illegal_instr_o = 1'b1;
          end

          3'b111: begin
            // RV64 c.sd slot: repurposed as capability store in CHERI mode
            if (CHERIoTEn & cheri_pmode_i) begin
              // CHERI: c.csc -> csc rs2', imm(rs1'); reuse c.sd
              instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2],
                         2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, {OPCODE_STORE}};
            end else begin
              instr_o = instr_i;
              illegal_instr_o = 1'b1;
            end

          end

          default: begin
            illegal_instr_o = 1'b1;
          end
        endcase
      end

      // C1
      //
      // Register address checks for RV32E are performed in the regular instruction decoder.
      // If this check fails, an illegal instruction exception is triggered and the controller
      // writes the actual faulting instruction to mtval.
      2'b01: begin
        unique case (instr_i[15:13])
          3'b000: begin
            // c.addi -> addi rd, rd, nzimm
            // c.nop
            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2],
                       instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
          end

          3'b001, 3'b101: begin
            // 001: c.jal -> jal x1, imm
            // 101: c.j   -> jal x0, imm
            // ~instr_i[15] selects the link register (x1 for c.jal, x0 for c.j)
            instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6],
                       instr_i[7], instr_i[2], instr_i[11], instr_i[5:3],
                       {9 {instr_i[12]}}, 4'b0, ~instr_i[15], {OPCODE_JAL}};
          end

          3'b010: begin
            // c.li -> addi rd, x0, nzimm
            // (c.li hints are translated into an addi hint)
            instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0,
                       3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
          end

          3'b011: begin
            // c.lui -> lui rd, imm
            // (c.lui hints are translated into a lui hint)
            instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], {OPCODE_LUI}};

            // rd == x2 encodes the stack-pointer-adjust form instead of lui:
            // c.incaddr16csp -> cincoffsetimm csp, csp, nzimm (CHERI mode)
            if (CHERIoTEn & cheri_pmode_i && (instr_i[11:7] == 5'h02)) begin
              instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
                         instr_i[6], 4'b0, 5'h02, 3'b001, 5'h02, {OPCODE_CHERI}};
            end else if (instr_i[11:7] == 5'h02) begin
              // c.addi16sp -> addi x2, x2, nzimm
              instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
                         instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, {OPCODE_OP_IMM}};
            end

            // Zero immediate is reserved for both forms
            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
          end

          3'b100: begin
            // ALU ops on rd'/rs1' selected by instr_i[11:10]
            unique case (instr_i[11:10])
              2'b00,
              2'b01: begin
                // 00: c.srli -> srli rd, rd, shamt
                // 01: c.srai -> srai rd, rd, shamt
                // (c.srli/c.srai hints are translated into a srli/srai hint)
                instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7],
                           3'b101, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
                // shamt[5] (instr_i[12]) set is reserved on RV32
                if (instr_i[12] == 1'b1)  illegal_instr_o = 1'b1;
              end

              2'b10: begin
                // c.andi -> andi rd, rd, imm
                instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7],
                           3'b111, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
              end

              2'b11: begin
                // Register-register ops selected by {instr_i[12], instr_i[6:5]}
                unique case ({instr_i[12], instr_i[6:5]})
                  3'b000: begin
                    // c.sub -> sub rd', rd', rs2'
                    instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7],
                               3'b000, 2'b01, instr_i[9:7], {OPCODE_OP}};
                  end

                  3'b001: begin
                    // c.xor -> xor rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100,
                               2'b01, instr_i[9:7], {OPCODE_OP}};
                  end

                  3'b010: begin
                    // c.or  -> or rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110,
                               2'b01, instr_i[9:7], {OPCODE_OP}};
                  end

                  3'b011: begin
                    // c.and -> and rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111,
                               2'b01, instr_i[9:7], {OPCODE_OP}};
                  end

                  3'b100,
                  3'b101,
                  3'b110,
                  3'b111: begin
                    // 100: c.subw
                    // 101: c.addw
                    // (RV64-only; reserved on RV32)
                    illegal_instr_o = 1'b1;
                  end

                  default: begin
                    illegal_instr_o = 1'b1;
                  end
                endcase
              end

              default: begin
                illegal_instr_o = 1'b1;
              end
            endcase
          end

          3'b110, 3'b111: begin
            // 0: c.beqz -> beq rs1', x0, imm
            // 1: c.bnez -> bne rs1', x0, imm
            // instr_i[13] selects beq vs bne funct3 bit
            instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01,
                       instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3],
                       instr_i[12], {OPCODE_BRANCH}};
          end

          default: begin
            illegal_instr_o = 1'b1;
          end
        endcase
      end

      // C2
      //
      // Register address checks for RV32E are performed in the regular instruction decoder.
      // If this check fails, an illegal instruction exception is triggered and the controller
      // writes the actual faulting instruction to mtval.
      2'b10: begin
        unique case (instr_i[15:13])
          3'b000: begin
            // c.slli -> slli rd, rd, shamt
            // (c.ssli hints are translated into a slli hint)
            instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], {OPCODE_OP_IMM}};
            if (instr_i[12] == 1'b1)  illegal_instr_o = 1'b1; // reserved for custom extensions
          end

          3'b010: begin
            // c.lwsp -> lw rd, imm(x2)
            instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02,
                       3'b010, instr_i[11:7], OPCODE_LOAD};
            // rd == x0 is reserved
            if (instr_i[11:7] == 5'b0)  illegal_instr_o = 1'b1;
          end

          3'b011: begin
            // RV64 c.ldsp slot: repurposed as capability load in CHERI mode
            if (CHERIoTEn & cheri_pmode_i) begin
              // c.clcsp -> clc cd, imm(c2), reused c.ldsp
              instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02,
                         3'b011, instr_i[11:7], OPCODE_LOAD};
              if (instr_i[11:7] == 5'b0)  illegal_instr_o = 1'b1;
            end else begin
              instr_o = instr_i;
              illegal_instr_o = 1'b1;
            end
          end

          3'b100: begin
            // instr_i[12] splits mv/jr (0) from add/jalr/ebreak (1);
            // rs2 == x0 selects the jump/ebreak forms
            if (instr_i[12] == 1'b0) begin
              if (instr_i[6:2] != 5'b0) begin
                // c.mv -> add rd/rs1, x0, rs2
                // (c.mv hints are translated into an add hint)
                instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], {OPCODE_OP}};
              end else begin
                // c.jr -> jalr x0, rd/rs1, 0
                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, {OPCODE_JALR}};
                // rs1 == x0 is reserved
                if (instr_i[11:7] == 5'b0)  illegal_instr_o = 1'b1;
              end
            end else begin
              if (instr_i[6:2] != 5'b0) begin
                // c.add -> add rd, rd, rs2
                // (c.add hints are translated into an add hint)
                instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP}};
              end else begin
                if (instr_i[11:7] == 5'b0) begin
                  // c.ebreak -> ebreak
                  instr_o = {32'h00_10_00_73};
                end else begin
                  // c.jalr -> jalr x1, rs1, 0
                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, {OPCODE_JALR}};
                end
              end
            end
          end

          3'b110: begin
            // c.swsp -> sw rs2, imm(x2)
            instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010,
                       instr_i[11:9], 2'b00, {OPCODE_STORE}};
          end

          3'b111: begin
            // RV64 c.sdsp slot: repurposed as capability store in CHERI mode
            if (CHERIoTEn & cheri_pmode_i) begin
              // c.cscsp -> csc cs2, imm(c2), reuse c.sdsp
              instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011,
                         instr_i[11:10], 3'b000, {OPCODE_STORE}};
            end else begin
              instr_o = instr_i;
              illegal_instr_o = 1'b1;
            end
          end


          // 001: c.fldsp, 101: c.fsdsp (no FP support)
          3'b001,
          3'b101: begin
            illegal_instr_o = 1'b1;
          end

          default: begin
            illegal_instr_o = 1'b1;
          end
        endcase
      end

      // Incoming instruction is not compressed.
      2'b11:;

      default: begin
        illegal_instr_o = 1'b1;
      end
    endcase
  end

  assign is_compressed_o = (instr_i[1:0] != 2'b11);

  ////////////////
  // Assertions //
  ////////////////

  // The valid_i signal used to gate below assertions must be known.
  `ASSERT_KNOWN(IbexInstrValidKnown, valid_i)

  // Selectors must be known/valid.
  `ASSERT(IbexInstrLSBsKnown, valid_i |->
      !$isunknown(instr_i[1:0]))
  `ASSERT(IbexC0Known1, (valid_i && (instr_i[1:0] == 2'b00)) |->
      !$isunknown(instr_i[15:13]))
  `ASSERT(IbexC1Known1, (valid_i && (instr_i[1:0] == 2'b01)) |->
      !$isunknown(instr_i[15:13]))
  `ASSERT(IbexC1Known2, (valid_i && (instr_i[1:0] == 2'b01) && (instr_i[15:13] == 3'b100)) |->
      !$isunknown(instr_i[11:10]))
  `ASSERT(IbexC1Known3, (valid_i &&
      (instr_i[1:0] == 2'b01) && (instr_i[15:13] == 3'b100) && (instr_i[11:10] == 2'b11)) |->
      !$isunknown({instr_i[12], instr_i[6:5]}))
  `ASSERT(IbexC2Known1, (valid_i && (instr_i[1:0] == 2'b10)) |->
      !$isunknown(instr_i[15:13]))

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv b/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv new file mode 100644 index 0000000..6e2109e --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_controller.sv
@@ -0,0 +1,962 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Main controller of the processor + */ + +`include "prim_assert.sv" +`include "dv_fcov_macros.svh" + +module cheriot_controller #( + parameter bit CHERIoTEn = 1'b1, + parameter bit WritebackStage = 0, + parameter bit BranchPredictor = 0 + ) ( + input logic clk_i, + input logic rst_ni, + input logic cheri_pmode_i, + + output logic ctrl_busy_o, // core is busy processing instrs + + // decoder related signals + input logic illegal_insn_i, // decoder has an invalid instr + input logic ecall_insn_i, // decoder has ECALL instr + input logic mret_insn_i, // decoder has MRET instr + input logic dret_insn_i, // decoder has DRET instr + input logic wfi_insn_i, // decoder has WFI instr + input logic ebrk_insn_i, // decoder has EBREAK instr + input logic csr_pipe_flush_i, // do CSR-related pipeline flush + input logic csr_access_i, // decoder has CSR access instr + input logic csr_cheri_always_ok_i, // cheri safe-listed CSR registers + + // instr from IF-ID pipeline stage + input logic instr_valid_i, // instr is valid + input logic [31:0] instr_i, // uncompressed instr data for mtval + input logic [15:0] instr_compressed_i, // instr compressed data for mtval + input logic instr_is_compressed_i, // instr is compressed + input logic instr_bp_taken_i, // instr was predicted taken branch + input logic instr_fetch_err_i, // instr has error + input logic instr_fetch_err_plus2_i, // instr error is x32 + input logic instr_fetch_cheri_acc_vio_i, + input logic instr_fetch_cheri_bound_vio_i, + + input logic [31:0] pc_id_i, // instr address + + // to IF-ID pipeline stage + output 
logic instr_valid_clear_o, // kill instr in IF-ID reg + output logic id_in_ready_o, // ID stage is ready for new instr + output logic controller_run_o, // Controller is in standard instruction + // run mode + + // to prefetcher + output logic instr_req_o, // start fetching instructions + output logic pc_set_o, // jump to address set by pc_mux + output cheriot_pkg::pc_sel_e pc_mux_o, // IF stage fetch address selector + // (boot, normal, exception...) + output logic nt_branch_mispredict_o, // Not-taken branch in ID/EX was + // mispredicted (predicted taken) + output cheriot_pkg::exc_pc_sel_e exc_pc_mux_o, // IF stage selector for exception PC + output cheriot_pkg::exc_cause_e exc_cause_o, // for IF stage, CSRs + + // LSU + input logic [31:0] lsu_addr_last_i, // for mtval + input logic load_err_i, + input logic store_err_i, + input logic lsu_err_is_cheri_i, + output logic wb_exception_o, // Instruction in WB taking an exception + output logic id_exception_o, // Instruction in ID taking an exception + output logic id_exception_nc_o, // no-cheri + + // jump/branch signals + input logic branch_set_i, // branch set signal (branch definitely + // taken) + input logic branch_not_set_i, // branch is definitely not taken + input logic jump_set_i, // jump taken set signal + + // interrupt signals + input logic csr_mstatus_mie_i, // M-mode interrupt enable bit + input logic irq_pending_i, // interrupt request pending + input cheriot_pkg::irqs_t irqs_i, // interrupt requests qualified with + // mie CSR + input logic irq_nm_i, // non-maskeable interrupt + output logic nmi_mode_o, // core executing NMI handler + + // debug signals + input logic debug_req_i, + output cheriot_pkg::dbg_cause_e debug_cause_o, + output logic debug_csr_save_o, + output logic debug_mode_o, + input logic debug_single_step_i, + input logic debug_ebreakm_i, + input logic debug_ebreaku_i, + input logic trigger_match_i, + + output logic csr_save_if_o, + output logic csr_save_id_o, + output logic 
csr_save_wb_o, + output logic csr_restore_mret_id_o, + output logic csr_restore_dret_id_o, + output logic csr_save_cause_o, + output logic csr_mepcc_clrtag_o, + + output logic [31:0] csr_mtval_o, + input cheriot_pkg::priv_lvl_e priv_mode_i, + input logic csr_mstatus_tw_i, + input logic csr_pcc_perm_sr_i, + + // stall & flush signals + input logic stall_id_i, + input logic stall_wb_i, + output logic flush_id_o, + input logic ready_wb_i, + + // performance monitors + output logic perf_jump_o, // we are executing a jump + // instruction (j, jr, jal, jalr) + output logic perf_tbranch_o, // we are executing a taken branch + // instruction + input logic instr_is_cheri_i, // from decoder + input logic cheri_ex_valid_i, // from cheri EX + input logic cheri_ex_err_i, + input logic cheri_wb_err_i, + input logic [11:0] cheri_ex_err_info_i, + input logic [15:0] cheri_wb_err_info_i, + input logic cheri_branch_req_i, + input logic [31:0] cheri_branch_target_i +); + import cheriot_pkg::*; + + // FSM state encoding + //typedef enum logic [3:0] { + // RESET, BOOT_SET, WAIT_SLEEP, SLEEP, FIRST_FETCH, DECODE, FLUSH, + // IRQ_TAKEN, DBG_TAKEN_IF, DBG_TAKEN_ID + //} ctrl_fsm_e; + + ctrl_fsm_e ctrl_fsm_cs, ctrl_fsm_ns; + + logic nmi_mode_q, nmi_mode_d; + logic debug_mode_q, debug_mode_d; + logic load_err_q, load_err_d; + logic store_err_q, store_err_d; + logic lsu_err_is_cheri_q; + logic exc_req_q, exc_req_d, exc_req_nc, exc_req_wb; + logic illegal_insn_q, illegal_insn_d; + logic cheri_ex_err_q, cheri_ex_err_d; + logic cheri_wb_err_q; + logic cheri_asr_err_q, cheri_asr_err_d; + + // Of the various exception/fault signals, which one takes priority in FLUSH and hence controls + // what happens next (setting exc_cause, csr_mtval etc) + logic instr_fetch_err_prio; + logic illegal_insn_prio; + logic ecall_insn_prio; + logic ebrk_insn_prio; + logic store_err_prio; + logic load_err_prio; + logic cheri_ex_err_prio; + logic cheri_wb_err_prio; + logic cheri_asr_err_prio; + + logic stall; + logic 
halt_if; + logic retain_id; + logic flush_id; + logic illegal_dret; + logic illegal_umode; + logic exc_req_lsu; + logic special_req; + logic special_req_pc_change; + logic special_req_flush_only; + logic do_single_step_d; + logic do_single_step_q; + logic enter_debug_mode_prio_d; + logic enter_debug_mode_prio_q; + logic enter_debug_mode; + logic ebreak_into_debug; + logic handle_irq; + logic id_wb_pending; + + logic [3:0] mfip_id; + logic unused_irq_timer; + + logic ecall_insn; + logic mret_insn; + logic dret_insn; + logic wfi_insn; + logic ebrk_insn; + logic csr_pipe_flush; + logic instr_fetch_err; + logic cheri_ex_err; + logic mret_cheri_asr_err; + logic csr_cheri_asr_err; + +`ifndef SYNTHESIS +`ifndef DII_SIM + // synopsys translate_off + // make sure we are called later so that we do not generate messages for + // glitches + always_ff @(negedge clk_i) begin + // print warning in case of decoding errors + if ((ctrl_fsm_cs == DECODE) && instr_valid_i && !instr_fetch_err_i && !wb_exception_o && illegal_insn_d) begin + $display("%t: Illegal instruction (hart %0x) at PC 0x%h: 0x%h", $time, cheriot_core.hart_id_i, + cheriot_id_stage.pc_id_i, + (instr_is_compressed_i ? instr_compressed_i : instr_i)); + // cheriot_id_stage.instr_rdata_i); + end + end + // synopsys translate_on +`endif +`endif + + //////////////// + // Exceptions // + //////////////// + + assign load_err_d = load_err_i; + assign store_err_d = store_err_i; + + // Decoder doesn't take instr_valid into account, factor it in here. 
+ assign ecall_insn = ecall_insn_i & instr_valid_i; + assign mret_insn = mret_insn_i & instr_valid_i; + assign dret_insn = dret_insn_i & instr_valid_i; + assign wfi_insn = wfi_insn_i & instr_valid_i; + assign ebrk_insn = ebrk_insn_i & instr_valid_i; + assign csr_pipe_flush = csr_pipe_flush_i & instr_valid_i; + assign instr_fetch_err = instr_fetch_err_i & instr_valid_i; + assign cheri_ex_err = cheri_ex_err_i & instr_is_cheri_i & instr_valid_i; + + // "Executing DRET outside of Debug Mode causes an illegal instruction exception." + // [Debug Spec v0.13.2, p.41] + assign illegal_dret = dret_insn & ~debug_mode_q; + + // Some instructions can only be executed in M-Mode + assign illegal_umode = (priv_mode_i != PRIV_LVL_M) & + // MRET must be in M-Mode. TW means trap WFI to M-Mode. + (mret_insn | (csr_mstatus_tw_i & wfi_insn)); + + assign mret_cheri_asr_err = CHERIoTEn & cheri_pmode_i & ~csr_pcc_perm_sr_i & mret_insn; + assign csr_cheri_asr_err = CHERIoTEn & cheri_pmode_i & ~csr_pcc_perm_sr_i & instr_valid_i & + csr_access_i & ~illegal_insn_i & ~csr_cheri_always_ok_i; + + // This is recorded in the illegal_insn_q flop to help timing. Specifically + // it is needed to break the path from cheriot_cs_registers/illegal_csr_insn_o + // to pc_set_o. Clear when controller is in FLUSH so it won't remain set + // once illegal instruction is handled. + // All terms in this expression are qualified by instr_valid_i + assign illegal_insn_d = illegal_insn_i | illegal_dret | illegal_umode; + assign cheri_ex_err_d = cheri_pmode_i & cheri_ex_err & (ctrl_fsm_cs != FLUSH); + + assign cheri_asr_err_d = (~illegal_insn_i & csr_cheri_asr_err) | mret_cheri_asr_err; + + // exception requests + // requests are flopped in exc_req_q. This is cleared when controller is in + // the FLUSH state so the cycle following exc_req_q won't remain set for an + // exception request that has just been handled. 
+ // All terms in this expression are qualified by instr_valid_i + assign exc_req_d = (ecall_insn | ebrk_insn | illegal_insn_d | instr_fetch_err | (cheri_pmode_i & cheri_ex_err) | + cheri_asr_err_d) & (ctrl_fsm_cs != FLUSH); + assign exc_req_nc = (ecall_insn | ebrk_insn | illegal_insn_d | instr_fetch_err | cheri_asr_err_d) & + (ctrl_fsm_cs != FLUSH); + + // LSU exception requests + assign exc_req_lsu = store_err_i | load_err_i; + assign exc_req_wb = exc_req_lsu | (cheri_pmode_i & cheri_wb_err_i); + + assign id_exception_o = exc_req_d; + assign id_exception_nc_o = exc_req_nc; + + // special requests: special instructions, pipeline flushes, exceptions... + // All terms in these expressions are qualified by instr_valid_i except exc_req_lsu which can come + // from the Writeback stage with no instr_valid_i from the ID stage + + // These special requests only cause a pipeline flush and in particular don't cause a PC change + // that is outside the normal execution flow + assign special_req_flush_only = wfi_insn | csr_pipe_flush; + + // These special requests cause a change in PC + assign special_req_pc_change = mret_insn | dret_insn | exc_req_d | exc_req_wb; + + // generic special request signal, applies to all instructions + assign special_req = special_req_pc_change | special_req_flush_only; + + // Is there an instruction in ID or WB that has yet to complete? + assign id_wb_pending = instr_valid_i | ~ready_wb_i; + + // Exception/fault prioritisation is taken from Table 3.7 of Priviledged Spec v1.11 + if (WritebackStage) begin : g_wb_exceptions + always_comb begin + instr_fetch_err_prio = 0; + illegal_insn_prio = 0; + ecall_insn_prio = 0; + ebrk_insn_prio = 0; + store_err_prio = 0; + load_err_prio = 0; + cheri_ex_err_prio = 0; + cheri_wb_err_prio = 0; + cheri_asr_err_prio = 0; + + // Note that with the writeback stage store/load errors occur on the instruction in writeback, + // all other exception/faults occur on the instruction in ID/EX. 
The faults from writeback + // must take priority as that instruction is architecurally ordered before the one in ID/EX. + if (store_err_q) begin + store_err_prio = 1'b1; + end else if (load_err_q) begin + load_err_prio = 1'b1; + end else if (cheri_pmode_i & cheri_wb_err_q) begin + cheri_wb_err_prio = 1'b1; + end else if (instr_fetch_err) begin + instr_fetch_err_prio = 1'b1; + end else if (illegal_insn_q) begin + illegal_insn_prio = 1'b1; + end else if (ecall_insn) begin + ecall_insn_prio = 1'b1; + end else if (ebrk_insn) begin + ebrk_insn_prio = 1'b1; + end else if (cheri_pmode_i & cheri_ex_err_q) begin + cheri_ex_err_prio = 1'b1; + end else if (cheri_asr_err_q) begin + cheri_asr_err_prio = 1'b1; + end + end + + // Instruction in writeback is generating an exception so instruction in ID must not execute + assign wb_exception_o = load_err_q | store_err_q | load_err_i | store_err_i | (cheri_pmode_i & cheri_wb_err_i); + end else begin : g_no_wb_exceptions + always_comb begin + instr_fetch_err_prio = 0; + illegal_insn_prio = 0; + ecall_insn_prio = 0; + ebrk_insn_prio = 0; + store_err_prio = 0; + load_err_prio = 0; + cheri_wb_err_prio = 0; + cheri_ex_err_prio = 0; + cheri_asr_err_prio = 0; + + if (instr_fetch_err) begin + instr_fetch_err_prio = 1'b1; + end else if (illegal_insn_q) begin + illegal_insn_prio = 1'b1; + end else if (ecall_insn) begin + ecall_insn_prio = 1'b1; + end else if (ebrk_insn) begin + ebrk_insn_prio = 1'b1; + end else if (cheri_ex_err_q) begin + cheri_ex_err_prio = 1'b1; + end else if (store_err_q) begin + store_err_prio = 1'b1; + end else if (load_err_q) begin + load_err_prio = 1'b1; + end else if (cheri_wb_err_q) begin + cheri_wb_err_prio = 1'b1; + end else if (cheri_asr_err_q) begin + cheri_asr_err_prio = 1'b1; + end + end + assign wb_exception_o = 1'b0; + end + + `ASSERT_IF(IbexExceptionPrioOnehot, + $onehot({instr_fetch_err_prio, + illegal_insn_prio, + ecall_insn_prio, + ebrk_insn_prio, + store_err_prio, + load_err_prio, + cheri_wb_err_prio, + 
cheri_ex_err_prio, + cheri_asr_err_prio}), + (ctrl_fsm_cs == FLUSH) & csr_save_cause_o) + + //////////////// + // Interrupts // + //////////////// + + // Enter debug mode due to an external debug_req_i or because the core is in + // single step mode (dcsr.step == 1). Single step must be qualified with + // instruction valid otherwise the core will immediately enter debug mode + // due to a recently flushed IF (or a delay in an instruction returning from + // memory) before it has had anything to single step. + // Also enter debug mode on a trigger match (hardware breakpoint) + + // Set `do_single_step_q` when a valid instruction is seen outside of debug mode and core is in + // single step mode. The first valid instruction on debug mode entry will clear it. Hold its value + // when there is no valid instruction so `do_single_step_d` remains asserted until debug mode is + // entered. + assign do_single_step_d = instr_valid_i ? ~debug_mode_q & debug_single_step_i : do_single_step_q; + // Enter debug mode due to: + // * external `debug_req_i` + // * core in single step mode (dcsr.step == 1). + // * trigger match (hardware breakpoint) + // + // `debug_req_i` and `do_single_step_d` request debug mode with priority. This results in a debug + // mode entry even if the controller goes to `FLUSH` in preparation for handling an exception or + // interrupt. `trigger_match_i` is not a priority entry into debug mode as it must be ignored + // where control flow changes such that the instruction causing the trigger is no longer being + // executed. + assign enter_debug_mode_prio_d = (debug_req_i | do_single_step_d) & ~debug_mode_q; + assign enter_debug_mode = enter_debug_mode_prio_d | (trigger_match_i & ~debug_mode_q); + + // Set when an ebreak should enter debug mode rather than jump to exception + // handler + assign ebreak_into_debug = priv_mode_i == PRIV_LVL_M ? debug_ebreakm_i : + priv_mode_i == PRIV_LVL_U ? 
debug_ebreaku_i : + 1'b0; + + // Interrupts including NMI are ignored, + // - while in debug mode [Debug Spec v0.13.2, p.39], + // - while in NMI mode (nested NMIs are not supported, NMI has highest priority and + // cannot be interrupted by regular interrupts). + assign handle_irq = ~debug_mode_q & ~nmi_mode_q & + (irq_nm_i | (irq_pending_i & csr_mstatus_mie_i)); + + // generate ID of fast interrupts, highest priority to lowest ID + always_comb begin : gen_mfip_id + mfip_id = 4'd0; + + for (int i = 14; i >= 0; i--) begin + if (irqs_i.irq_fast[i]) begin + mfip_id = i[3:0]; + end + end + end + + assign unused_irq_timer = irqs_i.irq_timer; + + ///////////////////// + // Core controller // + ///////////////////// + + always_comb begin + // Default values + instr_req_o = 1'b1; + + csr_save_if_o = 1'b0; + csr_save_id_o = 1'b0; + csr_save_wb_o = 1'b0; + csr_restore_mret_id_o = 1'b0; + csr_restore_dret_id_o = 1'b0; + csr_save_cause_o = 1'b0; + csr_mepcc_clrtag_o = 1'b0; + csr_mtval_o = '0; + + // The values of pc_mux and exc_pc_mux are only relevant if pc_set is set. Some of the states + // below always set pc_mux and exc_pc_mux but only set pc_set if certain conditions are met. + // This avoid having to factor those conditions into the pc_mux and exc_pc_mux select signals + // helping timing. 
+ pc_mux_o = PC_BOOT; + pc_set_o = 1'b0; + nt_branch_mispredict_o = 1'b0; + + exc_pc_mux_o = EXC_PC_IRQ; + exc_cause_o = EXC_CAUSE_INSN_ADDR_MISA; // = 6'h00 + + ctrl_fsm_ns = ctrl_fsm_cs; + + ctrl_busy_o = 1'b1; + + halt_if = 1'b0; + retain_id = 1'b0; + flush_id = 1'b0; + + debug_csr_save_o = 1'b0; + debug_cause_o = DBG_CAUSE_EBREAK; + debug_mode_d = debug_mode_q; + nmi_mode_d = nmi_mode_q; + + perf_tbranch_o = 1'b0; + perf_jump_o = 1'b0; + + controller_run_o = 1'b0; + + unique case (ctrl_fsm_cs) + RESET: begin + instr_req_o = 1'b0; + pc_mux_o = PC_BOOT; + pc_set_o = 1'b1; + ctrl_fsm_ns = BOOT_SET; + end + + BOOT_SET: begin + // copy boot address to instr fetch address + instr_req_o = 1'b1; + pc_mux_o = PC_BOOT; + pc_set_o = 1'b1; + + ctrl_fsm_ns = FIRST_FETCH; + end + + WAIT_SLEEP: begin + ctrl_busy_o = 1'b0; + instr_req_o = 1'b0; + halt_if = 1'b1; + flush_id = 1'b1; + ctrl_fsm_ns = SLEEP; + end + + SLEEP: begin + // instruction in IF stage is already valid + // we begin execution when an interrupt has arrived + instr_req_o = 1'b0; + halt_if = 1'b1; + flush_id = 1'b1; + + // normal execution flow + // in debug mode or single step mode we leave immediately (wfi=nop) + if (irq_nm_i || irq_pending_i || debug_req_i || debug_mode_q || debug_single_step_i) begin + ctrl_fsm_ns = FIRST_FETCH; + end else begin + // Make sure clock remains disabled. + ctrl_busy_o = 1'b0; + end + end + + FIRST_FETCH: begin + // Stall because of IF miss + if (id_in_ready_o) begin + ctrl_fsm_ns = DECODE; + end + + // handle interrupts + if (handle_irq) begin + // We are handling an interrupt. Set halt_if to tell IF not to give + // us any more instructions before it redirects to the handler, but + // don't set flush_id: we must allow this instruction to complete + // (since it might have outstanding loads or stores). 
+ ctrl_fsm_ns = IRQ_TAKEN; + halt_if = 1'b1; + end + + // enter debug mode + if (enter_debug_mode) begin + ctrl_fsm_ns = DBG_TAKEN_IF; + // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the + // ID state is needed for correct debug mode entry + halt_if = 1'b1; + end + end + + DECODE: begin + // normal operating mode of the ID stage, in case of debug and interrupt requests, + // priorities are as follows (lower number == higher priority) + // 1. currently running (multicycle) instructions and exceptions caused by these + // 2. debug requests + // 3. interrupt requests + + controller_run_o = 1'b1; + + // Set PC mux for branch and jump here to ease timing. Value is only relevant if pc_set_o is + // also set. Setting the mux value here avoids factoring in special_req and instr_valid_i + // which helps timing. + pc_mux_o = PC_JUMP; + + + // Get ready for special instructions, exceptions, pipeline flushes + if (special_req) begin + // Halt IF but don't flush ID. This leaves a valid instruction in + // ID so controller can determine appropriate action in the + // FLUSH state. + retain_id = 1'b1; + + // Wait for the writeback stage to either be ready for a new instruction or raise its own + // exception before going to FLUSH. If the instruction in writeback raises an exception it + // must take priority over any exception from an instruction in ID/EX. Only once the + // writeback stage is ready can we be certain that won't happen. Without a writeback + // stage ready_wb_i == 1 so the FSM will always go directly to FLUSH. + + if (ready_wb_i | wb_exception_o) begin + ctrl_fsm_ns = FLUSH; + end + end + + if (branch_set_i || jump_set_i || (cheri_pmode_i & cheri_branch_req_i)) begin + // Only set the PC if the branch predictor hasn't already done the branch for us + pc_set_o = BranchPredictor ? 
~instr_bp_taken_i : 1'b1; + + perf_tbranch_o = branch_set_i; + perf_jump_o = jump_set_i; + end + + if (BranchPredictor) begin + if (instr_bp_taken_i & branch_not_set_i) begin + // If the instruction is a branch that was predicted to be taken but was not taken + // signal a mispredict. + nt_branch_mispredict_o = 1'b1; + end + end + + // If entering debug mode or handling an IRQ the core needs to wait until any instruction in + // ID or WB has finished executing. Stall IF during that time. + if ((enter_debug_mode || handle_irq) && (stall || id_wb_pending)) begin + halt_if = 1'b1; + end + + if (!stall && !special_req && !id_wb_pending) begin + if (enter_debug_mode) begin + // enter debug mode + ctrl_fsm_ns = DBG_TAKEN_IF; + // Halt IF only for now, ID will be flushed in DBG_TAKEN_IF as the + // ID state is needed for correct debug mode entry + halt_if = 1'b1; + end else if (handle_irq) begin + // handle interrupt (not in debug mode) + ctrl_fsm_ns = IRQ_TAKEN; + // We are handling an interrupt (not in debug mode). Set halt_if to + // tell IF not to give us any more instructions before it redirects + // to the handler, but don't set flush_id: we must allow this + // instruction to complete (since it might have outstanding loads + // or stores). 
+ halt_if = 1'b1; + end + end + + end // DECODE + + IRQ_TAKEN: begin + pc_mux_o = PC_EXC; + exc_pc_mux_o = EXC_PC_IRQ; + + if (handle_irq) begin + pc_set_o = 1'b1; + + csr_save_if_o = 1'b1; + csr_save_cause_o = 1'b1; + + // interrupt priorities according to Privileged Spec v1.11 p.31 + if (irq_nm_i && !nmi_mode_q) begin + exc_cause_o = EXC_CAUSE_IRQ_NM; + nmi_mode_d = 1'b1; // enter NMI mode + end else if (irqs_i.irq_fast != 15'b0) begin + // generate exception cause ID from fast interrupt ID: + // - first bit distinguishes interrupts from exceptions, + // - second bit adds 16 to fast interrupt ID + // for example EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16} + exc_cause_o = exc_cause_e'({2'b11, mfip_id}); + end else if (irqs_i.irq_external) begin + exc_cause_o = EXC_CAUSE_IRQ_EXTERNAL_M; + end else if (irqs_i.irq_software) begin + exc_cause_o = EXC_CAUSE_IRQ_SOFTWARE_M; + end else begin // irqs_i.irq_timer + exc_cause_o = EXC_CAUSE_IRQ_TIMER_M; + end + end + + ctrl_fsm_ns = DECODE; + end + + DBG_TAKEN_IF: begin + pc_mux_o = PC_EXC; + exc_pc_mux_o = EXC_PC_DBD; + + // enter debug mode and save PC in IF to dpc + // jump to debug exception handler in debug memory + flush_id = 1'b1; + pc_set_o = 1'b1; + + csr_save_if_o = 1'b1; + debug_csr_save_o = 1'b1; + + csr_save_cause_o = 1'b1; + if (trigger_match_i) begin + debug_cause_o = DBG_CAUSE_TRIGGER; + end else if (debug_single_step_i) begin + debug_cause_o = DBG_CAUSE_STEP; + end else begin + debug_cause_o = DBG_CAUSE_HALTREQ; + end + + // enter debug mode + debug_mode_d = 1'b1; + + ctrl_fsm_ns = DECODE; + end + + DBG_TAKEN_ID: begin + // enter debug mode and save PC in ID to dpc, used when encountering + // 1. EBREAK during debug mode + // 2. EBREAK with forced entry into debug mode (ebreakm or ebreaku set). + // regular ebreak's go through FLUSH. + // + // for 1. do not update dcsr and dpc, for 2. 
do so [Debug Spec v0.13.2, p.39] + // jump to debug exception handler in debug memory + flush_id = 1'b1; + pc_mux_o = PC_EXC; + pc_set_o = 1'b1; + exc_pc_mux_o = EXC_PC_DBD; + + // update dcsr and dpc + if (ebreak_into_debug && !debug_mode_q) begin // ebreak with forced entry + + // dpc (set to the address of the EBREAK, i.e. set to PC in ID stage) + csr_save_cause_o = 1'b1; + csr_save_id_o = 1'b1; + + // dcsr + debug_csr_save_o = 1'b1; + debug_cause_o = DBG_CAUSE_EBREAK; + end + + // enter debug mode + debug_mode_d = 1'b1; + + ctrl_fsm_ns = DECODE; + end + + FLUSH: begin + // flush the pipeline + halt_if = 1'b1; + flush_id = 1'b1; + ctrl_fsm_ns = DECODE; + + // As pc_mux and exc_pc_mux can take various values in this state they aren't set early + // here. + + // exceptions: set exception PC, save PC and exception cause + // exc_req_lsu is high for one clock cycle only (in DECODE) + if (exc_req_q || store_err_q || load_err_q || (cheri_pmode_i & cheri_wb_err_q)) begin + pc_set_o = 1'b1; + pc_mux_o = PC_EXC; + exc_pc_mux_o = debug_mode_q ? EXC_PC_DBG_EXC : EXC_PC_EXC; + + if (WritebackStage) begin : g_writeback_mepc_save + // With the writeback stage present whether an instruction accessing memory will cause + // an exception is only known when it is in writeback. So when taking such an exception + // epc must come from writeback. 
+ csr_save_id_o = ~(store_err_q | load_err_q | (cheri_pmode_i & cheri_wb_err_q)); + csr_save_wb_o = store_err_q | load_err_q | (cheri_pmode_i & cheri_wb_err_q); + end else begin : g_no_writeback_mepc_save + csr_save_id_o = 1'b0; + end + + csr_save_cause_o = 1'b1; + + // Exception/fault prioritisation logic will have set exactly 1 X_prio signal + unique case (1'b1) + instr_fetch_err_prio: begin + if (instr_fetch_cheri_acc_vio_i) begin // tag violation + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, 1'b1, 5'h0, 5'h2}; // s=1, cap_idx=0 + end else if (instr_fetch_cheri_bound_vio_i) begin // bound violation + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, 1'b1, 5'h0, 5'h1}; // s=1, cap_idx=0 + csr_mepcc_clrtag_o = 1'b1; + end else begin // ext memory error + exc_cause_o = EXC_CAUSE_INSTR_ACCESS_FAULT; + csr_mtval_o = instr_fetch_err_plus2_i ? (pc_id_i + 32'd2) : pc_id_i; + end + end + illegal_insn_prio: begin + exc_cause_o = EXC_CAUSE_ILLEGAL_INSN; + csr_mtval_o = (CHERIoTEn & cheri_pmode_i) ? 32'h0 : + (instr_is_compressed_i ? {16'b0, instr_compressed_i} : instr_i); + end + ecall_insn_prio: begin + exc_cause_o = (priv_mode_i == PRIV_LVL_M) ? EXC_CAUSE_ECALL_MMODE : + EXC_CAUSE_ECALL_UMODE; + end + ebrk_insn_prio: begin + if (debug_mode_q | ebreak_into_debug) begin + /* + * EBREAK in debug mode re-enters debug mode + * + * "The only exception is EBREAK. When that is executed in Debug + * Mode, it halts the hart again but without updating dpc or + * dcsr." [Debug Spec v0.13.2, p.39] + */ + + /* + * dcsr.ebreakm == 1: + * "EBREAK instructions in M-mode enter Debug Mode." + * [Debug Spec v0.13.2, p.42] + */ + pc_set_o = 1'b0; + csr_save_id_o = 1'b0; + csr_save_cause_o = 1'b0; + ctrl_fsm_ns = DBG_TAKEN_ID; + flush_id = 1'b0; + end else begin + /* + * "The EBREAK instruction is used by debuggers to cause control + * to be transferred back to a debugging environment. It + * generates a breakpoint exception and performs no other + * operation. 
[...] ECALL and EBREAK cause the receiving + * privilege mode's epc register to be set to the address of the + * ECALL or EBREAK instruction itself, not the address of the + * following instruction." [Privileged Spec v1.11, p.40] + */ + exc_cause_o = EXC_CAUSE_BREAKPOINT; + if (CHERIoTEn && cheri_pmode_i) csr_mtval_o = pc_id_i; // kliu added to match sail + end + end + store_err_prio: begin + if (cheri_pmode_i & lsu_err_is_cheri_q) begin + if (cheri_wb_err_info_i[11]) begin + exc_cause_o = EXC_CAUSE_STORE_ADDR_MISALIGN; + csr_mtval_o = lsu_addr_last_i; + end else begin + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]}; + end + end else begin + exc_cause_o = EXC_CAUSE_STORE_ACCESS_FAULT; + csr_mtval_o = lsu_addr_last_i; + end + end + load_err_prio: begin + if (cheri_pmode_i & lsu_err_is_cheri_q) begin + if (cheri_wb_err_info_i[11]) begin + exc_cause_o = EXC_CAUSE_LOAD_ADDR_MISALIGN; + csr_mtval_o = lsu_addr_last_i; + end else begin + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]}; + end + end else begin + exc_cause_o = EXC_CAUSE_LOAD_ACCESS_FAULT; + csr_mtval_o = lsu_addr_last_i; + end + end + cheri_ex_err_prio: begin + if (cheri_pmode_i) begin + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, cheri_ex_err_info_i[10:0]}; + end + end + cheri_wb_err_prio: begin + if (cheri_pmode_i) begin + if (cheri_wb_err_info_i[12]) begin // illegal SCR addr + exc_cause_o = EXC_CAUSE_ILLEGAL_INSN; + csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]}; + end else begin + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + csr_mtval_o = {21'h0, cheri_wb_err_info_i[10:0]}; + end + end + end + cheri_asr_err_prio: begin + exc_cause_o = EXC_CAUSE_CHERI_FAULT; + //csr_mtval_o = instr_is_compressed_i ? 
{16'b0, instr_compressed_i} : instr_i; + csr_mtval_o = {21'b0, 1'b1, 5'h0, 5'h18}; // S=1, cap_idx=0 (pcc), err=0x18 + end + + default: ; + endcase + end else begin + // special instructions and pipeline flushes + if (mret_insn) begin + pc_mux_o = PC_ERET; + pc_set_o = 1'b1; + csr_restore_mret_id_o = 1'b1; + if (nmi_mode_q) begin + nmi_mode_d = 1'b0; // exit NMI mode + end + end else if (dret_insn) begin + pc_mux_o = PC_DRET; + pc_set_o = 1'b1; + debug_mode_d = 1'b0; + csr_restore_dret_id_o = 1'b1; + end else if (wfi_insn) begin + ctrl_fsm_ns = WAIT_SLEEP; + end else if (csr_pipe_flush && handle_irq) begin + // start handling IRQs when doing CSR-related pipeline flushes + ctrl_fsm_ns = IRQ_TAKEN; + end + end // exc_req_q + + // Entering debug mode due to either single step or debug_req. Ensure + // registers are set for exception but then enter debug handler rather + // than exception handler [Debug Spec v0.13.2, p.44] + // Leave all other signals as is to ensure CSRs and PC get set as if + // core was entering exception handler, entry to debug mode will then + // see the appropriate state and setup dpc correctly. + // If an EBREAK instruction is causing us to enter debug mode on the + // same cycle as a debug_req or single step, honor the EBREAK and + // proceed to DBG_TAKEN_ID. + if (enter_debug_mode_prio_q && !(ebrk_insn_prio && ebreak_into_debug)) begin + ctrl_fsm_ns = DBG_TAKEN_IF; + end + end // FLUSH + + default: begin + instr_req_o = 1'b0; + ctrl_fsm_ns = RESET; + end + endcase + end + + assign flush_id_o = flush_id; + + // signal to CSR when in debug mode + assign debug_mode_o = debug_mode_q; + + // signal to CSR when in an NMI handler (for nested exception handling) + assign nmi_mode_o = nmi_mode_q; + + /////////////////// + // Stall control // + /////////////////// + + // If high current instruction cannot complete this cycle. Either because it needs more cycles to + // finish (stall_id_i) or because the writeback stage cannot accept it yet (stall_wb_i). 
If there + // is no writeback stage stall_wb_i is a constant 0. + assign stall = stall_id_i | stall_wb_i; + + // signal to IF stage that ID stage is ready for next instr + assign id_in_ready_o = ~stall & ~halt_if & ~retain_id; + + // kill instr in IF-ID pipeline reg that are done, or if a + // multicycle instr causes an exception for example + // retain_id is another kind of stall, where the instr_valid bit must remain + // set (unless flush_id is set also). It cannot be factored directly into + // stall as this causes a combinational loop. + assign instr_valid_clear_o = ~(stall | retain_id) | flush_id; + + // update registers + always_ff @(posedge clk_i or negedge rst_ni) begin : update_regs + if (!rst_ni) begin + ctrl_fsm_cs <= RESET; + nmi_mode_q <= 1'b0; + do_single_step_q <= 1'b0; + debug_mode_q <= 1'b0; + enter_debug_mode_prio_q <= 1'b0; + load_err_q <= 1'b0; + store_err_q <= 1'b0; + lsu_err_is_cheri_q <= 1'b0; + exc_req_q <= 1'b0; + illegal_insn_q <= 1'b0; + cheri_ex_err_q <= 1'b0; + cheri_wb_err_q <= 1'b0; + cheri_asr_err_q <= 1'b0; + end else begin + ctrl_fsm_cs <= ctrl_fsm_ns; + nmi_mode_q <= nmi_mode_d; + do_single_step_q <= do_single_step_d; + debug_mode_q <= debug_mode_d; + enter_debug_mode_prio_q <= enter_debug_mode_prio_d; + load_err_q <= load_err_d; + store_err_q <= store_err_d; + lsu_err_is_cheri_q <= lsu_err_is_cheri_i; + exc_req_q <= exc_req_d; + illegal_insn_q <= illegal_insn_d; + cheri_ex_err_q <= cheri_ex_err_d; + cheri_wb_err_q <= cheri_wb_err_i; + cheri_asr_err_q <= cheri_asr_err_d; + end + end + + `ifdef RVFI + // Workaround for internal verilator error when using hierarchical refers to calcuate this + // directly in cheriot_core + logic rvfi_flush_next; + + assign rvfi_flush_next = ctrl_fsm_ns == FLUSH; + `endif + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_core.sv b/hw/ip/cheriot-ibex/rtl/cheriot_core.sv new file mode 100644 index 0000000..7069082 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_core.sv
@@ -0,0 +1,2255 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +`ifdef RISCV_FORMAL + `define RVFI +`endif + +`include "prim_assert.sv" + +/** + * Top level module of the ibex RISC-V core + */ + +//import cheri_pkg::*; + +module cheriot_core import cheriot_pkg::*; import cheri_pkg::*; #( + parameter bit PMPEnable = 1'b0, + parameter int unsigned PMPGranularity = 0, + parameter int unsigned PMPNumRegions = 4, + parameter int unsigned MHPMCounterNum = 0, + parameter int unsigned MHPMCounterWidth = 40, + parameter bit RV32E = 1'b0, + parameter rv32m_e RV32M = RV32MFast, + parameter rv32b_e RV32B = RV32BNone, + parameter bit BranchTargetALU = 1'b0, + parameter bit WritebackStage = 1'b0, + parameter bit ICache = 1'b0, + parameter bit ICacheECC = 1'b0, + parameter int unsigned BusSizeECC = BUS_SIZE, + parameter int unsigned TagSizeECC = IC_TAG_SIZE, + parameter int unsigned LineSizeECC = IC_LINE_SIZE, + parameter bit BranchPredictor = 1'b0, + parameter bit DbgTriggerEn = 1'b0, + parameter int unsigned DbgHwBreakNum = 1, + parameter bit ResetAll = 1'b0, + parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault, + parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault, + parameter bit SecureIbex = 1'b0, + parameter bit DummyInstructions = 1'b0, + parameter bit RegFileECC = 1'b0, + parameter int unsigned RegFileDataWidth = 32, + parameter int unsigned DmHaltAddr = 32'h1A110800, + parameter int unsigned DmExceptionAddr = 32'h1A110808, + // CHERIoT parameters + parameter bit CHERIoTEn = 1'b1, + parameter int unsigned DataWidth = 33, + parameter int unsigned HeapBase = 32'h2001_0000, + parameter int unsigned TSMapBase = 
32'h2002_f000, + parameter int unsigned TSMapSize = 1024, + parameter bit MemCapFmt = 1'b0, + parameter bit CheriPPLBC = 1'b1, + parameter bit CheriSBND2 = 1'b0, + parameter bit CheriTBRE = 1'b1, + parameter bit CheriStkZ = 1'b1, + parameter int unsigned MMRegDinW = 128, + parameter int unsigned MMRegDoutW = 64, + parameter bit CheriCapIT8 = 1'b0 +) ( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + + input logic [31:0] hart_id_i, + input logic [31:0] boot_addr_i, + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + + // Instruction memory interface + output logic instr_req_o, + input logic instr_gnt_i, + input logic instr_rvalid_i, + output logic [31:0] instr_addr_o, + input logic [31:0] instr_rdata_i, + input logic instr_err_i, + + // Data memory interface + output logic data_req_o, + output logic data_is_cap_o, + input logic data_gnt_i, + input logic data_rvalid_i, + output logic data_we_o, + output logic [3:0] data_be_o, + output logic [31:0] data_addr_o, + output logic [DataWidth-1:0] data_wdata_o, + input logic [DataWidth-1:0] data_rdata_i, + input logic data_err_i, + + // Register file interface + output logic dummy_instr_id_o, + output logic [4:0] rf_raddr_a_o, + output logic [4:0] rf_raddr_b_o, + output logic [4:0] rf_waddr_wb_o, + output logic rf_we_wb_o, + output logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc_o, + output reg_cap_t rf_wcap_wb_o, + input logic [RegFileDataWidth-1:0] rf_rdata_a_ecc_i, + input logic [RegFileDataWidth-1:0] rf_rdata_b_ecc_i, + input reg_cap_t rf_rcap_a_i, + input reg_cap_t rf_rcap_b_i, + input logic [31:0] rf_reg_rdy_i, + + output logic rf_trsv_en_o, + output logic [4:0] rf_trsv_addr_o, + output logic [6:0] rf_trsv_par_o, + output logic [4:0] rf_trvk_addr_o, + output logic rf_trvk_en_o, + output logic rf_trvk_clrtag_o, + output logic [6:0] rf_trvk_par_o, + output logic tsmap_cs_o, + output logic [15:0] tsmap_addr_o, + input logic [31:0] tsmap_rdata_i, + input logic [MMRegDinW-1:0] mmreg_corein_i, + 
output logic [MMRegDoutW-1:0] mmreg_coreout_o, + output logic cheri_fatal_err_o, + + // RAMs interface + output logic [IC_NUM_WAYS-1:0] ic_tag_req_o, + output logic ic_tag_write_o, + output logic [IC_INDEX_W-1:0] ic_tag_addr_o, + output logic [TagSizeECC-1:0] ic_tag_wdata_o, + input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS], + output logic [IC_NUM_WAYS-1:0] ic_data_req_o, + output logic ic_data_write_o, + output logic [IC_INDEX_W-1:0] ic_data_addr_o, + output logic [LineSizeECC-1:0] ic_data_wdata_o, + input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS], + input logic ic_scr_key_valid_i, + + // Interrupt inputs + input logic irq_software_i, + input logic irq_timer_i, + input logic irq_external_i, + input logic [14:0] irq_fast_i, + input logic irq_nm_i, // non-maskeable interrupt + output logic irq_pending_o, + + // Debug Interface + input logic debug_req_i, + output crash_dump_t crash_dump_o, + output logic double_fault_seen_o, + + // RISC-V Formal Interface + // Does not comply with the coding standards of _i/_o suffixes, but follows + // the convention of RISC-V Formal Interface Specification. 
+`ifdef RVFI + output logic rvfi_valid, + output logic [63:0] rvfi_order, + output logic [31:0] rvfi_insn, + output logic rvfi_trap, + output logic rvfi_halt, + output logic rvfi_intr, + output logic [ 1:0] rvfi_mode, + output logic [ 1:0] rvfi_ixl, + output logic [ 4:0] rvfi_rs1_addr, + output logic [ 4:0] rvfi_rs2_addr, + output logic [ 4:0] rvfi_rs3_addr, + output logic [31:0] rvfi_rs1_rdata, + output reg_cap_t rvfi_rs1_rcap, + output logic [31:0] rvfi_rs2_rdata, + output reg_cap_t rvfi_rs2_rcap, + output logic [31:0] rvfi_rs3_rdata, + output logic [ 4:0] rvfi_rd_addr, + output logic [31:0] rvfi_rd_wdata, + output reg_cap_t rvfi_rd_wcap, + output logic [31:0] rvfi_pc_rdata, + output logic [31:0] rvfi_pc_wdata, + output logic rvfi_mem_is_cap, + output logic [31:0] rvfi_mem_addr, + output logic [ 3:0] rvfi_mem_rmask, + output logic [ 3:0] rvfi_mem_wmask, + output logic [DataWidth-1:0] rvfi_mem_rdata, + output reg_cap_t rvfi_mem_rcap, + output logic [DataWidth-1:0] rvfi_mem_wdata, + output reg_cap_t rvfi_mem_wcap, + output logic [31:0] rvfi_ext_mip, + output logic rvfi_ext_nmi, + output logic rvfi_ext_debug_req, + output logic [63:0] rvfi_ext_mcycle, +`endif + + // CPU Control Signals + input fetch_enable_t fetch_enable_i, + output logic alert_minor_o, + output logic alert_major_o, + output logic icache_inval_o, + output logic core_busy_o +); + + localparam int unsigned PMP_NUM_CHAN = 3; + localparam bit DataIndTiming = SecureIbex; + localparam bit PCIncrCheck = SecureIbex; + localparam bit ShadowCSR = 1'b0; + + // IF/ID signals + logic dummy_instr_id; + logic instr_valid_id; + logic instr_executing_id; + logic instr_new_id; + logic [31:0] instr_rdata_id; // Instruction sampled inside IF stage + logic [31:0] instr_rdata_alu_id; // Instruction sampled inside IF stage (replicated to + // ease fan-out) + logic [15:0] instr_rdata_c_id; // Compressed instruction sampled inside IF stage + logic instr_is_compressed_id; + logic instr_perf_count_id; + logic 
instr_bp_taken_id; + logic instr_fetch_err; // Bus error on instr fetch + logic instr_fetch_err_plus2; // Instruction error is misaligned + logic instr_fetch_cheri_acc_vio; + logic instr_fetch_cheri_bound_vio; + logic illegal_c_insn_id; // Illegal compressed instruction sent to ID stage + + logic [31:0] pc_if; // Program counter in IF stage + logic [31:0] pc_id; // Program counter in ID stage + logic [31:0] pc_wb; // Program counter in WB stage + logic [33:0] imd_val_d_ex[2]; // Intermediate register for multicycle Ops + logic [33:0] imd_val_q_ex[2]; // Intermediate register for multicycle Ops + logic [1:0] imd_val_we_ex; + + logic data_ind_timing; + logic dummy_instr_en; + logic [2:0] dummy_instr_mask; + logic dummy_instr_seed_en; + logic [31:0] dummy_instr_seed; + logic icache_enable; + logic icache_inval; + logic pc_mismatch_alert; + logic csr_shadow_err; + + logic instr_first_cycle_id; + logic instr_valid_clear; + logic pc_set; + logic nt_branch_mispredict; + logic [31:0] nt_branch_addr; + pc_sel_e pc_mux_id; // Mux selector for next PC + exc_pc_sel_e exc_pc_mux_id; // Mux selector for exception PC + exc_cause_e exc_cause; // Exception cause + + logic lsu_load_err; + logic lsu_store_err; + logic lsu_err_is_cheri; + + // LSU signals + logic lsu_addr_incr_req; + logic [31:0] lsu_addr_last; + logic [31:0] lsu_addr; + + // Jump and branch target and decision (EX->IF) + logic [31:0] branch_target_ex_rv32; + logic [31:0] branch_target_ex_cheri; + logic [31:0] branch_target_ex; + logic branch_decision; + + // Core busy signals + logic ctrl_busy; + logic if_busy; + logic lsu_busy; + + logic lsu_busy_tbre; + + // Register File + logic [4:0] rf_raddr_a; + logic [31:0] rf_rdata_a; + logic [4:0] rf_raddr_b; + logic [31:0] rf_rdata_b; + logic rf_ren_a; + logic rf_ren_b; + logic [4:0] rf_waddr_wb; + logic [31:0] rf_wdata_wb; + + reg_cap_t rf_wcap_wb; + + // Writeback register write data that can be used on the forwarding path (doesn't factor in memory + // read data as this 
is too late for the forwarding path) + logic [31:0] rf_wdata_fwd_wb; + + reg_cap_t rf_wcap_fwd_wb; + + logic [32:0] rf_wdata_lsu; + reg_cap_t rf_wcap_lsu; + logic rf_we_wb; + logic rf_we_lsu; + logic rf_ecc_err_comb; + + logic [4:0] rf_waddr_id; + logic [31:0] rf_wdata_id; + logic rf_we_id; + logic rf_rd_a_wb_match; + logic rf_rd_b_wb_match; + + // ALU Control + alu_op_e alu_operator_ex; + logic [31:0] alu_operand_a_ex; + logic [31:0] alu_operand_b_ex; + + logic [31:0] bt_a_operand; + logic [31:0] bt_b_operand; + + logic [31:0] alu_adder_result_ex; // Used to forward computed address to LSU + logic [31:0] result_ex; + + // Multiplier Control + logic mult_en_ex; + logic div_en_ex; + logic mult_sel_ex; + logic div_sel_ex; + md_op_e multdiv_operator_ex; + logic [1:0] multdiv_signed_mode_ex; + logic [31:0] multdiv_operand_a_ex; + logic [31:0] multdiv_operand_b_ex; + logic multdiv_ready_id; + + // CSR control + logic csr_access; + csr_op_e csr_op; + logic csr_op_en; + csr_num_e csr_addr; + logic [31:0] csr_rdata; + logic [31:0] csr_wdata; + logic illegal_csr_insn_id; // CSR access to non-existent register, + // with wrong priviledge level, + // or missing write permissions + + // Data Memory Control + logic lsu_we; + logic [1:0] lsu_type; + logic lsu_sign_ext; + logic lsu_req; + logic [32:0] lsu_wdata; + reg_cap_t lsu_wcap; + logic lsu_req_done; + + // stall control + logic id_in_ready; + logic ex_valid; + + logic lsu_resp_valid; + logic lsu_resp_err; + + // Signals between instruction core interface and pipe (if and id stages) + logic instr_req_int; // Id stage asserts a req to instruction core interface + logic instr_req_gated; + + // Writeback stage + logic en_wb; + wb_instr_type_e instr_type_wb; + logic ready_wb; + logic rf_write_wb; + logic outstanding_load_wb; + logic outstanding_store_wb; + + // Interrupts + logic nmi_mode; + irqs_t irqs; + logic csr_mstatus_mie; + logic [31:0] csr_mepc, csr_depc; + + // PMP signals + logic [33:0] csr_pmp_addr [PMPNumRegions]; + 
pmp_cfg_t csr_pmp_cfg [PMPNumRegions]; + pmp_mseccfg_t csr_pmp_mseccfg; + logic pmp_req_err [PMP_NUM_CHAN]; + logic data_req_out; + + logic csr_save_if; + logic csr_save_id; + logic csr_save_wb; + logic csr_restore_mret_id; + logic csr_restore_dret_id; + logic csr_save_cause; + logic csr_mepcc_clrtag; + logic csr_mtvec_init; + logic [31:0] csr_mtvec; + logic [31:0] csr_mtval; + logic csr_mstatus_tw; + priv_lvl_e priv_mode_id; + priv_lvl_e priv_mode_lsu; + + // debug mode and dcsr configuration + logic debug_mode; + dbg_cause_e debug_cause; + logic debug_csr_save; + logic debug_single_step; + logic debug_ebreakm; + logic debug_ebreaku; + logic trigger_match; + + // signals relating to instruction movements between pipeline stages + // used by performance counters and RVFI + logic instr_id_done; + logic instr_done_wb; + + logic perf_instr_ret_wb; + logic perf_instr_ret_compressed_wb; + logic perf_instr_ret_wb_spec; + logic perf_instr_ret_compressed_wb_spec; + logic perf_iside_wait; + logic perf_dside_wait; + logic perf_mul_wait; + logic perf_div_wait; + logic perf_jump; + logic perf_branch; + logic perf_tbranch; + logic perf_load; + logic perf_store; + + // for RVFI + logic illegal_insn_id, unused_illegal_insn_id; // ID stage sees an illegal instruction + + pcc_cap_t pcc_cap_r, pcc_cap_w; + + logic cheri_branch_req; + logic cheri_branch_req_spec; + logic instr_is_cheri_id; + logic instr_is_rv32lsu_id; + logic cheri_exec_id; + logic [11:0] cheri_imm12; + logic [19:0] cheri_imm20; + logic [20:0] cheri_imm21; + logic [4:0] cheri_cs2_dec; + logic cheri_load_id; + logic cheri_store_id; + logic cheri_rf_we; + logic [31:0] cheri_result_data; + reg_cap_t cheri_result_cap; + logic cheri_ex_valid; + logic cheri_ex_err; + logic [11:0] cheri_ex_err_info; + logic cheri_wb_err; + logic [15:0] cheri_wb_err_info; + logic [OPDW-1:0] cheri_operator; + + logic rv32_lsu_req; + logic rv32_lsu_we; + logic [1:0] rv32_lsu_type; + logic [31:0] rv32_lsu_wdata; + logic rv32_lsu_sign_ext; + 
logic rv32_lsu_addr_incr_req; + logic [31:0] rv32_lsu_addr_last; + logic rv32_lsu_resp_valid; + + logic cheri_csr_access; + logic [4:0] cheri_csr_addr; + logic [31:0] cheri_csr_wdata; + reg_cap_t cheri_csr_wcap; + cheri_csr_op_e cheri_csr_op; + logic cheri_csr_op_en; + logic [31:0] cheri_csr_rdata; + reg_cap_t cheri_csr_rcap; + logic cheri_csr_set_mie; + logic cheri_csr_clr_mie; + + logic lsu_is_cap, lsu_cheri_err; + logic [3:0] lsu_lc_clrperm; + + logic csr_dbg_tclr_fault; + + logic [31:0] csr_mshwm; + logic [31:0] csr_mshwmb; + logic csr_mshwm_set; + logic [31:0] csr_mshwm_new; + logic ztop_wr; + logic [31:0] ztop_wdata; + full_cap_t ztop_wfcap; + logic [31:0] ztop_rdata; + reg_cap_t ztop_rcap; + + logic stkz_active; + logic stkz_abort; + logic [31:0] stkz_ptr; + logic [31:0] stkz_base; + + logic lsu_tbre_resp_valid; + logic lsu_tbre_resp_err; + logic lsu_resp_is_wr; + logic [32:0] lsu_tbre_raw_lsw; + logic lsu_tbre_req_done; + logic lsu_tbre_addr_incr; + logic tbre_lsu_req; + logic tbre_lsu_is_cap; + logic tbre_lsu_we; + logic [31:0] tbre_lsu_addr; + logic [32:0] tbre_lsu_wdata; + logic tbre_trvk_en; + logic tbre_trvk_clrtag; + + logic lsu_tbre_sel, cpu_lsu_dec; + logic rf_trsv_en; + + logic cpu_stall_by_stkz, cpu_grant_to_stkz; + + + ////////////////////// + // Clock management // + ////////////////////// + + // Before going to sleep, wait for I- and D-side + // interfaces to finish ongoing operations. 
+ assign core_busy_o = ctrl_busy | if_busy | lsu_busy; + + ////////////// + // IF stage // + ////////////// + + cheriot_if_stage #( + .DmHaltAddr (DmHaltAddr), + .DmExceptionAddr (DmExceptionAddr), + .DummyInstructions(DummyInstructions), + .ICache (ICache), + .ICacheECC (ICacheECC), + .BusSizeECC (BusSizeECC), + .TagSizeECC (TagSizeECC), + .LineSizeECC (LineSizeECC), + .PCIncrCheck (PCIncrCheck), + .ResetAll (ResetAll ), + .RndCnstLfsrSeed (RndCnstLfsrSeed ), + .RndCnstLfsrPerm (RndCnstLfsrPerm ), + .BranchPredictor (BranchPredictor), + .CHERIoTEn (CHERIoTEn) + ) if_stage_i ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + .cheri_pmode_i (cheri_pmode_i), + .boot_addr_i (boot_addr_i), + .req_i (instr_req_gated), // instruction request control + .debug_mode_i (debug_mode), + + // instruction cache interface + .instr_req_o (instr_req_o), + .instr_addr_o (instr_addr_o), + .instr_gnt_i (instr_gnt_i), + .instr_rvalid_i (instr_rvalid_i), + .instr_rdata_i (instr_rdata_i), + .instr_err_i (instr_err_i), + + .ic_tag_req_o (ic_tag_req_o), + .ic_tag_write_o (ic_tag_write_o), + .ic_tag_addr_o (ic_tag_addr_o), + .ic_tag_wdata_o (ic_tag_wdata_o), + .ic_tag_rdata_i (ic_tag_rdata_i), + .ic_data_req_o (ic_data_req_o), + .ic_data_write_o (ic_data_write_o), + .ic_data_addr_o (ic_data_addr_o), + .ic_data_wdata_o (ic_data_wdata_o), + .ic_data_rdata_i (ic_data_rdata_i), + .ic_scr_key_valid_i(ic_scr_key_valid_i), + + // outputs to ID stage + .instr_valid_id_o (instr_valid_id), + .instr_new_id_o (instr_new_id), + .instr_rdata_id_o (instr_rdata_id), + .instr_rdata_alu_id_o (instr_rdata_alu_id), + .instr_rdata_c_id_o (instr_rdata_c_id), + .instr_is_compressed_id_o(instr_is_compressed_id), + .instr_bp_taken_o (instr_bp_taken_id), + .instr_fetch_err_o (instr_fetch_err), + .instr_fetch_err_plus2_o (instr_fetch_err_plus2), + .instr_fetch_cheri_acc_vio_o (instr_fetch_cheri_acc_vio), + .instr_fetch_cheri_bound_vio_o (instr_fetch_cheri_bound_vio), + + .illegal_c_insn_id_o (illegal_c_insn_id), + 
.dummy_instr_id_o (dummy_instr_id), + .pc_if_o (pc_if), + .pc_id_o (pc_id), + .pmp_err_if_i (pmp_req_err[PMP_I]), + .pmp_err_if_plus2_i (pmp_req_err[PMP_I2]), + + // control signals + .instr_valid_clear_i (instr_valid_clear), + .pc_set_i (pc_set), + .pc_mux_i (pc_mux_id), + .nt_branch_mispredict_i(nt_branch_mispredict), + .exc_pc_mux_i (exc_pc_mux_id), + .exc_cause (exc_cause), + .dummy_instr_en_i (dummy_instr_en), + .dummy_instr_mask_i (dummy_instr_mask), + .dummy_instr_seed_en_i (dummy_instr_seed_en), + .dummy_instr_seed_i (dummy_instr_seed), + .icache_enable_i (icache_enable), + .icache_inval_i (icache_inval), + + // branch targets + .branch_target_ex_i(branch_target_ex), + .nt_branch_addr_i (nt_branch_addr), + + // CSRs + .csr_mepc_i (csr_mepc), // exception return address + .csr_depc_i (csr_depc), // debug return address + .csr_mtvec_i (csr_mtvec), // trap-vector base address + .csr_mtvec_init_o(csr_mtvec_init), + + // pipeline stalls + .id_in_ready_i(id_in_ready), + + .pc_mismatch_alert_o(pc_mismatch_alert), + .if_busy_o (if_busy), + .pcc_cap_i (pcc_cap_r) + ); + + // Core is waiting for the ISide when ID/EX stage is ready for a new instruction but none are + // available + assign perf_iside_wait = id_in_ready & ~instr_valid_id; + + // Multi-bit fetch enable used when SecureIbex == 1. When SecureIbex == 0 only use the bottom-bit + // of fetch_enable_i. Ensure the multi-bit encoding has the bottom bit set for on and unset for + // off so FetchEnableOn/FetchEnableOff can be used without needing to know the value of + // SecureIbex. 
  // Sanity-check the multi-bit fetch-enable encodings at elaboration time.
  `ASSERT_INIT(FetchEnableSecureOnBottomBitSet, FetchEnableOn[0] == 1'b1)
  `ASSERT_INIT(FetchEnableSecureOffBottomBitClear, FetchEnableOff[0] == 1'b0)

  // fetch_enable_i can be used to stop the core fetching new instructions
  if (SecureIbex) begin : g_instr_req_gated_secure
    // For secure Ibex fetch_enable_i must be a specific multi-bit pattern to enable instruction
    // fetch
    assign instr_req_gated = instr_req_int & (fetch_enable_i == FetchEnableOn);
  end else begin : g_instr_req_gated_non_secure
    // For non secure Ibex only the bottom bit of fetch enable is considered
    logic unused_fetch_enable;
    assign unused_fetch_enable = ^fetch_enable_i[$bits(fetch_enable_t)-1:1];

    assign instr_req_gated = instr_req_int & fetch_enable_i[0];
  end

  //////////////
  // ID stage //
  //////////////

  // Instruction decode stage: decodes RV32 and CHERIoT instructions, owns the
  // controller (stall/flush/exception sequencing), and dispatches operands to
  // the EX block, CHERI EX unit, and LSU.
  cheriot_id_stage #(
    .RV32E          (RV32E),
    .RV32M          (RV32M),
    .RV32B          (RV32B),
    .BranchTargetALU(BranchTargetALU),
    .DataIndTiming  (DataIndTiming),
    .WritebackStage (WritebackStage),
    .BranchPredictor(BranchPredictor),
    .CHERIoTEn      (CHERIoTEn),
    .CheriPPLBC     (CheriPPLBC),
    .CheriSBND2     (CheriSBND2)
  ) id_stage_i (
    .clk_i (clk_i),
    .rst_ni(rst_ni),

    .cheri_pmode_i   (cheri_pmode_i),
    .cheri_tsafe_en_i(cheri_tsafe_en_i),

    // Processor Enable
    .ctrl_busy_o   (ctrl_busy),
    .illegal_insn_o(illegal_insn_id),

    // from/to IF-ID pipeline register
    .instr_valid_i        (instr_valid_id),
    .instr_rdata_i        (instr_rdata_id),
    .instr_rdata_alu_i    (instr_rdata_alu_id),
    .instr_rdata_c_i      (instr_rdata_c_id),
    .instr_is_compressed_i(instr_is_compressed_id),
    .instr_bp_taken_i     (instr_bp_taken_id),

    // Jumps and branches
    .branch_decision_i(branch_decision),

    // IF and ID control signals
    .instr_first_cycle_id_o(instr_first_cycle_id),
    .instr_valid_clear_o   (instr_valid_clear),
    .id_in_ready_o         (id_in_ready),
    .instr_req_o           (instr_req_int),
    .pc_set_o              (pc_set),
    .pc_mux_o              (pc_mux_id),
    .nt_branch_mispredict_o(nt_branch_mispredict),
    .nt_branch_addr_o      (nt_branch_addr),
    .exc_pc_mux_o          (exc_pc_mux_id),
    .exc_cause_o           (exc_cause),
    .icache_inval_o        (icache_inval),

    .instr_fetch_err_i      (instr_fetch_err),
    .instr_fetch_err_plus2_i(instr_fetch_err_plus2),
    .instr_fetch_cheri_acc_vio_i  (instr_fetch_cheri_acc_vio),
    .instr_fetch_cheri_bound_vio_i(instr_fetch_cheri_bound_vio),

    .illegal_c_insn_i(illegal_c_insn_id),

    .pc_id_i(pc_id),

    // Stalls
    .ex_valid_i      (ex_valid),
    .lsu_resp_valid_i(lsu_resp_valid),

    .alu_operator_ex_o (alu_operator_ex),
    .alu_operand_a_ex_o(alu_operand_a_ex),
    .alu_operand_b_ex_o(alu_operand_b_ex),

    .imd_val_q_ex_o (imd_val_q_ex),
    .imd_val_d_ex_i (imd_val_d_ex),
    .imd_val_we_ex_i(imd_val_we_ex),

    .bt_a_operand_o(bt_a_operand),
    .bt_b_operand_o(bt_b_operand),

    .mult_en_ex_o            (mult_en_ex),
    .div_en_ex_o             (div_en_ex),
    .mult_sel_ex_o           (mult_sel_ex),
    .div_sel_ex_o            (div_sel_ex),
    .multdiv_operator_ex_o   (multdiv_operator_ex),
    .multdiv_signed_mode_ex_o(multdiv_signed_mode_ex),
    .multdiv_operand_a_ex_o  (multdiv_operand_a_ex),
    .multdiv_operand_b_ex_o  (multdiv_operand_b_ex),
    .multdiv_ready_id_o      (multdiv_ready_id),

    // CSR ID/EX
    .csr_access_o         (csr_access),
    .csr_op_o             (csr_op),
    .csr_op_en_o          (csr_op_en),
    .csr_save_if_o        (csr_save_if),          // control signal to save PC
    .csr_save_id_o        (csr_save_id),          // control signal to save PC
    .csr_save_wb_o        (csr_save_wb),          // control signal to save PC
    .csr_restore_mret_id_o(csr_restore_mret_id),  // restore mstatus upon MRET
    .csr_restore_dret_id_o(csr_restore_dret_id),  // restore mstatus upon MRET
    .csr_save_cause_o     (csr_save_cause),
    .csr_mepcc_clrtag_o   (csr_mepcc_clrtag),
    .csr_mtval_o          (csr_mtval),
    .priv_mode_i          (priv_mode_id),
    .csr_mstatus_tw_i     (csr_mstatus_tw),
    .illegal_csr_insn_i   (illegal_csr_insn_id),
    .data_ind_timing_i    (data_ind_timing),
    // PCC system-register permission gates access to privileged CSRs
    .csr_pcc_perm_sr_i    (pcc_cap_r.perms[PERM_SR]),

    // LSU
    .lsu_req_o     (rv32_lsu_req),       // to load store unit
    .lsu_we_o      (rv32_lsu_we),        // to load store unit
    .lsu_type_o    (rv32_lsu_type),      // to load store unit
    .lsu_sign_ext_o(rv32_lsu_sign_ext),  // to load store unit
    .lsu_wdata_o   (rv32_lsu_wdata),     // to load store unit
    .lsu_req_done_i(lsu_req_done),       // from load store unit

    .lsu_addr_incr_req_i(rv32_lsu_addr_incr_req),
    .lsu_addr_last_i    (rv32_lsu_addr_last),

    .lsu_load_err_i    (lsu_load_err),
    .lsu_store_err_i   (lsu_store_err),
    .lsu_err_is_cheri_i(lsu_err_is_cheri),

    // Interrupt Signals
    .csr_mstatus_mie_i(csr_mstatus_mie),
    .irq_pending_i    (irq_pending_o),
    .irqs_i           (irqs),
    .irq_nm_i         (irq_nm_i),
    .nmi_mode_o       (nmi_mode),

    // Debug Signal
    .debug_mode_o       (debug_mode),
    .debug_cause_o      (debug_cause),
    .debug_csr_save_o   (debug_csr_save),
    .debug_req_i        (debug_req_i),
    .debug_single_step_i(debug_single_step),
    .debug_ebreakm_i    (debug_ebreakm),
    .debug_ebreaku_i    (debug_ebreaku),
    .trigger_match_i    (trigger_match),

    // write data to commit in the register file
    .result_ex_i(result_ex),
    .csr_rdata_i(csr_rdata),

    .rf_raddr_a_o      (rf_raddr_a),
    .rf_rdata_a_i      (rf_rdata_a),
    .rf_raddr_b_o      (rf_raddr_b),
    .rf_rdata_b_i      (rf_rdata_b),
    .rf_ren_a_o        (rf_ren_a),
    .rf_ren_b_o        (rf_ren_b),
    .rf_waddr_id_o     (rf_waddr_id),
    .rf_wdata_id_o     (rf_wdata_id),
    .rf_we_id_o        (rf_we_id),
    .rf_rd_a_wb_match_o(rf_rd_a_wb_match),
    .rf_rd_b_wb_match_o(rf_rd_b_wb_match),

    .rf_waddr_wb_i    (rf_waddr_wb),
    .rf_wdata_fwd_wb_i(rf_wdata_fwd_wb),
    .rf_write_wb_i    (rf_write_wb),
    .rf_reg_rdy_i     (rf_reg_rdy_i),

    .en_wb_o               (en_wb),
    .instr_type_wb_o       (instr_type_wb),
    .instr_perf_count_id_o (instr_perf_count_id),
    .ready_wb_i            (ready_wb),
    .outstanding_load_wb_i (outstanding_load_wb),
    .outstanding_store_wb_i(outstanding_store_wb),

    // Performance Counters
    .perf_jump_o      (perf_jump),
    .perf_branch_o    (perf_branch),
    .perf_tbranch_o   (perf_tbranch),
    .perf_dside_wait_o(perf_dside_wait),
    .perf_mul_wait_o  (perf_mul_wait),
    .perf_div_wait_o  (perf_div_wait),
    .instr_id_done_o  (instr_id_done),

    // CHERI decode outputs / CHERI EX handshake
    .cheri_exec_id_o      (cheri_exec_id),
    .instr_is_cheri_id_o  (instr_is_cheri_id),
    .instr_is_rv32lsu_id_o(instr_is_rv32lsu_id),
    .cheri_imm12_o        (cheri_imm12),
    .cheri_imm20_o        (cheri_imm20),
    .cheri_imm21_o        (cheri_imm21),
    .cheri_operator_o     (cheri_operator),
    .cheri_cs2_dec_o      (cheri_cs2_dec),
    .cheri_load_o         (cheri_load_id),
    .cheri_store_o        (cheri_store_id),
    .cheri_ex_valid_i     (cheri_ex_valid),
    .cheri_ex_err_i       (cheri_ex_err),
    .cheri_ex_err_info_i  (cheri_ex_err_info),
    .cheri_wb_err_i       (cheri_wb_err),
    .cheri_wb_err_info_i  (cheri_wb_err_info),
    .cheri_branch_req_i   (cheri_branch_req_spec),
    .cheri_branch_target_i(branch_target_ex_cheri)
  );

  assign icache_inval_o = icache_inval;
  // for RVFI only
  assign unused_illegal_insn_id = illegal_insn_id;

  // Execute block: ALU, branch-target ALU and multiplier/divider for the
  // base RV32 instruction set (CHERI execution happens in u_cheri_ex).
  cheriot_ex_block #(
    .RV32M          (RV32M),
    .RV32B          (RV32B),
    .BranchTargetALU(BranchTargetALU)
  ) ex_block_i (
    .clk_i (clk_i),
    .rst_ni(rst_ni),

    // ALU signal from ID stage
    .alu_operator_i         (alu_operator_ex),
    .alu_operand_a_i        (alu_operand_a_ex),
    .alu_operand_b_i        (alu_operand_b_ex),
    .alu_instr_first_cycle_i(instr_first_cycle_id),

    // Branch target ALU signal from ID stage
    .bt_a_operand_i(bt_a_operand),
    .bt_b_operand_i(bt_b_operand),

    // Multipler/Divider signal from ID stage
    .multdiv_operator_i   (multdiv_operator_ex),
    .mult_en_i            (mult_en_ex),
    .div_en_i             (div_en_ex),
    .mult_sel_i           (mult_sel_ex),
    .div_sel_i            (div_sel_ex),
    .multdiv_signed_mode_i(multdiv_signed_mode_ex),
    .multdiv_operand_a_i  (multdiv_operand_a_ex),
    .multdiv_operand_b_i  (multdiv_operand_b_ex),
    .multdiv_ready_id_i   (multdiv_ready_id),
    .data_ind_timing_i    (data_ind_timing),

    // Intermediate value register
    .imd_val_we_o(imd_val_we_ex),
    .imd_val_d_o (imd_val_d_ex),
    .imd_val_q_i (imd_val_q_ex),

    // Outputs
    .alu_adder_result_ex_o(alu_adder_result_ex),   // to LSU
    .result_ex_o          (result_ex),             // to ID

    .branch_target_o  (branch_target_ex_rv32),     // to IF
    .branch_decision_o(branch_decision),           // to ID

    .ex_valid_o(ex_valid)
  );

  //////////////
  // cheri EX //
  //////////////
  // CHERI execution unit: performs capability operations, capability
  // bounds/permission checking for loads/stores, and arbitration between the
  // CPU and the TBRE/stack-zeroing engines for the LSU.
  if (CHERIoTEn) begin : g_cheri_ex
    cheri_ex #(
      .WritebackStage(WritebackStage),
      .MemCapFmt     (MemCapFmt),
      .HeapBase      (HeapBase),
      .TSMapBase     (TSMapBase),
      .TSMapSize     (TSMapSize),
      .CheriPPLBC    (CheriPPLBC),
      .CheriSBND2    (CheriSBND2),
      .CheriStkZ     (CheriStkZ),
      .CheriCapIT8   (CheriCapIT8)
    ) u_cheri_ex (
      .clk_i                (clk_i),
      .rst_ni               (rst_ni),
      .cheri_pmode_i        (cheri_pmode_i),
      .cheri_tsafe_en_i     (cheri_tsafe_en_i),
      .debug_mode_i         (debug_mode),
      // WB-stage forwarding (data + capability) into the CHERI operand path
      .fwd_we_i             (rf_write_wb),
      .fwd_waddr_i          (rf_waddr_wb),
      .fwd_wdata_i          (rf_wdata_fwd_wb),
      .fwd_wcap_i           (rf_wcap_fwd_wb),
      .rf_raddr_a_i         (rf_raddr_a),
      .rf_rdata_a_i         (rf_rdata_a),
      .rf_rcap_a_i          (rf_rcap_a_i),
      .rf_raddr_b_i         (rf_raddr_b),
      .rf_rdata_b_i         (rf_rdata_b),
      .rf_rcap_b_i          (rf_rcap_b_i),
      .rf_trsv_en_o         (rf_trsv_en),
      .rf_waddr_i           (rf_waddr_id),
      .pcc_cap_i            (pcc_cap_r),
      .pcc_cap_o            (pcc_cap_w),
      .pc_id_i              (pc_id),
      .branch_req_o         (cheri_branch_req),
      .branch_req_spec_o    (cheri_branch_req_spec),
      .branch_target_o      (branch_target_ex_cheri),
      .cheri_exec_id_i      (cheri_exec_id),
      .instr_valid_i        (instr_valid_id),
      .instr_first_cycle_i  (instr_first_cycle_id),
      .instr_is_cheri_i     (instr_is_cheri_id),
      .instr_is_rv32lsu_i   (instr_is_rv32lsu_id),
      .instr_is_compressed_i(instr_is_compressed_id),
      .cheri_imm12_i        (cheri_imm12),
      .cheri_imm20_i        (cheri_imm20),
      .cheri_imm21_i        (cheri_imm21),
      .cheri_operator_i     (cheri_operator),
      .cheri_cs2_dec_i      (cheri_cs2_dec),
      .cheri_rf_we_o        (cheri_rf_we),
      .result_data_o        (cheri_result_data),
      .result_cap_o         (cheri_result_cap),
      .cheri_ex_valid_o     (cheri_ex_valid),
      .cheri_ex_err_o       (cheri_ex_err),
      .cheri_ex_err_info_o  (cheri_ex_err_info),
      .cheri_wb_err_o       (cheri_wb_err),
      .cheri_wb_err_info_o  (cheri_wb_err_info),
      .lsu_req_o            (lsu_req),
      .lsu_is_cap_o         (lsu_is_cap),
      .lsu_lc_clrperm_o     (lsu_lc_clrperm),
      .lsu_cheri_err_o      (lsu_cheri_err),
      .lsu_we_o             (lsu_we),
      .lsu_addr_o           (lsu_addr),
      .lsu_type_o           (lsu_type),
      .lsu_wdata_o          (lsu_wdata),
      .lsu_wcap_o           (lsu_wcap),
      .cpu_stall_by_stkz_o  (cpu_stall_by_stkz),
      .cpu_grant_to_stkz_o  (cpu_grant_to_stkz),
      .lsu_sign_ext_o       (lsu_sign_ext),
      .addr_incr_req_i      (lsu_addr_incr_req),
      .addr_last_i          (lsu_addr_last),
      .lsu_req_done_i       (lsu_req_done),
      .lsu_rdata_i          (rf_wdata_lsu),
      .lsu_rcap_i           (rf_wcap_lsu),
      // RV32 (non-CHERI) LSU requests are routed through cheri_ex so the
      // capability checks can be applied before they reach the LSU
      .rv32_lsu_req_i       (rv32_lsu_req),
      .rv32_lsu_we_i        (rv32_lsu_we),
      .rv32_lsu_type_i      (rv32_lsu_type),
      .rv32_lsu_wdata_i     (rv32_lsu_wdata),
      .rv32_lsu_sign_ext_i  (rv32_lsu_sign_ext),
      .rv32_lsu_addr_i      (alu_adder_result_ex),
      .rv32_addr_incr_req_o (rv32_lsu_addr_incr_req),
      .rv32_addr_last_o     (rv32_lsu_addr_last),
      .lsu_tbre_sel_i       (lsu_tbre_sel),
      .tbre_lsu_req_i       (tbre_lsu_req),
      .tbre_lsu_is_cap_i    (tbre_lsu_is_cap),
      .tbre_lsu_we_i        (tbre_lsu_we),
      .tbre_lsu_addr_i      (tbre_lsu_addr),
      .tbre_lsu_wdata_i     (tbre_lsu_wdata),
      .cpu_lsu_dec_o        (cpu_lsu_dec),
      // SCR (special capability register) access interface
      .csr_rdata_i          (cheri_csr_rdata),
      .csr_rcap_i           (cheri_csr_rcap),
      .csr_mstatus_mie_i    (csr_mstatus_mie),
      .csr_access_o         (cheri_csr_access),
      .csr_addr_o           (cheri_csr_addr),
      .csr_wdata_o          (cheri_csr_wdata),
      .csr_wcap_o           (cheri_csr_wcap),
      .csr_op_o             (cheri_csr_op),
      .csr_op_en_o          (cheri_csr_op_en),
      .csr_set_mie_o        (cheri_csr_set_mie),
      .csr_clr_mie_o        (cheri_csr_clr_mie),
      .csr_mshwm_i          (csr_mshwm),
      .csr_mshwmb_i         (csr_mshwmb),
      .csr_mshwm_set_o      (csr_mshwm_set),
      .csr_mshwm_new_o      (csr_mshwm_new),
      .stkz_active_i        (stkz_active),
      .stkz_abort_i         (stkz_abort),
      .stkz_ptr_i           (stkz_ptr),
      .stkz_base_i          (stkz_base),
      .ztop_wr_o            (ztop_wr),
      .ztop_wdata_o         (ztop_wdata),
      .ztop_wfcap_o         (ztop_wfcap),
      .ztop_rdata_i         (ztop_rdata),
      .ztop_rcap_i          (ztop_rcap),
      .csr_dbg_tclr_fault_i (csr_dbg_tclr_fault)
    );

    assign rf_trsv_en_o   = rf_trsv_en;
    assign rf_trsv_addr_o = rf_waddr_id;
    // Branch target mux: CHERI instructions use the CHERI EX target,
    // everything else uses the RV32 EX-block target.
    assign branch_target_ex = (instr_valid_id & instr_is_cheri_id) ?
                              branch_target_ex_cheri : branch_target_ex_rv32;
  end else begin : gen_no_cheri_ex
    // CHERI disabled: tie off all CHERI EX outputs and pass the RV32 LSU
    // request straight through to the load/store unit.
    assign rf_trsv_en_o   = 1'b0;
    assign rf_trsv_addr_o = 5'h0;

    assign cheri_branch_req      = 1'b0;
    assign cheri_branch_req_spec = 1'b0;
    assign branch_target_ex      = branch_target_ex_rv32;
    assign pcc_cap_w             = NULL_PCC_CAP;

    assign cheri_rf_we       = 1'b0;
    assign cheri_result_data = 32'h0;
    assign cheri_result_cap  = NULL_REG_CAP;

    assign cheri_ex_valid    = 1'b0;
    assign cheri_ex_err      = 1'b0;
    assign cheri_ex_err_info = 11'h0;
    assign cheri_wb_err      = 1'b0;
    assign cheri_wb_err_info = 16'h0;

    assign lsu_req        = rv32_lsu_req;
    assign lsu_is_cap     = 1'b0;
    assign lsu_lc_clrperm = 4'h0;
    assign lsu_cheri_err  = 1'b0;
    assign lsu_we         = rv32_lsu_we;
    assign lsu_addr       = alu_adder_result_ex;
    assign lsu_type       = rv32_lsu_type;
    assign lsu_wdata      = rv32_lsu_wdata;
    assign lsu_wcap       = NULL_REG_CAP;
    assign lsu_sign_ext   = rv32_lsu_sign_ext;
    assign rv32_lsu_addr_incr_req = lsu_addr_incr_req;
    assign rv32_lsu_addr_last     = lsu_addr_last;

    assign cpu_lsu_dec       = 1'b0;
    assign cheri_csr_access  = 1'b0;
    assign cheri_csr_addr    = 5'h0;
    assign cheri_csr_wdata   = 32'h0;
    assign cheri_csr_wcap    = NULL_REG_CAP;
    assign cheri_csr_op      = CHERI_CSR_NULL;
    assign cheri_csr_op_en   = 1'b0;
    assign cheri_csr_set_mie = 1'b0;
    assign cheri_csr_clr_mie = 1'b0;

    assign csr_mshwm_set = 1'b0;
    assign csr_mshwm_new = 1'b0;

  end

  /////////////////////////////
  // cheri TS pipeline stage //
  /////////////////////////////

  // Temporal-safety revocation-check stage (pipelined load barrier): checks
  // loaded capabilities against the revocation (TS) map and clears tags of
  // revoked capabilities in the register file.
  if (CHERIoTEn & CheriPPLBC) begin : g_trvk_stage
    cheri_trvk_stage #(
      .HeapBase (HeapBase),
      .TSMapSize(TSMapSize)
    ) cheri_trvk_stage_i (
      // Clock and Reset
      .clk_i                (clk_i),
      .rst_ni               (rst_ni),
      .rf_trsv_en_i         (rf_trsv_en),
      .rf_trsv_addr_i       (rf_trsv_addr_o),
      .lsu_resp_valid_i     (lsu_resp_valid),
      .lsu_load_err_i       (lsu_load_err),
      .rf_wdata_lsu_i       (rf_wdata_lsu[31:0]),
      .rf_wcap_lsu_i        (rf_wcap_lsu),
      .lsu_resp_is_wr_i     (lsu_resp_is_wr),
      .lsu_tbre_resp_valid_i
(lsu_tbre_resp_valid),
      .lsu_tbre_resp_err_i  (lsu_tbre_resp_err),
      .rf_trvk_addr_o       (rf_trvk_addr_o),
      .rf_trvk_en_o         (rf_trvk_en_o),
      .rf_trvk_clrtag_o     (rf_trvk_clrtag_o),
      .tbre_trvk_en_o       (tbre_trvk_en),
      .tbre_trvk_clrtag_o   (tbre_trvk_clrtag),
      .tsmap_cs_o           (tsmap_cs_o),
      .tsmap_addr_o         (tsmap_addr_o),
      .tsmap_rdata_i        (tsmap_rdata_i)
    );
  end else begin
    // No pipelined load barrier: tie off the revocation-check interface.
    assign rf_trvk_addr_o   = 0;
    assign rf_trvk_en_o     = 1'b0;
    assign rf_trvk_clrtag_o = 1'b0;
    assign tsmap_cs_o       = 1'b0;
    assign tsmap_addr_o     = 0;
  end

  //////////////////////////////////////////
  // cheri TS background revocation engine//
  //////////////////////////////////////////

  logic snoop_lsu_req_done;
  logic unmasked_intr;

  assign snoop_lsu_req_done = lsu_req_done;
  // An interrupt is deliverable (pending and globally enabled) - used by the
  // background engines to yield the LSU to the CPU.
  assign unmasked_intr      = irq_pending_o & csr_mstatus_mie;

  // Background temporal-safety revocation engine (TBRE) + stack-zeroing
  // engine wrapper; shares the LSU with the CPU and snoops CPU LSU traffic.
  cheri_tbre_wrapper #(
    .CHERIoTEn (CHERIoTEn),
    .CheriTBRE (CheriTBRE),
    .CheriStkZ (CheriStkZ),
    .MMRegDinW (MMRegDinW),
    .MMRegDoutW(MMRegDoutW)
  ) cheri_tbre_wrapper_i (
    // Clock and Reset
    .clk_i                (clk_i),
    .rst_ni               (rst_ni),
    // memory-mapped control/status register interface
    .mmreg_corein_i       (mmreg_corein_i),
    .mmreg_coreout_o      (mmreg_coreout_o),
    .lsu_tbre_resp_valid_i(lsu_tbre_resp_valid),
    .lsu_tbre_resp_err_i  (lsu_tbre_resp_err),
    .lsu_tbre_resp_is_wr_i(lsu_resp_is_wr),
    .lsu_tbre_raw_lsw_i   (lsu_tbre_raw_lsw),
    .lsu_tbre_req_done_i  (lsu_tbre_req_done),
    .lsu_tbre_addr_incr_i (lsu_tbre_addr_incr),
    .lsu_tbre_sel_i       (lsu_tbre_sel),
    .tbre_lsu_req_o       (tbre_lsu_req),
    .tbre_lsu_is_cap_o    (tbre_lsu_is_cap),
    .tbre_lsu_we_o        (tbre_lsu_we),
    .tbre_lsu_addr_o      (tbre_lsu_addr),
    .tbre_lsu_wdata_o     (tbre_lsu_wdata),
    // snoop the CPU-side LSU channel
    .snoop_lsu_req_done_i (snoop_lsu_req_done),
    .snoop_lsu_req_i      (lsu_req),
    .snoop_lsu_is_cap_i   (lsu_is_cap),
    .snoop_lsu_we_i       (lsu_we),
    .snoop_lsu_cheri_err_i(lsu_cheri_err),
    .snoop_lsu_addr_i     (lsu_addr),
    .trvk_en_i            (tbre_trvk_en),
    .trvk_clrtag_i        (tbre_trvk_clrtag),
    .ztop_wr_i            (ztop_wr),
    .ztop_wdata_i         (ztop_wdata),
    .ztop_wfcap_i         (ztop_wfcap),
    .ztop_rdata_o         (ztop_rdata),
    .ztop_rcap_o          (ztop_rcap),
    .unmasked_intr_i      (unmasked_intr),
    .stkz_active_o        (stkz_active),
    .stkz_abort_o         (stkz_abort),
    .stkz_ptr_o           (stkz_ptr),
    .stkz_base_o          (stkz_base)
  );


  /////////////////////
  // Load/store unit //
  /////////////////////
  logic [32:0] data_wdata33, data_rdata33;

  // Gate the data request with the PMP check for the data channel.
  assign data_req_o   = data_req_out & ~pmp_req_err[PMP_D];
  assign lsu_resp_err = lsu_load_err | lsu_store_err;
  assign data_wdata_o = data_wdata33[DataWidth-1:0];

  // Internally the LSU uses a 33-bit data path (bit 32 = capability tag);
  // zero-extend when the external bus is only 32 bits wide.
  if (DataWidth == 33) begin
    assign data_rdata33 = data_rdata_i;
  end else begin
    assign data_rdata33 = {1'b0, data_rdata_i};
  end

  // Load/store unit: services CPU and TBRE/stack-zeroing requests, handles
  // capability-wide (two-beat) accesses and misaligned transfers.
  cheriot_load_store_unit #(
    .CHERIoTEn  (CHERIoTEn),
    .MemCapFmt  (MemCapFmt),
    .CheriTBRE  (CheriTBRE),
    .CheriCapIT8(CheriCapIT8)
  ) load_store_unit_i (
    .clk_i (clk_i),
    .rst_ni(rst_ni),

    .cheri_pmode_i (cheri_pmode_i),
    // data interface
    .data_req_o    (data_req_out),
    .data_is_cap_o (data_is_cap_o),
    .data_gnt_i    (data_gnt_i),
    .data_rvalid_i (data_rvalid_i),
    .data_err_i    (data_err_i),
    .data_pmp_err_i(pmp_req_err[PMP_D]),

    .data_addr_o (data_addr_o),
    .data_we_o   (data_we_o),
    .data_be_o   (data_be_o),
    .data_wdata_o(data_wdata33),
    .data_rdata_i(data_rdata33),

    // signals to/from ID/EX stage
    .lsu_we_i      (lsu_we),
    .lsu_type_i    (lsu_type),
    .lsu_wdata_i   (lsu_wdata),
    .lsu_wcap_i    (lsu_wcap),
    .lsu_sign_ext_i(lsu_sign_ext),
    .cpu_stall_by_stkz_i(cpu_stall_by_stkz),
    .cpu_grant_to_stkz_i(cpu_grant_to_stkz),

    .lsu_rdata_o      (rf_wdata_lsu),
    .lsu_rcap_o       (rf_wcap_lsu),
    .lsu_rdata_valid_o(rf_we_lsu),
    .lsu_req_i        (lsu_req),
    .lsu_is_cap_i     (lsu_is_cap),
    .lsu_lc_clrperm_i (lsu_lc_clrperm),
    .lsu_cheri_err_i  (lsu_cheri_err),
    .lsu_addr_i       (lsu_addr),

    .lsu_addr_incr_req_o(lsu_addr_incr_req),
    .addr_last_o        (lsu_addr_last),

    .lsu_req_done_o  (lsu_req_done),
    .lsu_resp_valid_o(lsu_resp_valid),
    .lsu_resp_is_wr_o(lsu_resp_is_wr),

    .tbre_lsu_req_i(tbre_lsu_req),
    .cpu_lsu_dec_i (cpu_lsu_dec),
    .lsu_tbre_sel_o
(lsu_tbre_sel),
    .lsu_tbre_raw_lsw_o      (lsu_tbre_raw_lsw),
    .lsu_tbre_req_done_o     (lsu_tbre_req_done),
    .lsu_tbre_resp_valid_o   (lsu_tbre_resp_valid),
    .lsu_tbre_resp_err_o     (lsu_tbre_resp_err),
    .lsu_tbre_addr_incr_req_o(lsu_tbre_addr_incr),

    // exception signals
    .load_err_o        (lsu_load_err),
    .store_err_o       (lsu_store_err),
    .lsu_err_is_cheri_o(lsu_err_is_cheri),

    .busy_o(lsu_busy),

    .busy_tbre_o(lsu_busy_tbre),

    .perf_load_o (perf_load),
    .perf_store_o(perf_store)
  );

  // Writeback stage: arbitrates register-file writes between the ID/EX result
  // (incl. CHERI results) and LSU read data, and provides WB-stage forwarding.
  cheriot_wb_stage #(
    .ResetAll      (ResetAll),
    .WritebackStage(WritebackStage)
  ) wb_stage_i (
    .clk_i                   (clk_i),
    .rst_ni                  (rst_ni),
    .en_wb_i                 (en_wb),
    .instr_type_wb_i         (instr_type_wb),
    .pc_id_i                 (pc_id),
    .instr_is_compressed_id_i(instr_is_compressed_id),
    .instr_perf_count_id_i   (instr_perf_count_id),
    .instr_is_cheri_i        (instr_is_cheri_id),
    .cheri_load_i            (cheri_load_id),
    .cheri_store_i           (cheri_store_id),

    .ready_wb_o                         (ready_wb),
    .rf_write_wb_o                      (rf_write_wb),
    .outstanding_load_wb_o              (outstanding_load_wb),
    .outstanding_store_wb_o             (outstanding_store_wb),
    .pc_wb_o                            (pc_wb),
    .perf_instr_ret_wb_o                (perf_instr_ret_wb),
    .perf_instr_ret_compressed_wb_o     (perf_instr_ret_compressed_wb),
    .perf_instr_ret_wb_spec_o           (perf_instr_ret_wb_spec),
    .perf_instr_ret_compressed_wb_spec_o(perf_instr_ret_compressed_wb_spec),

    .rf_waddr_id_i(rf_waddr_id),
    .rf_wdata_id_i(rf_wdata_id),
    .rf_we_id_i   (rf_we_id),

    .cheri_rf_we_i   (cheri_rf_we),
    .cheri_rf_wdata_i(cheri_result_data),
    .cheri_rf_wcap_i (cheri_result_cap),

    .rf_wdata_lsu_i(rf_wdata_lsu[31:0]),
    .rf_wcap_lsu_i (rf_wcap_lsu),
    .rf_we_lsu_i   (rf_we_lsu),

    .rf_wdata_fwd_wb_o(rf_wdata_fwd_wb),
    .rf_wcap_fwd_wb_o (rf_wcap_fwd_wb),

    .rf_waddr_wb_o(rf_waddr_wb),
    .rf_wdata_wb_o(rf_wdata_wb),
    .rf_wcap_wb_o (rf_wcap_wb),
    .rf_we_wb_o   (rf_we_wb),

    .lsu_resp_valid_i(lsu_resp_valid),
    .lsu_resp_err_i  (lsu_resp_err),

    .instr_done_wb_o(instr_done_wb)
  );

  /////////////////////////////
  // Register file interface //
  /////////////////////////////

  assign dummy_instr_id_o = dummy_instr_id;
  assign rf_raddr_a_o     = rf_raddr_a;
  assign rf_waddr_wb_o    = rf_waddr_wb;
  assign rf_we_wb_o       = rf_we_wb;
  assign rf_raddr_b_o     = rf_raddr_b;

  assign rf_wcap_wb_o = rf_wcap_wb;

  // Register-file ECC for the CHERIoT configuration: the capability fields are
  // folded (XORed) into the 32-bit data word before encoding so a single
  // 39-bit SECDED code covers data + capability + waddr/we.
  if (RegFileECC & CHERIoTEn) begin : gen_ecc_cheriot
    logic [37:0] rf_wcap_vec, rf_rcap_a_vec, rf_rcap_b_vec;
    logic [1:0]  rf_ecc_err_a, rf_ecc_err_b;
    logic        rf_ecc_err_a_id, rf_ecc_err_b_id;
    logic [31:0] wdata_tmp, rdata_a_tmp, rdata_b_tmp;
    logic [31:0] unused_sig32_0, unused_sig32_1;
    logic [38:0] wdata_ecc_tmp;

    assign rf_wcap_vec = reg2vec(rf_wcap_wb);

    // ECC checkbit generation
    // -- for simplicity just linearly add the parity bits together.
    // this is not as good as the full secded implementation (some double errors won't be detected)
    // but probably ok for protection against random fault injection

    // include waddr and we in the ECC calculation
    assign wdata_tmp = rf_wdata_wb ^ rf_wcap_vec[31:0] ^ {20'h0, rf_we_wb, rf_waddr_wb, rf_wcap_vec[37:32]};
    // assign wdata_tmp = rf_wdata_wb ^ rf_wcap_vec[31:0] ^ {26'h0, rf_wcap_vec[37:32]};
    assign rf_wdata_wb_ecc_o = {wdata_ecc_tmp[38:32], rf_wdata_wb};
    prim_secded_inv_39_32_enc regfile_ecc_enc (
      .data_i(wdata_tmp),
      .data_o(wdata_ecc_tmp)
    );

    // generate parity bits for the TRSV/TRVK interface
    prim_secded_inv_39_32_enc trsv_ecc_enc (
      .data_i({26'h0, rf_trsv_en_o, rf_trsv_addr_o}),
      .data_o({rf_trsv_par_o, unused_sig32_0})
    );

    prim_secded_inv_39_32_enc trvk_ecc_enc (
      .data_i({25'h0, rf_trvk_en_o, rf_trvk_clrtag_o, rf_trvk_addr_o}),
      .data_o({rf_trvk_par_o, unused_sig32_1})
    );

    // ECC checking on register file rdata
    assign rf_rcap_a_vec = reg2vec(rf_rcap_a_i);
    assign rf_rcap_b_vec = reg2vec(rf_rcap_b_i);

    assign rdata_a_tmp = rf_rdata_a_ecc_i[31:0] ^ rf_rcap_a_vec[31:0] ^ {20'h0, 1'b1, rf_raddr_a, rf_rcap_a_vec[37:32]};
    assign rdata_b_tmp = rf_rdata_b_ecc_i[31:0] ^
rf_rcap_b_vec[31:0] ^ {20'h0, 1'b1, rf_raddr_b, rf_rcap_b_vec[37:32]};

    //assign rdata_a_tmp = rf_rdata_a_ecc_i[31:0] ^ rf_rcap_a_vec[31:0] ^ {26'h0, rf_rcap_a_vec[37:32]};
    //assign rdata_b_tmp = rf_rdata_b_ecc_i[31:0] ^ rf_rcap_b_vec[31:0] ^ {26'h0, rf_rcap_b_vec[37:32]};
    prim_secded_inv_39_32_dec regfile_ecc_dec_a (
      .data_i    ({rf_rdata_a_ecc_i[38:32], rdata_a_tmp}),
      .data_o    (),
      .syndrome_o(),
      .err_o     (rf_ecc_err_a)
    );
    prim_secded_inv_39_32_dec regfile_ecc_dec_b (
      .data_i    ({rf_rdata_b_ecc_i[38:32], rdata_b_tmp}),
      .data_o    (),
      .syndrome_o(),
      .err_o     (rf_ecc_err_b)
    );

    // Assign read outputs - no error correction, just trigger an alert
    assign rf_rdata_a = rf_rdata_a_ecc_i[31:0];
    assign rf_rdata_b = rf_rdata_b_ecc_i[31:0];

    // Calculate errors - qualify with WB forwarding to avoid xprop into the alert signal
    assign rf_ecc_err_a_id = |rf_ecc_err_a & rf_ren_a & ~rf_rd_a_wb_match;
    assign rf_ecc_err_b_id = |rf_ecc_err_b & rf_ren_b & ~rf_rd_b_wb_match;

    // Combined error
    assign rf_ecc_err_comb = instr_valid_id & (rf_ecc_err_a_id | rf_ecc_err_b_id);

  end else if (RegFileECC) begin : gen_regfile_ecc
    // Plain (non-CHERI) register-file ECC: standard 39,32 SECDED on the
    // 32-bit data word only.
    logic [1:0] rf_ecc_err_a, rf_ecc_err_b;
    logic       rf_ecc_err_a_id, rf_ecc_err_b_id;

    // ECC checkbit generation for regiter file wdata
    prim_secded_inv_39_32_enc regfile_ecc_enc (
      .data_i(rf_wdata_wb),
      .data_o(rf_wdata_wb_ecc_o)
    );

    // ECC checking on register file rdata
    prim_secded_inv_39_32_dec regfile_ecc_dec_a (
      .data_i    (rf_rdata_a_ecc_i),
      .data_o    (),
      .syndrome_o(),
      .err_o     (rf_ecc_err_a)
    );
    prim_secded_inv_39_32_dec regfile_ecc_dec_b (
      .data_i    (rf_rdata_b_ecc_i),
      .data_o    (),
      .syndrome_o(),
      .err_o     (rf_ecc_err_b)
    );

    // Assign read outputs - no error correction, just trigger an alert
    assign rf_rdata_a = rf_rdata_a_ecc_i[31:0];
    assign rf_rdata_b = rf_rdata_b_ecc_i[31:0];

    // Calculate errors - qualify with WB forwarding to avoid xprop into the alert signal
    assign rf_ecc_err_a_id = |rf_ecc_err_a & rf_ren_a & ~rf_rd_a_wb_match;
    assign rf_ecc_err_b_id = |rf_ecc_err_b & rf_ren_b & ~rf_rd_b_wb_match;

    // Combined error
    assign rf_ecc_err_comb = instr_valid_id & (rf_ecc_err_a_id | rf_ecc_err_b_id);

    assign rf_trvk_par_o = 7'h0;
    assign rf_trsv_par_o = 7'h0;

  end else begin : gen_no_regfile_ecc
    // ECC disabled: pass data through unchanged and tie off error/parity outputs.
    logic unused_rf_ren_a, unused_rf_ren_b;
    logic unused_rf_rd_a_wb_match, unused_rf_rd_b_wb_match;

    assign unused_rf_ren_a         = rf_ren_a;
    assign unused_rf_ren_b         = rf_ren_b;
    assign unused_rf_rd_a_wb_match = rf_rd_a_wb_match;
    assign unused_rf_rd_b_wb_match = rf_rd_b_wb_match;
    assign rf_wdata_wb_ecc_o       = rf_wdata_wb;
    assign rf_rdata_a              = rf_rdata_a_ecc_i;
    assign rf_rdata_b              = rf_rdata_b_ecc_i;
    assign rf_ecc_err_comb         = 1'b0;

    assign rf_trvk_par_o = 7'h0;
    assign rf_trsv_par_o = 7'h0;
  end

  ///////////////////////
  // Crash dump output //
  ///////////////////////

  assign crash_dump_o.current_pc     = pc_id;
  assign crash_dump_o.next_pc        = pc_if;
  assign crash_dump_o.last_data_addr = lsu_addr_last;
  assign crash_dump_o.exception_addr = csr_mepc;

  ///////////////////
  // Alert outputs //
  ///////////////////

  // Minor alert - core is in a recoverable state
  // TODO add I$ ECC errors here
  assign alert_minor_o = 1'b0;

  // Major alert - core is unrecoverable
  assign alert_major_o = rf_ecc_err_comb | pc_mismatch_alert | csr_shadow_err;

  ////////////////////////
  // RF (Register File) //
  ////////////////////////
`ifdef RVFI
`endif


  /////////////////////////////////////////
  // CSRs (Control and Status Registers) //
  /////////////////////////////////////////

  // CSR write data comes from ALU operand A; the CSR address is taken from
  // operand B only when the instruction actually accesses a CSR.
  assign csr_wdata = alu_operand_a_ex;
  assign csr_addr  = csr_num_e'(csr_access ?
alu_operand_b_ex[11:0] : 12'b0);

  // Control/status registers: CSRs, CHERI SCRs, interrupt control, debug CSRs,
  // performance counters, PMP configuration and PCC management.
  cheriot_cs_registers #(
    .DbgTriggerEn     (DbgTriggerEn),
    .DbgHwBreakNum    (DbgHwBreakNum),
    .DataIndTiming    (DataIndTiming),
    .DummyInstructions(DummyInstructions),
    .ShadowCSR        (ShadowCSR),
    .ICache           (ICache),
    .MHPMCounterNum   (MHPMCounterNum),
    .MHPMCounterWidth (MHPMCounterWidth),
    .PMPEnable        (PMPEnable),
    .PMPGranularity   (PMPGranularity),
    .PMPNumRegions    (PMPNumRegions),
    .RV32E            (RV32E),
    .RV32M            (RV32M),
    .RV32B            (RV32B),
    .CHERIoTEn        (CHERIoTEn)
  ) cs_registers_i (
    .clk_i (clk_i),
    .rst_ni(rst_ni),

    .cheri_pmode_i(cheri_pmode_i),

    // Hart ID from outside
    .hart_id_i      (hart_id_i),
    .priv_mode_id_o (priv_mode_id),
    .priv_mode_lsu_o(priv_mode_lsu),

    // mtvec
    .csr_mtvec_o     (csr_mtvec),
    .csr_mtvec_init_i(csr_mtvec_init),
    .boot_addr_i     (boot_addr_i),

    // Interface to CSRs ( SRAM like )
    .csr_access_i(csr_access),
    .csr_addr_i  (csr_addr),
    .csr_wdata_i (csr_wdata),
    .csr_op_i    (csr_op),
    .csr_op_en_i (csr_op_en),
    .csr_rdata_o (csr_rdata),

    // CHERI SCR access interface (from cheri_ex)
    .cheri_csr_access_i (cheri_csr_access),
    .cheri_csr_addr_i   (cheri_csr_addr),
    .cheri_csr_wdata_i  (cheri_csr_wdata),
    .cheri_csr_wcap_i   (cheri_csr_wcap),
    .cheri_csr_op_i     (cheri_csr_op),
    .cheri_csr_op_en_i  (cheri_csr_op_en),
    .cheri_csr_set_mie_i(cheri_csr_set_mie),
    .cheri_csr_clr_mie_i(cheri_csr_clr_mie),
    .cheri_csr_rdata_o  (cheri_csr_rdata),
    .cheri_csr_rcap_o   (cheri_csr_rcap),

    // stack high-water mark CSRs
    .csr_mshwm_o    (csr_mshwm),
    .csr_mshwmb_o   (csr_mshwmb),
    .csr_mshwm_set_i(csr_mshwm_set),
    .csr_mshwm_new_i(csr_mshwm_new),

    // Interrupt related control signals
    .irq_software_i   (irq_software_i),
    .irq_timer_i      (irq_timer_i),
    .irq_external_i   (irq_external_i),
    .irq_fast_i       (irq_fast_i),
    .nmi_mode_i       (nmi_mode),
    .irq_pending_o    (irq_pending_o),
    .irqs_o           (irqs),
    .csr_mstatus_mie_o(csr_mstatus_mie),
    .csr_mstatus_tw_o (csr_mstatus_tw),
    .csr_mepc_o       (csr_mepc),

    // PMP
    .csr_pmp_cfg_o    (csr_pmp_cfg),
    .csr_pmp_addr_o   (csr_pmp_addr),
    .csr_pmp_mseccfg_o(csr_pmp_mseccfg),

    // debug
    .csr_depc_o         (csr_depc),
    .debug_mode_i       (debug_mode),
    .debug_cause_i      (debug_cause),
    .debug_csr_save_i   (debug_csr_save),
    .debug_single_step_o(debug_single_step),
    .debug_ebreakm_o    (debug_ebreakm),
    .debug_ebreaku_o    (debug_ebreaku),
    .trigger_match_o    (trigger_match),

    .pc_if_i(pc_if),
    .pc_id_i(pc_id),
    .pc_wb_i(pc_wb),

    .data_ind_timing_o    (data_ind_timing),
    .dummy_instr_en_o     (dummy_instr_en),
    .dummy_instr_mask_o   (dummy_instr_mask),
    .dummy_instr_seed_en_o(dummy_instr_seed_en),
    .dummy_instr_seed_o   (dummy_instr_seed),
    .icache_enable_o      (icache_enable),
    .csr_shadow_err_o     (csr_shadow_err),

    .csr_save_if_i     (csr_save_if),
    .csr_save_id_i     (csr_save_id),
    .csr_save_wb_i     (csr_save_wb),
    .csr_restore_mret_i(csr_restore_mret_id),
    .csr_restore_dret_i(csr_restore_dret_id),
    .csr_save_cause_i  (csr_save_cause),
    .csr_mepcc_clrtag_i(csr_mepcc_clrtag),
    .csr_mcause_i      (exc_cause),
    .csr_mtval_i       (csr_mtval),
    .illegal_csr_insn_o(illegal_csr_insn_id),

    // implicit .name connection to the module-level double_fault_seen_o port
    .double_fault_seen_o,

    // performance counter related signals
    .instr_ret_i                (perf_instr_ret_wb),
    .instr_ret_compressed_i     (perf_instr_ret_compressed_wb),
    .instr_ret_spec_i           (perf_instr_ret_wb_spec),
    .instr_ret_compressed_spec_i(perf_instr_ret_compressed_wb_spec),
    .iside_wait_i               (perf_iside_wait),
    .jump_i                     (perf_jump),
    .branch_i                   (perf_branch),
    .branch_taken_i             (perf_tbranch),
    .mem_load_i                 (perf_load),
    .mem_store_i                (perf_store),
    .dside_wait_i               (perf_dside_wait),
    .mul_wait_i                 (perf_mul_wait),
    .div_wait_i                 (perf_div_wait),

    // PCC update loop: cheri_ex produces pcc_cap_w, cs_registers registers it
    // and feeds pcc_cap_r back to IF/ID/EX.
    .cheri_branch_req_i   (cheri_branch_req),
    .cheri_branch_target_i(branch_target_ex_cheri),
    .pcc_cap_i            (pcc_cap_w),
    .pcc_cap_o            (pcc_cap_r),
    .csr_dbg_tclr_fault_o (csr_dbg_tclr_fault),
    .cheri_fatal_err_o    (cheri_fatal_err_o)
  );


  // Physical memory protection: three check channels - instruction fetch,
  // fetch+2 (second half of an unaligned 32-bit fetch), and data.
  if (PMPEnable) begin : g_pmp
    logic [33:0] pmp_req_addr [PMP_NUM_CHAN];
    pmp_req_e    pmp_req_type [PMP_NUM_CHAN];
    priv_lvl_e   pmp_priv_lvl [PMP_NUM_CHAN];

    assign pmp_req_addr[PMP_I]  = {2'b00, pc_if};
    assign pmp_req_type[PMP_I]  = PMP_ACC_EXEC;
    assign pmp_priv_lvl[PMP_I]  = priv_mode_id;
    assign pmp_req_addr[PMP_I2] = {2'b00, (pc_if + 32'd2)};
    assign pmp_req_type[PMP_I2] = PMP_ACC_EXEC;
    assign pmp_priv_lvl[PMP_I2] = priv_mode_id;
    assign pmp_req_addr[PMP_D]  = {2'b00, data_addr_o[31:0]};
    assign pmp_req_type[PMP_D]  = data_we_o ? PMP_ACC_WRITE : PMP_ACC_READ;
    assign pmp_priv_lvl[PMP_D]  = priv_mode_lsu;

    cheriot_pmp #(
      .PMPGranularity(PMPGranularity),
      .PMPNumChan    (PMP_NUM_CHAN),
      .PMPNumRegions (PMPNumRegions)
    ) pmp_i (
      .clk_i (clk_i),
      .rst_ni(rst_ni),
      // Interface to CSRs
      .csr_pmp_cfg_i    (csr_pmp_cfg),
      .csr_pmp_addr_i   (csr_pmp_addr),
      .csr_pmp_mseccfg_i(csr_pmp_mseccfg),
      .priv_mode_i      (pmp_priv_lvl),
      // Access checking channels
      .pmp_req_addr_i(pmp_req_addr),
      .pmp_req_type_i(pmp_req_type),
      .pmp_req_err_o (pmp_req_err)
    );
  end else begin : g_no_pmp
    // Unused signal tieoff
    priv_lvl_e    unused_priv_lvl_ls;
    logic [33:0]  unused_csr_pmp_addr [PMPNumRegions];
    pmp_cfg_t     unused_csr_pmp_cfg  [PMPNumRegions];
    pmp_mseccfg_t unused_csr_pmp_mseccfg;
    assign unused_priv_lvl_ls     = priv_mode_lsu;
    assign unused_csr_pmp_addr    = csr_pmp_addr;
    assign unused_csr_pmp_cfg     = csr_pmp_cfg;
    assign unused_csr_pmp_mseccfg = csr_pmp_mseccfg;

    // Output tieoff
    assign pmp_req_err[PMP_I]  = 1'b0;
    assign pmp_req_err[PMP_I2] = 1'b0;
    assign pmp_req_err[PMP_D]  = 1'b0;
  end

`ifdef RVFI
  // When writeback stage is present RVFI information is emitted when instruction is finished in
  // third stage but some information must be captured whilst the instruction is in the second
  // stage. Without writeback stage RVFI information is all emitted when instruction retires in
  // second stage. RVFI outputs are all straight from flops.
So 2 stage pipeline requires a single + // set of flops (instr_info => RVFI_out), 3 stage pipeline requires two sets (instr_info => wb + // => RVFI_out) + localparam int RVFI_STAGES = WritebackStage ? 2 : 1; + + logic rvfi_stage_valid [RVFI_STAGES]; + logic [63:0] rvfi_stage_order [RVFI_STAGES]; + logic [31:0] rvfi_stage_insn [RVFI_STAGES]; + logic rvfi_stage_trap [RVFI_STAGES]; + logic rvfi_stage_halt [RVFI_STAGES]; + logic rvfi_stage_intr [RVFI_STAGES]; + logic [ 1:0] rvfi_stage_mode [RVFI_STAGES]; + logic [ 1:0] rvfi_stage_ixl [RVFI_STAGES]; + logic [ 4:0] rvfi_stage_rs1_addr [RVFI_STAGES]; + logic [ 4:0] rvfi_stage_rs2_addr [RVFI_STAGES]; + logic [ 4:0] rvfi_stage_rs3_addr [RVFI_STAGES]; + logic [31:0] rvfi_stage_rs1_rdata [RVFI_STAGES]; + logic [31:0] rvfi_stage_rs2_rdata [RVFI_STAGES]; + logic [31:0] rvfi_stage_rs3_rdata [RVFI_STAGES]; + reg_cap_t rvfi_stage_rs1_rcap [RVFI_STAGES]; + reg_cap_t rvfi_stage_rs2_rcap [RVFI_STAGES]; + logic [ 4:0] rvfi_stage_rd_addr [RVFI_STAGES]; + logic [31:0] rvfi_stage_rd_wdata [RVFI_STAGES]; + reg_cap_t rvfi_stage_rd_wcap [RVFI_STAGES]; + logic [31:0] rvfi_stage_pc_rdata [RVFI_STAGES]; + logic [31:0] rvfi_stage_pc_wdata [RVFI_STAGES]; + logic [31:0] rvfi_stage_mem_addr [RVFI_STAGES]; + logic [ 3:0] rvfi_stage_mem_rmask [RVFI_STAGES]; + logic [ 3:0] rvfi_stage_mem_wmask [RVFI_STAGES]; + logic [DataWidth-1:0] rvfi_stage_mem_rdata [RVFI_STAGES]; + reg_cap_t rvfi_stage_mem_rcap [RVFI_STAGES]; + logic [DataWidth-1:0] rvfi_stage_mem_wdata [RVFI_STAGES]; + reg_cap_t rvfi_stage_mem_wcap [RVFI_STAGES]; + logic rvfi_stage_mem_is_cap [RVFI_STAGES]; + + logic rvfi_instr_new_wb; + logic rvfi_intr_d; + logic rvfi_intr_q; + logic rvfi_set_trap_pc_d; + logic rvfi_set_trap_pc_q; + logic [31:0] rvfi_insn_id; + logic [4:0] rvfi_rs1_addr_d; + logic [4:0] rvfi_rs1_addr_q; + logic [4:0] rvfi_rs2_addr_d; + logic [4:0] rvfi_rs2_addr_q; + logic [4:0] rvfi_rs3_addr_d; + logic [31:0] rvfi_rs1_data_d; + logic [31:0] rvfi_rs1_data_q; + logic [31:0] 
rvfi_rs2_data_d; + logic [31:0] rvfi_rs2_data_q; + reg_cap_t rvfi_rs1_cap_d; + reg_cap_t rvfi_rs1_cap_q; + reg_cap_t rvfi_rs2_cap_d; + reg_cap_t rvfi_rs2_cap_q; + reg_cap_t rvfi_rd_cap_d; + reg_cap_t rvfi_rd_cap_q; + logic [31:0] rvfi_rs3_data_d; + logic [4:0] rvfi_rd_addr_wb; + logic [4:0] rvfi_rd_addr_q; + logic [4:0] rvfi_rd_addr_d; + logic [31:0] rvfi_rd_wdata_wb; + logic [31:0] rvfi_rd_wdata_d; + logic [31:0] rvfi_rd_wdata_q; + logic rvfi_rd_we_wb; + logic [3:0] rvfi_mem_mask_int; + logic [DataWidth-1:0] rvfi_mem_rdata_d; + logic [DataWidth-1:0] rvfi_mem_rdata_q; + logic [DataWidth-1:0] rvfi_mem_wdata_d; + logic [DataWidth-1:0] rvfi_mem_wdata_q; + logic [31:0] rvfi_mem_addr_d; + logic [31:0] rvfi_mem_addr_q; + logic rvfi_mem_is_cap_d; + logic rvfi_mem_is_cap_q; + reg_cap_t rvfi_mem_rcap_d; + reg_cap_t rvfi_mem_rcap_q; + reg_cap_t rvfi_mem_wcap_d; + reg_cap_t rvfi_mem_wcap_q; + logic rvfi_trap_id; + logic rvfi_trap_wb; + logic [63:0] rvfi_stage_order_d; + logic rvfi_id_done; + logic rvfi_wb_done; + + logic new_debug_req; + logic new_nmi; + logic new_irq; + cheriot_pkg::irqs_t captured_mip; + logic captured_nmi; + logic captured_debug_req; + logic captured_valid; + + // RVFI extension for co-simulation support + // debug_req and MIP captured at IF -> ID transition so one extra stage + cheriot_pkg::irqs_t rvfi_ext_stage_mip [RVFI_STAGES+1]; + logic rvfi_ext_stage_nmi [RVFI_STAGES+1]; + logic rvfi_ext_stage_debug_req [RVFI_STAGES+1]; + logic [63:0] rvfi_ext_stage_mcycle [RVFI_STAGES]; + + logic rvfi_stage_valid_d [RVFI_STAGES]; + + logic insn_c_hint; + + assign rvfi_valid = rvfi_stage_valid [RVFI_STAGES-1]; + assign rvfi_order = rvfi_stage_order [RVFI_STAGES-1]; + assign rvfi_insn = rvfi_stage_insn [RVFI_STAGES-1]; + assign rvfi_trap = rvfi_stage_trap [RVFI_STAGES-1]; + assign rvfi_halt = rvfi_stage_halt [RVFI_STAGES-1]; + assign rvfi_intr = rvfi_stage_intr [RVFI_STAGES-1]; + assign rvfi_mode = rvfi_stage_mode [RVFI_STAGES-1]; + assign rvfi_ixl = rvfi_stage_ixl 
[RVFI_STAGES-1]; + assign rvfi_rs1_addr = rvfi_stage_rs1_addr [RVFI_STAGES-1]; + assign rvfi_rs2_addr = rvfi_stage_rs2_addr [RVFI_STAGES-1]; + assign rvfi_rs3_addr = rvfi_stage_rs3_addr [RVFI_STAGES-1]; + assign rvfi_rs1_rdata = rvfi_stage_rs1_rdata[RVFI_STAGES-1]; + assign rvfi_rs2_rdata = rvfi_stage_rs2_rdata[RVFI_STAGES-1]; + assign rvfi_rs1_rcap = rvfi_stage_rs1_rcap [RVFI_STAGES-1]; + assign rvfi_rs2_rcap = rvfi_stage_rs2_rcap [RVFI_STAGES-1]; + assign rvfi_rs3_rdata = rvfi_stage_rs3_rdata[RVFI_STAGES-1]; + assign rvfi_rd_wdata = rvfi_stage_rd_wdata [RVFI_STAGES-1]; + assign rvfi_rd_wcap = rvfi_stage_rd_wcap [RVFI_STAGES-1]; + assign rvfi_pc_rdata = rvfi_stage_pc_rdata [RVFI_STAGES-1]; + assign rvfi_pc_wdata = rvfi_stage_pc_wdata [RVFI_STAGES-1]; + assign rvfi_mem_addr = rvfi_stage_mem_addr [RVFI_STAGES-1]; + assign rvfi_mem_rmask = rvfi_stage_mem_rmask[RVFI_STAGES-1]; + assign rvfi_mem_wmask = rvfi_stage_mem_wmask[RVFI_STAGES-1]; + assign rvfi_mem_rdata = rvfi_stage_mem_rdata[RVFI_STAGES-1]; + assign rvfi_mem_wdata = rvfi_stage_mem_wdata[RVFI_STAGES-1]; + assign rvfi_mem_is_cap = rvfi_stage_mem_is_cap[RVFI_STAGES-1]; + assign rvfi_mem_rcap = rvfi_stage_mem_rcap[RVFI_STAGES-1]; + assign rvfi_mem_wcap = rvfi_stage_mem_wcap[RVFI_STAGES-1]; + + // for HINT instructions like c.srai64/c.slli64, force rvfi_rd_addr output to 0 to match sail implementation + assign rvfi_rd_addr = insn_c_hint ? 
0 : rvfi_stage_rd_addr [RVFI_STAGES-1]; + + always_comb begin + if ((rvfi_insn[1:0] == 2'b01) && (rvfi_insn[15:13] == 3'b100) && (rvfi_insn[11:10] == 2'b00) && // c.srli64 + ({rvfi_insn[12], rvfi_insn[6:2]} == 6'h0) && + (rvfi_rs1_addr == rvfi_rd_addr) && (rvfi_rs1_rdata == rvfi_rd_wdata)) + insn_c_hint = 1'b1; + else if ((rvfi_insn[1:0] == 2'b01) && (rvfi_insn[15:13] == 3'b100) && (rvfi_insn[11:10] == 2'b01) && // c.srai64 + ({rvfi_insn[12], rvfi_insn[6:2]} == 6'h0) && + (rvfi_rs1_addr == rvfi_rd_addr) && (rvfi_rs1_rdata == rvfi_rd_wdata)) + insn_c_hint = 1'b1; + else + insn_c_hint = 1'b0; + + + end + + assign rvfi_rd_addr_wb = rf_waddr_wb; + assign rvfi_rd_wdata_wb = rf_we_wb ? rf_wdata_wb : rf_wdata_lsu; // this doesn't look right but ok + assign rvfi_rd_we_wb = rf_we_wb | rf_we_lsu; + + always_comb begin + // Use always_comb instead of continuous assign so first assign can set 0 as default everywhere + // that is overridden by more specific settings. + rvfi_ext_mip = '0; + rvfi_ext_mip[CSR_MSIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_software; + rvfi_ext_mip[CSR_MTIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_timer; + rvfi_ext_mip[CSR_MEIX_BIT] = rvfi_ext_stage_mip[RVFI_STAGES].irq_external; + rvfi_ext_mip[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = rvfi_ext_stage_mip[RVFI_STAGES].irq_fast; + end + + assign rvfi_ext_nmi = rvfi_ext_stage_nmi[RVFI_STAGES]; + assign rvfi_ext_debug_req = rvfi_ext_stage_debug_req[RVFI_STAGES]; + assign rvfi_ext_mcycle = rvfi_ext_stage_mcycle[RVFI_STAGES-1]; + + // When an instruction takes a trap the `rvfi_trap` signal will be set. Instructions that take + // traps flush the pipeline so ordinarily wouldn't be seen to be retire. The RVFI tracking + // pipeline is kept going for flushed instructions that trapped so they are still visible on the + // RVFI interface. 
+ + // Factor in exceptions taken in ID so RVFI tracking picks up flushed instructions that took + // a trap + // kliu 05082024: add the ~wb_exception_o iterm to handle the corner case where + // ID and WB both faulted, e.g., illegal_insn in ID and cheri_wb_err in WB + // The previous behavior is 2 rvfi items in the trace (both traps), + // even if the instruction in the ID is never executed. + // The new behavior only generate 1 rvfi item for wb stage fault + assign rvfi_id_done = instr_id_done | (id_stage_i.controller_i.rvfi_flush_next & + id_stage_i.controller_i.id_exception_o & + ~id_stage_i.controller_i.wb_exception_o); + + if (WritebackStage) begin : gen_rvfi_wb_stage + logic unused_instr_new_id; + + assign unused_instr_new_id = instr_new_id; + + // With writeback stage first RVFI stage buffers instruction information captured in ID/EX + // awaiting instruction retirement and RF Write data/Mem read data whilst instruction is in WB + // So first stage becomes valid when instruction leaves ID/EX stage and remains valid until + // instruction leaves WB + assign rvfi_stage_valid_d[0] = (rvfi_id_done & ~dummy_instr_id) | + (rvfi_stage_valid[0] & ~rvfi_wb_done); + // Second stage is output stage so simple valid cycle after instruction leaves WB (and so has + // retired) + assign rvfi_stage_valid_d[1] = rvfi_wb_done; + + // Signal new instruction in WB cycle after instruction leaves ID/EX (to enter WB) + logic rvfi_instr_new_wb_q; + + // Signal new instruction in WB either when one has just entered or when a trap is progressing + // through the tracking pipeline + assign rvfi_instr_new_wb = rvfi_instr_new_wb_q | (rvfi_stage_valid[0] & rvfi_stage_trap[0]); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_instr_new_wb_q <= 0; + end else begin + rvfi_instr_new_wb_q <= rvfi_id_done; + end + end + + assign rvfi_trap_id = id_stage_i.controller_i.id_exception_o; + assign rvfi_trap_wb = id_stage_i.controller_i.exc_req_wb ; + // WB is 
instantly done in the tracking pipeline when a trap is progress through the pipeline + assign rvfi_wb_done = instr_done_wb | (rvfi_stage_valid[0] & rvfi_stage_trap[0]); + end else begin : gen_rvfi_no_wb_stage + // Without writeback stage first RVFI stage is output stage so simply valid the cycle after + // instruction leaves ID/EX (and so has retired) + assign rvfi_stage_valid_d[0] = rvfi_id_done & ~dummy_instr_id; + // Without writeback stage signal new instr_new_wb when instruction enters ID/EX to correctly + // setup register write signals + assign rvfi_instr_new_wb = instr_new_id; + assign rvfi_trap_id = id_stage_i.controller_i.exc_req_d | id_stage_i.controller_i.exc_req_lsu; + assign rvfi_trap_wb = 1'b0; + assign rvfi_wb_done = instr_done_wb; + end + + assign rvfi_stage_order_d = dummy_instr_id ? rvfi_stage_order[0] : rvfi_stage_order[0] + 64'd1; + + // For interrupts and debug Ibex will take the relevant trap as soon as whatever instruction in ID + // finishes or immediately if the ID stage is empty. The rvfi_ext interface provides the DV + // environment with information about the irq/debug_req/nmi state that applies to a particular + // instruction. + // + // When a irq/debug_req/nmi appears the ID stage will finish whatever instruction it is currently + // executing (if any) then take the trap the cycle after that instruction leaves the ID stage. The + // trap taken depends upon the state of irq/debug_req/nmi on that cycle. In the cycles following + // that before the first instruction of the trap handler enters the ID stage the state of + // irq/debug_req/nmi could change but this has no effect on the trap handler (e.g. a higher + // priority interrupt might appear but this wouldn't stop the lower priority interrupt trap + // handler executing first as it's already being fetched). 
To provide the DV environment with the + // correct information for it to verify execution we need to capture the irq/debug_req/nmi state + // the cycle the trap decision is made. Which the captured_X signals below do. + // + // The new_X signals take the raw irq/debug_req/nmi inputs and factor in the enable terms required + // to determine if a trap will actually happen. + // + // These signals and the comment above are referred to in the documentation (cosim.rst). If + // altering the names or meanings of these signals or this comment please adjust the documentation + // appropriately. + assign new_debug_req = (debug_req_i & ~debug_mode); + assign new_nmi = irq_nm_i & ~nmi_mode & ~debug_mode; + assign new_irq = irq_pending_o & csr_mstatus_mie & ~nmi_mode & ~debug_mode; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + captured_valid <= 1'b0; + captured_mip <= '0; + captured_nmi <= 1'b0; + captured_debug_req <= 1'b0; + end else begin + // Capture when ID stage has emptied out and something occurs that will cause a trap and we + // haven't yet captured + if (~instr_valid_id & (new_debug_req | new_irq | new_nmi) & ~captured_valid) begin + captured_valid <= 1'b1; + captured_nmi <= irq_nm_i; + captured_mip <= cs_registers_i.mip; + captured_debug_req <= debug_req_i; + end + + // Capture cleared out as soon as a new instruction appears in ID + if (if_stage_i.instr_valid_id_d) begin + captured_valid <= 1'b0; + end + end + end + + // Pass the captured irq/debug_req/nmi state to the rvfi_ext interface tracking pipeline. + // + // To correctly capture we need to factor in various enable terms, should there be a fault in this + // logic we won't tell the DV environment about a trap that should have been taken. So if there's + // no valid capture we grab the raw values of the irq/debug_req/nmi inputs whatever they are and + // the DV environment will see if a trap should have been taken but wasn't. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_ext_stage_mip[0] <= '0; + rvfi_ext_stage_nmi[0] <= '0; + rvfi_ext_stage_debug_req[0] <= '0; + end else if (if_stage_i.instr_valid_id_d & if_stage_i.instr_new_id_d) begin + rvfi_ext_stage_mip[0] <= instr_valid_id | ~captured_valid ? cs_registers_i.mip : + captured_mip; + rvfi_ext_stage_nmi[0] <= instr_valid_id | ~captured_valid ? irq_nm_i : + captured_nmi; + rvfi_ext_stage_debug_req[0] <= instr_valid_id | ~captured_valid ? debug_req_i : + captured_debug_req; + end + end + + logic is_mem_rd, is_mem_wr; + assign is_mem_rd = lsu_req & ~lsu_we; + assign is_mem_wr = lsu_req & lsu_we; + + for (genvar i = 0; i < RVFI_STAGES; i = i + 1) begin : g_rvfi_stages + int im1; + + // this is just to get rid of the VCS elab warning (i-1 out of range when i==0) + if (i == 0) + assign im1 = 0; + else + assign im1 = i-1; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_stage_halt[i] <= '0; + rvfi_stage_trap[i] <= '0; + rvfi_stage_intr[i] <= '0; + rvfi_stage_order[i] <= '0; + rvfi_stage_insn[i] <= '0; + rvfi_stage_mode[i] <= {PRIV_LVL_M}; + rvfi_stage_ixl[i] <= CSR_MISA_MXL; + rvfi_stage_rs1_addr[i] <= '0; + rvfi_stage_rs2_addr[i] <= '0; + rvfi_stage_rs3_addr[i] <= '0; + rvfi_stage_pc_rdata[i] <= '0; + rvfi_stage_pc_wdata[i] <= '0; + rvfi_stage_mem_rmask[i] <= '0; + rvfi_stage_mem_wmask[i] <= '0; + rvfi_stage_valid[i] <= '0; + rvfi_stage_rs1_rdata[i] <= '0; + rvfi_stage_rs2_rdata[i] <= '0; + rvfi_stage_rs3_rdata[i] <= '0; + rvfi_stage_rs1_rcap[i] <= NULL_REG_CAP; + rvfi_stage_rs2_rcap[i] <= NULL_REG_CAP;; + rvfi_stage_rd_wdata[i] <= '0; + rvfi_stage_rd_wcap[i] <= NULL_REG_CAP;; + rvfi_stage_rd_addr[i] <= '0; + rvfi_stage_mem_rdata[i] <= '0; + rvfi_stage_mem_wdata[i] <= '0; + rvfi_stage_mem_addr[i] <= '0; + rvfi_ext_stage_mip[i+1] <= '0; + rvfi_ext_stage_nmi[i+1] <= '0; + rvfi_ext_stage_debug_req[i+1] <= '0; + rvfi_ext_stage_mcycle[i] <= '0; + end else begin + 
rvfi_stage_valid[i] <= rvfi_stage_valid_d[i]; + + if (i == 0) begin + if (rvfi_id_done) begin + rvfi_stage_halt[i] <= '0; + // TODO: Sort this out for writeback stage + rvfi_stage_trap[i] <= rvfi_trap_id; + rvfi_stage_intr[i] <= rvfi_intr_d; + rvfi_stage_order[i] <= rvfi_stage_order_d; + rvfi_stage_insn[i] <= rvfi_insn_id; + rvfi_stage_mode[i] <= {priv_mode_id}; + rvfi_stage_ixl[i] <= CSR_MISA_MXL; + rvfi_stage_rs1_addr[i] <= rvfi_rs1_addr_d; + rvfi_stage_rs2_addr[i] <= rvfi_rs2_addr_d; + rvfi_stage_rs3_addr[i] <= rvfi_rs3_addr_d; + rvfi_stage_pc_rdata[i] <= pc_id; + rvfi_stage_pc_wdata[i] <= pc_set ? branch_target_ex : pc_if; + rvfi_stage_mem_rmask[i] <= is_mem_rd ? rvfi_mem_mask_int : 4'b0000; // kliu + rvfi_stage_mem_wmask[i] <= is_mem_wr ? rvfi_mem_mask_int : 4'b0000; + rvfi_stage_rs1_rdata[i] <= rvfi_rs1_data_d; + rvfi_stage_rs2_rdata[i] <= rvfi_rs2_data_d; + rvfi_stage_rs3_rdata[i] <= rvfi_rs3_data_d; + rvfi_stage_rs1_rcap[i] <= rvfi_rs1_cap_d; + rvfi_stage_rs2_rcap[i] <= rvfi_rs2_cap_d; + rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d; + rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d; + rvfi_stage_rd_wcap[i] <= rvfi_rd_cap_d; + rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d; + rvfi_stage_mem_wdata[i] <= rvfi_mem_wdata_d; + rvfi_stage_mem_rcap[i] <= rvfi_mem_rcap_d; + rvfi_stage_mem_wcap[i] <= rvfi_mem_wcap_d; + rvfi_stage_mem_is_cap[i] <= rvfi_mem_is_cap_d; + rvfi_stage_mem_addr[i] <= rvfi_mem_addr_d; + rvfi_ext_stage_mip[i+1] <= rvfi_ext_stage_mip[i]; + rvfi_ext_stage_nmi[i+1] <= rvfi_ext_stage_nmi[i]; + rvfi_ext_stage_debug_req[i+1] <= rvfi_ext_stage_debug_req[i]; + rvfi_ext_stage_mcycle[i] <= cs_registers_i.mcycle_counter_i.counter_val_o; + end + end else begin + if (rvfi_wb_done) begin + rvfi_stage_halt[i] <= rvfi_stage_halt[im1]; + rvfi_stage_trap[i] <= rvfi_stage_trap[im1] | rvfi_trap_wb; + rvfi_stage_intr[i] <= rvfi_stage_intr[im1]; + rvfi_stage_order[i] <= rvfi_stage_order[im1]; + rvfi_stage_insn[i] <= rvfi_stage_insn[im1]; + rvfi_stage_mode[i] <= 
rvfi_stage_mode[im1]; + rvfi_stage_ixl[i] <= rvfi_stage_ixl[im1]; + rvfi_stage_rs1_addr[i] <= rvfi_stage_rs1_addr[im1]; + rvfi_stage_rs2_addr[i] <= rvfi_stage_rs2_addr[im1]; + rvfi_stage_rs3_addr[i] <= rvfi_stage_rs3_addr[im1]; + rvfi_stage_pc_rdata[i] <= rvfi_stage_pc_rdata[im1]; + rvfi_stage_pc_wdata[i] <= rvfi_stage_pc_wdata[im1]; + rvfi_stage_mem_rmask[i] <= rvfi_trap_wb ? 4'b0000 : rvfi_stage_mem_rmask[im1]; + rvfi_stage_mem_wmask[i] <= rvfi_trap_wb ? 4'b0000 : rvfi_stage_mem_wmask[im1]; + rvfi_stage_rs1_rdata[i] <= rvfi_stage_rs1_rdata[im1]; + rvfi_stage_rs2_rdata[i] <= rvfi_stage_rs2_rdata[im1]; + rvfi_stage_rs3_rdata[i] <= rvfi_stage_rs3_rdata[im1]; + rvfi_stage_mem_wdata[i] <= rvfi_stage_mem_wdata[im1]; + rvfi_stage_mem_is_cap[i] <= rvfi_stage_mem_is_cap[im1]; + rvfi_stage_mem_wcap[i] <= rvfi_stage_mem_wcap[im1]; + rvfi_stage_mem_addr[i] <= rvfi_stage_mem_addr[im1]; + rvfi_stage_rs1_rcap[i] <= rvfi_stage_rs1_rcap[im1]; + rvfi_stage_rs2_rcap[i] <= rvfi_stage_rs2_rcap[im1]; + + // For 2 RVFI_STAGES/Writeback Sor 2 Rtage ignore first stage flops for rd_addr, rd_wdata and + // mem_rdata. For RF write addr/data actual write happens in writeback so capture + // address/data there. For mem_rdata that is only available from the writeback stage. 
+ // Previous stage flops still exist in RTL as they are used by the non writeback config + rvfi_stage_rd_addr[i] <= rvfi_rd_addr_d; + rvfi_stage_rd_wdata[i] <= rvfi_rd_wdata_d; + rvfi_stage_mem_rdata[i] <= rvfi_mem_rdata_d; + rvfi_stage_mem_rcap[i] <= rvfi_mem_rcap_d; + rvfi_stage_rd_wcap[i] <= rvfi_rd_cap_d; + + rvfi_ext_stage_mip[i+1] <= rvfi_ext_stage_mip[i]; + rvfi_ext_stage_nmi[i+1] <= rvfi_ext_stage_nmi[i]; + rvfi_ext_stage_debug_req[i+1] <= rvfi_ext_stage_debug_req[i]; + rvfi_ext_stage_mcycle[i] <= rvfi_ext_stage_mcycle[im1]; + end + end + end + end + end + + // Memory adddress/write data available first cycle of ld/st instruction from register read + always_comb begin + if (~CheriTBRE & instr_first_cycle_id) begin + // rvfi_mem_addr_d = alu_adder_result_ex; + rvfi_mem_addr_d = lsu_addr; + rvfi_mem_wdata_d = lsu_wdata; + rvfi_mem_wcap_d = lsu_wcap; + rvfi_mem_is_cap_d = lsu_is_cap; + end else if (CheriTBRE & lsu_req & cpu_lsu_dec & ~lsu_addr_incr_req) begin + rvfi_mem_addr_d = lsu_addr; + rvfi_mem_wdata_d = lsu_wdata; + rvfi_mem_wcap_d = lsu_wcap; + rvfi_mem_is_cap_d = lsu_is_cap; + end else begin + rvfi_mem_addr_d = rvfi_mem_addr_q; + rvfi_mem_wdata_d = rvfi_mem_wdata_q; + rvfi_mem_wcap_d = rvfi_mem_wcap_q; + rvfi_mem_is_cap_d = rvfi_mem_is_cap_q; + end + end + + // Capture read data from LSU when it becomes valid + always_comb begin + if (load_store_unit_i.resp_is_cap_q & lsu_resp_valid) begin + rvfi_mem_rdata_d = rf_wdata_lsu; + rvfi_mem_rcap_d = rf_wcap_lsu; + end else if (lsu_resp_valid) begin + rvfi_mem_rdata_d = rf_wdata_lsu; + rvfi_mem_rcap_d = rvfi_mem_rcap_q; + end else begin + rvfi_mem_rdata_d = rvfi_mem_rdata_q; + rvfi_mem_rcap_d = rvfi_mem_rcap_q; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_mem_addr_q <= '0; + rvfi_mem_rdata_q <= '0; + rvfi_mem_wdata_q <= '0; + rvfi_mem_rcap_q <= NULL_REG_CAP; + rvfi_mem_wcap_q <= NULL_REG_CAP; + rvfi_mem_is_cap_q <= 1'b0; + end else begin + rvfi_mem_addr_q <= 
rvfi_mem_addr_d; + rvfi_mem_rdata_q <= rvfi_mem_rdata_d; + rvfi_mem_wdata_q <= rvfi_mem_wdata_d; + rvfi_mem_rcap_q <= rvfi_mem_rcap_d; + rvfi_mem_wcap_q <= rvfi_mem_wcap_d; + rvfi_mem_is_cap_q <=rvfi_mem_is_cap_d; + end + end + // Byte enable based on data type + always_comb begin + unique case (lsu_type) + 2'b00: rvfi_mem_mask_int = 4'b1111; + 2'b01: rvfi_mem_mask_int = 4'b0011; + 2'b10: rvfi_mem_mask_int = 4'b0001; + 2'b11: rvfi_mem_mask_int = 4'b0001; // kliu + default: rvfi_mem_mask_int = 4'b0000; + endcase + end + + always_comb begin + if (instr_is_compressed_id) begin + rvfi_insn_id = {16'b0, instr_rdata_c_id}; + end else begin + rvfi_insn_id = instr_rdata_id; + end + end + + // Source registers 1 and 2 are read in the first instruction cycle + // Source register 3 is read in the second instruction cycle. + if (CHERIoTEn) begin + always_comb begin + if (instr_first_cycle_id) begin + rvfi_rs1_cap_d = rf_ren_a ? g_cheri_ex.u_cheri_ex.rf_rcap_a : NULL_REG_CAP; + rvfi_rs2_cap_d = rf_ren_b ? g_cheri_ex.u_cheri_ex.rf_rcap_b : NULL_REG_CAP; + end else begin + rvfi_rs1_cap_d = rvfi_rs1_cap_q; + rvfi_rs2_cap_d = rvfi_rs2_cap_q; + end + end + end else begin + assign rvfi_rs1_cap_d = NULL_REG_CAP; + assign rvfi_rs2_cap_d = NULL_REG_CAP; + end + + always_comb begin + if (instr_first_cycle_id) begin + rvfi_rs1_data_d = rf_ren_a ? multdiv_operand_a_ex : '0; + rvfi_rs1_addr_d = rf_ren_a ? rf_raddr_a : '0; + rvfi_rs2_data_d = rf_ren_b ? multdiv_operand_b_ex : '0; + rvfi_rs2_addr_d = rf_ren_b ? 
rf_raddr_b : '0; + rvfi_rs3_data_d = '0; + rvfi_rs3_addr_d = '0; + end else begin + rvfi_rs1_data_d = rvfi_rs1_data_q; + rvfi_rs1_addr_d = rvfi_rs1_addr_q; + rvfi_rs2_data_d = rvfi_rs2_data_q; + rvfi_rs2_addr_d = rvfi_rs2_addr_q; + rvfi_rs3_data_d = multdiv_operand_a_ex; + rvfi_rs3_addr_d = rf_raddr_a; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_rs1_data_q <= '0; + rvfi_rs1_addr_q <= '0; + rvfi_rs2_data_q <= '0; + rvfi_rs2_addr_q <= '0; + rvfi_rs1_cap_q <= NULL_REG_CAP; + rvfi_rs2_cap_q <= NULL_REG_CAP; + end else begin + rvfi_rs1_data_q <= rvfi_rs1_data_d; + rvfi_rs1_addr_q <= rvfi_rs1_addr_d; + rvfi_rs2_data_q <= rvfi_rs2_data_d; + rvfi_rs2_addr_q <= rvfi_rs2_addr_d; + rvfi_rs1_cap_q <= rvfi_rs1_cap_d; + rvfi_rs2_cap_q <= rvfi_rs2_cap_d; + end + end + + always_comb begin + if (rvfi_rd_we_wb) begin + // Capture address/data of write to register file + rvfi_rd_addr_d = rvfi_rd_addr_wb; + // If writing to x0 zero write data as required by RVFI specification + if (rvfi_rd_addr_wb == 5'b0) begin + rvfi_rd_wdata_d = '0; + rvfi_rd_cap_d = NULL_REG_CAP; + end else begin + rvfi_rd_wdata_d = rvfi_rd_wdata_wb; + rvfi_rd_cap_d = rf_wcap_wb; + end + end else if (rvfi_instr_new_wb) begin + // If no RF write but new instruction in Writeback (when present) or ID/EX (when no writeback + // stage present) then zero RF write address/data as required by RVFI specification + rvfi_rd_addr_d = '0; + rvfi_rd_wdata_d = '0; + rvfi_rd_cap_d = NULL_REG_CAP; + end else begin + // Otherwise maintain previous value + rvfi_rd_addr_d = rvfi_rd_addr_q; + rvfi_rd_wdata_d = rvfi_rd_wdata_q; + rvfi_rd_cap_d = rvfi_rd_cap_q; + end + end + + // RD write register is refreshed only once per cycle and + // then it is kept stable for the cycle. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_rd_addr_q <= '0; + rvfi_rd_wdata_q <= '0; + rvfi_rd_cap_q <= NULL_REG_CAP; + end else begin + rvfi_rd_addr_q <= rvfi_rd_addr_d; + rvfi_rd_wdata_q <= rvfi_rd_wdata_d; + rvfi_rd_cap_q <= rvfi_rd_cap_d; + end + end + + // rvfi_intr must be set for first instruction that is part of a trap handler. + // On the first cycle of a new instruction see if a trap PC was set by the previous instruction, + // otherwise maintain value. + assign rvfi_intr_d = instr_first_cycle_id ? rvfi_set_trap_pc_q : rvfi_intr_q; + + always_comb begin + rvfi_set_trap_pc_d = rvfi_set_trap_pc_q; + + //if (pc_set && pc_mux_id == PC_EXC && // kliu - interrupt only + // (exc_pc_mux_id == EXC_PC_EXC || exc_pc_mux_id == EXC_PC_IRQ)) begin + if (pc_set && pc_mux_id == PC_EXC && (exc_pc_mux_id == EXC_PC_IRQ)) begin + // PC is set to enter a trap handler + rvfi_set_trap_pc_d = 1'b1; + end else if (rvfi_set_trap_pc_q && rvfi_id_done) begin + // first instruction has been executed after PC is set to trap handler + rvfi_set_trap_pc_d = 1'b0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + rvfi_set_trap_pc_q <= 1'b0; + rvfi_intr_q <= 1'b0; + end else begin + rvfi_set_trap_pc_q <= rvfi_set_trap_pc_d; + rvfi_intr_q <= rvfi_intr_d; + end + end + +`else + logic unused_instr_new_id, unused_instr_id_done, unused_instr_done_wb; + assign unused_instr_id_done = instr_id_done; + assign unused_instr_new_id = instr_new_id; + assign unused_instr_done_wb = instr_done_wb; +`endif + + // Certain parameter combinations are not supported + `ASSERT_INIT(IllegalParamSecure, !(SecureIbex && (RV32M == RV32MNone))) + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv b/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv new file mode 100644 index 0000000..f574eff --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_counter.sv
// cheriot_counter: 64-bit architectural counter (mcycle/minstret/mhpmcounterN style)
// with a configurable implemented width.
//
// Only the low `CounterWidth` bits are flopped; the remaining high bits read as
// zero (g_counter_narrow below). Software writes the low and high 32-bit halves
// separately via `counter_we_i` / `counterh_we_i`, and a write always takes
// priority over an increment in the same cycle.
//
// Ports:
//   counter_inc_i     - increment the counter by 1 this cycle (ignored if a write occurs)
//   counter_we_i      - write `counter_val_i` into bits [31:0]
//   counterh_we_i     - write `counter_val_i` into bits [63:32]
//   counter_val_i     - 32-bit write data (shared by both halves)
//   counter_val_o     - current 64-bit counter value (high bits zero when narrow)
//   counter_val_upd_o - incremented counter value, or hard-wired 0 when
//                       `ProvideValUpd` is clear (see parameter comment)
module cheriot_counter #(
  parameter int CounterWidth = 32,
  // When set `counter_val_upd_o` provides an incremented version of the counter value, otherwise
  // the output is hard-wired to 0. This is required to allow Xilinx DSP inference to work
  // correctly. When `ProvideValUpd` is set no DSPs are inferred.
  parameter bit ProvideValUpd = 0
) (
  input  logic        clk_i,
  input  logic        rst_ni,

  input  logic        counter_inc_i,
  input  logic        counterh_we_i,
  input  logic        counter_we_i,
  input  logic [31:0] counter_val_i,
  output logic [63:0] counter_val_o,
  output logic [63:0] counter_val_upd_o
);

  logic [63:0]             counter;      // full 64-bit view presented to readers
  logic [CounterWidth-1:0] counter_upd;  // counter + 1 (implemented width only)
  logic [63:0]             counter_load; // merged write data (old value with one half replaced)
  logic                    we;           // any software write this cycle
  logic [CounterWidth-1:0] counter_d;    // next flop value

  // Increment
  // Zero-extended +1 so the adder is exactly CounterWidth wide (helps DSP mapping).
  assign counter_upd = counter[CounterWidth-1:0] + {{CounterWidth - 1{1'b0}}, 1'b1};

  // Update
  always_comb begin
    // Write
    // Default: replace the low half; the `counterh_we_i` branch replaces the
    // high half instead. (Both halves share `counter_val_i` as write data.)
    we = counter_we_i | counterh_we_i;
    counter_load[63:32] = counter[63:32];
    counter_load[31:0]  = counter_val_i;
    if (counterh_we_i) begin
      counter_load[63:32] = counter_val_i;
      counter_load[31:0]  = counter[31:0];
    end

    // Next value logic: write beats increment beats hold.
    if (we) begin
      counter_d = counter_load[CounterWidth-1:0];
    end else if (counter_inc_i) begin
      counter_d = counter_upd[CounterWidth-1:0];
    end else begin
      counter_d = counter[CounterWidth-1:0];
    end
  end

`ifdef FPGA_XILINX
  // Set DSP pragma for supported xilinx FPGAs
  // NOTE(review): string value in an `int` localparam mirrors upstream lowRISC
  // Ibex; Vivado interprets the attribute value, 48 bits is the DSP48 accumulator
  // width so wider counters opt out of DSP mapping.
  localparam int DspPragma = CounterWidth < 49 ? "yes" : "no";
  (* use_dsp = DspPragma *) logic [CounterWidth-1:0] counter_q;

  // DSP output register requires synchronous reset.
  `define COUNTER_FLOP_RST posedge clk_i
`else
  logic [CounterWidth-1:0] counter_q;

  `define COUNTER_FLOP_RST posedge clk_i or negedge rst_ni
`endif

  // Counter flop
  // Sensitivity list comes from the macro above: synchronous reset on Xilinx
  // (so the flop can pack into the DSP output register), asynchronous otherwise.
  always_ff @(`COUNTER_FLOP_RST) begin
    if (!rst_ni) begin
      counter_q <= '0;
    end else begin
      counter_q <= counter_d;
    end
  end

  if (CounterWidth < 64) begin : g_counter_narrow
    // Unimplemented high bits of the write data are consumed here so lint
    // tools don't flag them.
    logic [63:CounterWidth] unused_counter_load;

    assign counter[CounterWidth-1:0] = counter_q;
    assign counter[63:CounterWidth]  = '0;

    if (ProvideValUpd) begin : g_counter_val_upd_o
      assign counter_val_upd_o[CounterWidth-1:0] = counter_upd;
    end else begin : g_no_counter_val_upd_o
      assign counter_val_upd_o[CounterWidth-1:0] = '0;
    end
    assign counter_val_upd_o[63:CounterWidth] = '0;
    assign unused_counter_load = counter_load[63:CounterWidth];
  end else begin : g_counter_full
    assign counter = counter_q;

    if (ProvideValUpd) begin : g_counter_val_upd_o
      assign counter_val_upd_o = counter_upd;
    end else begin : g_no_counter_val_upd_o
      assign counter_val_upd_o = '0;
    end
  end

  assign counter_val_o = counter;

endmodule

// Keep helper defines file-local.
`undef COUNTER_FLOP_RST
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv b/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv new file mode 100644 index 0000000..c7e91dd --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_cs_registers.sv
@@ -0,0 +1,1998 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Control and Status Registers + * + * Control and Status Registers (CSRs) following the RISC-V Privileged + * Specification, draft version 1.11 + */ + +`include "prim_assert.sv" + +module cheriot_cs_registers import cheri_pkg::*; #( + parameter bit DbgTriggerEn = 0, + parameter int unsigned DbgHwBreakNum = 1, + parameter bit DataIndTiming = 1'b0, + parameter bit DummyInstructions = 1'b0, + parameter bit ShadowCSR = 1'b0, + parameter bit ICache = 1'b0, + parameter int unsigned MHPMCounterNum = 10, + parameter int unsigned MHPMCounterWidth = 40, + parameter bit PMPEnable = 0, + parameter int unsigned PMPGranularity = 0, + parameter int unsigned PMPNumRegions = 4, + parameter bit RV32E = 0, + parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast, + parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone, + parameter bit CHERIoTEn = 1'b1 +) ( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + + input logic cheri_pmode_i, + // Hart ID + input logic [31:0] hart_id_i, + + // Privilege mode + output cheriot_pkg::priv_lvl_e priv_mode_id_o, + output cheriot_pkg::priv_lvl_e priv_mode_lsu_o, + output logic csr_mstatus_tw_o, + + // mtvec + output logic [31:0] csr_mtvec_o, + input logic csr_mtvec_init_i, + input logic [31:0] boot_addr_i, + + // Interface to registers (SRAM like) + input logic csr_access_i, + input cheriot_pkg::csr_num_e csr_addr_i, + input logic [31:0] csr_wdata_i, + input cheriot_pkg::csr_op_e csr_op_i, + input csr_op_en_i, + output logic [31:0] csr_rdata_o, + + input logic cheri_csr_access_i, + input logic [4:0] 
cheri_csr_addr_i, + input logic [31:0] cheri_csr_wdata_i, + input reg_cap_t cheri_csr_wcap_i, + input cheri_csr_op_e cheri_csr_op_i, + input logic cheri_csr_op_en_i, + input logic cheri_csr_set_mie_i, + input logic cheri_csr_clr_mie_i, + + output logic [31:0] cheri_csr_rdata_o, + output reg_cap_t cheri_csr_rcap_o, + + // stack highwatermark and fast-clearing function + output logic [31:0] csr_mshwm_o, + output logic [31:0] csr_mshwmb_o, + input logic csr_mshwm_set_i, + input logic [31:0] csr_mshwm_new_i, + + // interrupts + input logic irq_software_i, + input logic irq_timer_i, + input logic irq_external_i, + input logic [14:0] irq_fast_i, + input logic nmi_mode_i, + output logic irq_pending_o, // interrupt request pending + output cheriot_pkg::irqs_t irqs_o, // interrupt requests qualified with mie + output logic csr_mstatus_mie_o, + output logic [31:0] csr_mepc_o, + + // PMP + output cheriot_pkg::pmp_cfg_t csr_pmp_cfg_o [PMPNumRegions], + output logic [33:0] csr_pmp_addr_o [PMPNumRegions], + output cheriot_pkg::pmp_mseccfg_t csr_pmp_mseccfg_o, + + // debug + input logic debug_mode_i, + input cheriot_pkg::dbg_cause_e debug_cause_i, + input logic debug_csr_save_i, + output logic [31:0] csr_depc_o, + output logic debug_single_step_o, + output logic debug_ebreakm_o, + output logic debug_ebreaku_o, + output logic trigger_match_o, + + input logic [31:0] pc_if_i, + input logic [31:0] pc_id_i, + input logic [31:0] pc_wb_i, + + // CPU control bits + output logic data_ind_timing_o, + output logic dummy_instr_en_o, + output logic [2:0] dummy_instr_mask_o, + output logic dummy_instr_seed_en_o, + output logic [31:0] dummy_instr_seed_o, + output logic icache_enable_o, + output logic csr_shadow_err_o, + + // Exception save/restore + input logic csr_save_if_i, + input logic csr_save_id_i, + input logic csr_save_wb_i, + input logic csr_restore_mret_i, + input logic csr_restore_dret_i, + input logic csr_save_cause_i, + input logic csr_mepcc_clrtag_i, + input 
cheriot_pkg::exc_cause_e csr_mcause_i, + input logic [31:0] csr_mtval_i, + output logic illegal_csr_insn_o, // access to non-existent CSR, + // with wrong priviledge level, or + // missing write permissions + output logic double_fault_seen_o, + // Performance Counters + input logic instr_ret_i, // instr retired in ID/EX stage + input logic instr_ret_compressed_i, // compressed instr retired + input logic instr_ret_spec_i, // speculative instr_ret_i + input logic instr_ret_compressed_spec_i, // speculative instr_ret_compressed_i + input logic iside_wait_i, // core waiting for the iside + input logic jump_i, // jump instr seen (j, jr, jal, jalr) + input logic branch_i, // branch instr seen (bf, bnf) + input logic branch_taken_i, // branch was taken + input logic mem_load_i, // load from memory in this cycle + input logic mem_store_i, // store to memory in this cycle + input logic dside_wait_i, // core waiting for the dside + input logic mul_wait_i, // core waiting for multiply + input logic div_wait_i, // core waiting for divide + + input logic cheri_branch_req_i, + input logic [31:0] cheri_branch_target_i, + input pcc_cap_t pcc_cap_i, + output pcc_cap_t pcc_cap_o, + + output logic csr_dbg_tclr_fault_o, + output logic cheri_fatal_err_o + ); + + import cheriot_pkg::*; + + localparam int unsigned RV32BEnabled = (RV32B == RV32BNone) ? 0 : 1; + localparam int unsigned RV32MEnabled = (RV32M == RV32MNone) ? 0 : 1; + localparam int unsigned PMPAddrWidth = (PMPGranularity > 0) ? 
33 - PMPGranularity : 32; + + // misa + localparam logic [31:0] MISA_VALUE = + (0 << 0) // A - Atomic Instructions extension + | (RV32BEnabled << 1) // B - Bit-Manipulation extension + | (1 << 2) // C - Compressed extension + | (0 << 3) // D - Double precision floating-point extension + | (32'(RV32E) << 4) // E - RV32E base ISA + | (0 << 5) // F - Single precision floating-point extension + | (32'(!RV32E) << 8) // I - RV32I/64I/128I base ISA + | (RV32MEnabled << 12) // M - Integer Multiply/Divide extension + | (0 << 13) // N - User level interrupts supported + | (0 << 18) // S - Supervisor mode implemented + | (1 << 20) // U - User mode implemented + | (32'(CHERIoTEn) << 23) // X - Non-standard extensions present + | (32'(CSR_MISA_MXL) << 30); // M-XLEN + + typedef struct packed { + logic mie; + logic mpie; + priv_lvl_e mpp; + logic mprv; + logic tw; + } status_t; + + typedef struct packed { + logic mpie; + priv_lvl_e mpp; + } status_stk_t; + + typedef struct packed { + x_debug_ver_e xdebugver; + logic [11:0] zero2; + logic ebreakm; + logic zero1; + logic ebreaks; + logic ebreaku; + logic stepie; + logic stopcount; + logic stoptime; + dbg_cause_e cause; + logic zero0; + logic mprven; + logic nmip; + logic step; + priv_lvl_e prv; + } dcsr_t; + + // CPU control register fields + typedef struct packed { + logic double_fault_seen; + logic sync_exc_seen; + logic [2:0] dummy_instr_mask; + logic dummy_instr_en; + logic data_ind_timing; + logic icache_enable; + } cpu_ctrl_t; + + // Interrupt and exception control signals + logic [31:0] exception_pc; + + // CSRs + priv_lvl_e priv_lvl_q, priv_lvl_d; + status_t mstatus_q, mstatus_d; + logic mstatus_err; + logic mstatus_en; + irqs_t mie_q, mie_d; + logic mie_en; + logic [31:0] mscratch_q; + logic mscratch_en; + logic [31:0] mepc_q, mepc_d; + logic mepc_en; + reg_cap_t mepc_cap; + logic [5:0] mcause_q, mcause_d; + logic mcause_en; + logic [31:0] mtval_q, mtval_d; + logic mtval_en; + logic [31:0] mtvec_q, mtvec_d; + reg_cap_t 
mtvec_cap; + logic mtvec_err; + logic mtvec_en; + irqs_t mip; + dcsr_t dcsr_q, dcsr_d; + logic dcsr_en; + logic [31:0] depc_q, depc_d; + logic depc_en; + reg_cap_t depc_cap; + logic [31:0] dscratch0_q; + logic [31:0] dscratch1_q; + logic dscratch0_en, dscratch1_en; + reg_cap_t dscratch0_cap, dscratch1_cap; + logic [31:0] mshwm_q, mshwm_d; + logic [31:0] mshwmb_q; + logic mshwm_en, mshwmb_en; + logic [31:0] cdbg_ctrl_q; + logic cdbg_ctrl_en; + pcc_cap_t pcc_cap_q, pcc_cap_d; + + // CSRs for recoverable NMIs + // NOTE: these CSRS are nonstandard, see https://github.com/riscv/riscv-isa-manual/issues/261 + status_stk_t mstack_q, mstack_d; + logic mstack_en; + logic [31:0] mstack_epc_q, mstack_epc_d; + logic [5:0] mstack_cause_q, mstack_cause_d; + + // PMP Signals + logic [31:0] pmp_addr_rdata [PMP_MAX_REGIONS]; + logic [PMP_CFG_W-1:0] pmp_cfg_rdata [PMP_MAX_REGIONS]; + logic pmp_csr_err; + pmp_mseccfg_t pmp_mseccfg; + + // Hardware performance monitor signals + logic [31:0] mcountinhibit; + // Only have mcountinhibit flops for counters that actually exist + logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q; + logic mcountinhibit_we; + + // mhpmcounter flops are elaborated below providing only the precise number that is required based + // on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops + // where they exist and is otherwise 0. 
logic [63:0] mhpmcounter [32];
// Write strobes for the low / high 32-bit halves of each performance counter
logic [31:0] mhpmcounter_we;
logic [31:0] mhpmcounterh_we;
// Per-counter increment enables (event observed this cycle)
logic [31:0] mhpmcounter_incr;
logic [31:0] mhpmevent [32];
logic [4:0] mhpmcounter_idx;
logic unused_mhpmcounter_we_1;
logic unused_mhpmcounterh_we_1;
logic unused_mhpmcounter_incr_1;

logic [63:0] minstret_next, minstret_raw;

// Debug / trigger registers
logic [31:0] tselect_rdata;
logic [31:0] tmatch_control_rdata;
logic [31:0] tmatch_value_rdata;

// CPU control bits
cpu_ctrl_t cpuctrl_q, cpuctrl_d, cpuctrl_wdata_raw, cpuctrl_wdata;
logic cpuctrl_we;
logic cpuctrl_err;

// CSR update logic
logic [31:0] csr_wdata_int;
logic [31:0] csr_rdata_int;
logic csr_we_int;
logic csr_wr;

// Access violation signals
logic illegal_csr;
logic illegal_csr_priv;
logic illegal_csr_write;

logic [7:0] unused_boot_addr;
logic [2:0] unused_csr_addr;

// *_combi signals merge the normal RISC-V CSR write path with the CHERI
// capability write path (*_en_cheri / cheri_csr_wdata_i) for CSRs that are
// also written as capabilities.
logic mepc_en_combi, mepc_en_cheri;
logic [31:0] mepc_d_combi;

logic mtvec_en_combi, mtvec_en_cheri;
logic [31:0] mtvec_d_combi;

logic depc_en_combi, depc_en_cheri;
logic [31:0] depc_d_combi;

logic dscratch0_en_combi, dscratch0_en_cheri;
logic [31:0] dscratch0_d_combi;
logic dscratch1_en_combi, dscratch1_en_cheri;
logic [31:0] dscratch1_d_combi;

assign unused_boot_addr = boot_addr_i[7:0];

logic [31:0] misa_value_masked;

// Mask off the X (non-standard extensions) bit of misa when CHERI mode is
// disabled; {8'h0, ~cheri_pmode_i, 23'h0} selects bit 23 of the 32-bit value.
assign misa_value_masked = MISA_VALUE & ~{8'h0, ~cheri_pmode_i, 23'h0};


/////////////
// CSR reg //
/////////////

logic [$bits(csr_num_e)-1:0] csr_addr;
assign csr_addr = {csr_addr_i};
assign unused_csr_addr = csr_addr[7:5];
// Low 5 address bits index the mhpmcounter / mhpmevent arrays
assign mhpmcounter_idx = csr_addr[4:0];

// See RISC-V Privileged Specification, version 1.11, Section 2.1
// csr_addr[9:8] encodes the lowest privilege level allowed to access the CSR
assign illegal_csr_priv = (csr_addr[9:8] > {priv_lvl_q});
// csr_addr[11:10] == 2'b11 marks a read-only CSR; writing one is illegal
assign illegal_csr_write = (csr_addr[11:10] == 2'b11) && csr_wr;
assign illegal_csr_insn_o = csr_access_i & (illegal_csr | illegal_csr_write | illegal_csr_priv);

// mip CSR is purely combinational - must be able
to re-enable the clock upon WFI + assign mip.irq_software = irq_software_i; + assign mip.irq_timer = irq_timer_i; + assign mip.irq_external = irq_external_i; + assign mip.irq_fast = irq_fast_i; + + // read logic + always_comb begin + csr_rdata_int = '0; + illegal_csr = 1'b0; + + unique case (csr_addr_i) + // mvendorid: encoding of manufacturer/provider + CSR_MVENDORID: csr_rdata_int = (CHERIoTEn&cheri_pmode_i) ? CSR_MVENDORID_CHERI_VALUE : CSR_MVENDORID_VALUE; + // marchid: encoding of base microarchitecture + CSR_MARCHID: csr_rdata_int = (CHERIoTEn&cheri_pmode_i) ? CSR_MARCHID_CHERI_VALUE : CSR_MARCHID_VALUE; + // mimpid: encoding of processor implementation version + CSR_MIMPID: csr_rdata_int = CSR_MIMPID_VALUE; + // mhartid: unique hardware thread id + CSR_MHARTID: csr_rdata_int = hart_id_i; + + // mstatus: always M-mode, contains IE bit + CSR_MSTATUS: begin + csr_rdata_int = '0; + csr_rdata_int[CSR_MSTATUS_MIE_BIT] = mstatus_q.mie; + csr_rdata_int[CSR_MSTATUS_MPIE_BIT] = mstatus_q.mpie; + csr_rdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_q.mpp; + csr_rdata_int[CSR_MSTATUS_MPRV_BIT] = mstatus_q.mprv; + csr_rdata_int[CSR_MSTATUS_TW_BIT] = mstatus_q.tw; + end + + // misa + CSR_MISA: csr_rdata_int = misa_value_masked; + + // interrupt enable + CSR_MIE: begin + csr_rdata_int = '0; + csr_rdata_int[CSR_MSIX_BIT] = mie_q.irq_software; + csr_rdata_int[CSR_MTIX_BIT] = mie_q.irq_timer; + csr_rdata_int[CSR_MEIX_BIT] = mie_q.irq_external; + csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mie_q.irq_fast; + end + + // mcounteren: machine counter enable + CSR_MCOUNTEREN: begin + csr_rdata_int = '0; + end + + CSR_MSCRATCH: csr_rdata_int = mscratch_q; + + // mtvec: trap-vector base address + CSR_MTVEC: csr_rdata_int = mtvec_q; + + // mepc: exception program counter + CSR_MEPC: csr_rdata_int = mepc_q; + + // mcause: exception cause + CSR_MCAUSE: csr_rdata_int = {mcause_q[5], 26'b0, mcause_q[4:0]}; + + // mtval: trap value + CSR_MTVAL: csr_rdata_int = 
mtval_q; + + // mip: interrupt pending + CSR_MIP: begin + csr_rdata_int = '0; + csr_rdata_int[CSR_MSIX_BIT] = mip.irq_software; + csr_rdata_int[CSR_MTIX_BIT] = mip.irq_timer; + csr_rdata_int[CSR_MEIX_BIT] = mip.irq_external; + csr_rdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW] = mip.irq_fast; + end + + CSR_MSECCFG: begin + if (PMPEnable) begin + csr_rdata_int = '0; + csr_rdata_int[CSR_MSECCFG_MML_BIT] = pmp_mseccfg.mml; + csr_rdata_int[CSR_MSECCFG_MMWP_BIT] = pmp_mseccfg.mmwp; + csr_rdata_int[CSR_MSECCFG_RLB_BIT] = pmp_mseccfg.rlb; + end else begin + illegal_csr = 1'b1; + end + end + + CSR_MSECCFGH: begin + if (PMPEnable) begin + csr_rdata_int = '0; + end else begin + illegal_csr = 1'b1; + end + end + + // PMP registers + CSR_PMPCFG0: csr_rdata_int = {pmp_cfg_rdata[3], pmp_cfg_rdata[2], + pmp_cfg_rdata[1], pmp_cfg_rdata[0]}; + CSR_PMPCFG1: csr_rdata_int = {pmp_cfg_rdata[7], pmp_cfg_rdata[6], + pmp_cfg_rdata[5], pmp_cfg_rdata[4]}; + CSR_PMPCFG2: csr_rdata_int = {pmp_cfg_rdata[11], pmp_cfg_rdata[10], + pmp_cfg_rdata[9], pmp_cfg_rdata[8]}; + CSR_PMPCFG3: csr_rdata_int = {pmp_cfg_rdata[15], pmp_cfg_rdata[14], + pmp_cfg_rdata[13], pmp_cfg_rdata[12]}; + CSR_PMPADDR0: csr_rdata_int = pmp_addr_rdata[0]; + CSR_PMPADDR1: csr_rdata_int = pmp_addr_rdata[1]; + CSR_PMPADDR2: csr_rdata_int = pmp_addr_rdata[2]; + CSR_PMPADDR3: csr_rdata_int = pmp_addr_rdata[3]; + CSR_PMPADDR4: csr_rdata_int = pmp_addr_rdata[4]; + CSR_PMPADDR5: csr_rdata_int = pmp_addr_rdata[5]; + CSR_PMPADDR6: csr_rdata_int = pmp_addr_rdata[6]; + CSR_PMPADDR7: csr_rdata_int = pmp_addr_rdata[7]; + CSR_PMPADDR8: csr_rdata_int = pmp_addr_rdata[8]; + CSR_PMPADDR9: csr_rdata_int = pmp_addr_rdata[9]; + CSR_PMPADDR10: csr_rdata_int = pmp_addr_rdata[10]; + CSR_PMPADDR11: csr_rdata_int = pmp_addr_rdata[11]; + CSR_PMPADDR12: csr_rdata_int = pmp_addr_rdata[12]; + CSR_PMPADDR13: csr_rdata_int = pmp_addr_rdata[13]; + CSR_PMPADDR14: csr_rdata_int = pmp_addr_rdata[14]; + CSR_PMPADDR15: csr_rdata_int = pmp_addr_rdata[15]; + + 
CSR_DCSR: begin + csr_rdata_int = dcsr_q; + illegal_csr = ~debug_mode_i; + end + CSR_DPC: begin + csr_rdata_int = depc_q; + illegal_csr = ~debug_mode_i; + end + CSR_DSCRATCH0: begin + csr_rdata_int = dscratch0_q; + illegal_csr = ~debug_mode_i; + end + CSR_DSCRATCH1: begin + csr_rdata_int = dscratch1_q; + illegal_csr = ~debug_mode_i; + end + + // machine counter/timers + CSR_MCOUNTINHIBIT: csr_rdata_int = mcountinhibit; + CSR_MHPMEVENT3, + CSR_MHPMEVENT4, CSR_MHPMEVENT5, CSR_MHPMEVENT6, CSR_MHPMEVENT7, + CSR_MHPMEVENT8, CSR_MHPMEVENT9, CSR_MHPMEVENT10, CSR_MHPMEVENT11, + CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15, + CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19, + CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23, + CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27, + CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31: begin + csr_rdata_int = mhpmevent[mhpmcounter_idx]; + end + + CSR_MCYCLE, + CSR_MINSTRET, + CSR_MHPMCOUNTER3, + CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7, + CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11, + CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15, + CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19, + CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23, + CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27, + CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin + csr_rdata_int = mhpmcounter[mhpmcounter_idx][31:0]; + end + + CSR_MCYCLEH, + CSR_MINSTRETH, + CSR_MHPMCOUNTER3H, + CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H, + CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H, + CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H, + CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, 
CSR_MHPMCOUNTER19H, + CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H, + CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H, + CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin + csr_rdata_int = mhpmcounter[mhpmcounter_idx][63:32]; + end + + // Debug triggers + CSR_TSELECT: begin + csr_rdata_int = tselect_rdata; + illegal_csr = ~DbgTriggerEn; + end + CSR_TDATA1: begin + csr_rdata_int = tmatch_control_rdata; + illegal_csr = ~DbgTriggerEn; + end + CSR_TDATA2: begin + csr_rdata_int = tmatch_value_rdata; + illegal_csr = ~DbgTriggerEn; + end + CSR_TDATA3: begin + csr_rdata_int = '0; + illegal_csr = ~DbgTriggerEn; + end + CSR_MCONTEXT: begin + csr_rdata_int = '0; + illegal_csr = ~DbgTriggerEn; + end + CSR_SCONTEXT: begin + csr_rdata_int = '0; + illegal_csr = ~DbgTriggerEn; + end + + // Custom CSR for controlling CPU features + CSR_CPUCTRL: begin + csr_rdata_int = {{32 - $bits(cpu_ctrl_t) {1'b0}}, cpuctrl_q}; + end + + // Custom CSR for LFSR re-seeding (cannot be read) + CSR_SECURESEED: begin + csr_rdata_int = '0; + end + + // MSHWM CSR (stack high watermark in cheriot) + CSR_MSHWM: begin + if (cheri_pmode_i) begin + csr_rdata_int = cheri_pmode_i ? mshwm_q : 32'h0; + end else begin + illegal_csr = 1'b1; + end + end + + CSR_MSHWMB: begin + if (cheri_pmode_i) begin + csr_rdata_int = cheri_pmode_i ? mshwmb_q : 32'h0; + end else begin + illegal_csr = 1'b1; + end + end + + CSR_CDBG_CTRL: begin + if (cheri_pmode_i) begin + csr_rdata_int = cheri_pmode_i ? 
cdbg_ctrl_q : 32'h0; + end else begin + illegal_csr = 1'b1; + end + end + + default: begin + illegal_csr = 1'b1; + end + endcase + end + + // write logic + always_comb begin + exception_pc = pc_id_i; + + priv_lvl_d = priv_lvl_q; + mstatus_en = 1'b0; + mstatus_d = mstatus_q; + mie_en = 1'b0; + mscratch_en = 1'b0; + mepc_en = 1'b0; + mepc_d = {csr_wdata_int[31:1], 1'b0}; + mcause_en = 1'b0; + mcause_d = {csr_wdata_int[31], csr_wdata_int[4:0]}; + mtval_en = 1'b0; + mtval_d = csr_wdata_int; + mtvec_en = csr_mtvec_init_i; + // mtvec.MODE set to vectored + // mtvec.BASE must be 256-byte aligned + mtvec_d = csr_mtvec_init_i ? {boot_addr_i[31:8], 6'b0, 1'b0, ~(CHERIoTEn&cheri_pmode_i)} : + {csr_wdata_int[31:8], 6'b0, 1'b0, ~(CHERIoTEn&cheri_pmode_i)}; + dcsr_en = 1'b0; + dcsr_d = dcsr_q; + depc_d = {csr_wdata_int[31:1], 1'b0}; + depc_en = 1'b0; + dscratch0_en = 1'b0; + dscratch1_en = 1'b0; + + mstack_en = 1'b0; + mstack_d.mpie = mstatus_q.mpie; + mstack_d.mpp = mstatus_q.mpp; + mstack_epc_d = mepc_q; + mstack_cause_d = mcause_q; + + mcountinhibit_we = 1'b0; + mhpmcounter_we = '0; + mhpmcounterh_we = '0; + + cpuctrl_we = 1'b0; + cpuctrl_d = cpuctrl_q; + + mshwm_en = 1'b0; + mshwmb_en = 1'b0; + cdbg_ctrl_en = 1'b0; + + double_fault_seen_o = 1'b0; + + if (csr_we_int) begin + unique case (csr_addr_i) + // mstatus: IE bit + CSR_MSTATUS: begin + mstatus_en = 1'b1; + mstatus_d = '{ + mie: csr_wdata_int[CSR_MSTATUS_MIE_BIT], + mpie: csr_wdata_int[CSR_MSTATUS_MPIE_BIT], + mpp: priv_lvl_e'(csr_wdata_int[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW]), + mprv: csr_wdata_int[CSR_MSTATUS_MPRV_BIT], + tw: csr_wdata_int[CSR_MSTATUS_TW_BIT] + }; + // Convert illegal values to M-mode + if ((mstatus_d.mpp != PRIV_LVL_M) && (mstatus_d.mpp != PRIV_LVL_U)) begin + mstatus_d.mpp = PRIV_LVL_M; + end + end + + // interrupt enable + CSR_MIE: mie_en = 1'b1; + + CSR_MSCRATCH: mscratch_en = 1'b1; + + // mepc: exception program counter + CSR_MEPC: mepc_en = ~CHERIoTEn | ~cheri_pmode_i; // disabled 
for pure cap mode (only allow cap writes) + + // mcause + CSR_MCAUSE: mcause_en = 1'b1; + + // mtval: trap value + CSR_MTVAL: mtval_en = 1'b1; + + // mtvec + CSR_MTVEC: mtvec_en = ~CHERIoTEn | ~cheri_pmode_i; // disabled for pure cap mode (only allow cap writes) + + CSR_DCSR: begin + dcsr_d = csr_wdata_int; + dcsr_d.xdebugver = XDEBUGVER_STD; + // Change to PRIV_LVL_M if software writes an unsupported value + if ((dcsr_d.prv != PRIV_LVL_M) && (dcsr_d.prv != PRIV_LVL_U)) begin + dcsr_d.prv = PRIV_LVL_M; + end + + // Read-only for SW + dcsr_d.cause = dcsr_q.cause; + + // Interrupts always disabled during single stepping + dcsr_d.stepie = 1'b0; + + // currently not supported: + dcsr_d.nmip = 1'b0; + dcsr_d.mprven = 1'b0; + dcsr_d.stopcount = 1'b0; + dcsr_d.stoptime = 1'b0; + + // forced to be zero + dcsr_d.zero0 = 1'b0; + dcsr_d.zero1 = 1'b0; + dcsr_d.zero2 = 12'h0; + dcsr_en = 1'b1; + end + + // dpc: debug program counter + CSR_DPC: depc_en = 1'b1; + + CSR_DSCRATCH0: dscratch0_en = 1'b1; + CSR_DSCRATCH1: dscratch1_en = 1'b1; + + // machine counter/timers + CSR_MCOUNTINHIBIT: mcountinhibit_we = 1'b1; + + CSR_MCYCLE, + CSR_MINSTRET, + CSR_MHPMCOUNTER3, + CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7, + CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11, + CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15, + CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19, + CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23, + CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27, + CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31: begin + mhpmcounter_we[mhpmcounter_idx] = 1'b1; + end + + CSR_MCYCLEH, + CSR_MINSTRETH, + CSR_MHPMCOUNTER3H, + CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H, + CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H, + CSR_MHPMCOUNTER12H, 
CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H, + CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H, + CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H, + CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H, + CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H: begin + mhpmcounterh_we[mhpmcounter_idx] = 1'b1; + end + + CSR_CPUCTRL: begin + cpuctrl_d = cpuctrl_wdata; + cpuctrl_we = 1'b1; + end + + CSR_MSHWM: mshwm_en = CHERIoTEn & cheri_pmode_i; + CSR_MSHWMB: mshwmb_en = CHERIoTEn & cheri_pmode_i; + CSR_CDBG_CTRL: cdbg_ctrl_en = CHERIoTEn & cheri_pmode_i; + + default:; + endcase + end + + // exception controller gets priority over other writes + unique case (1'b1) + + csr_save_cause_i: begin + unique case (1'b1) + csr_save_if_i: begin + exception_pc = pc_if_i; + end + csr_save_id_i: begin + exception_pc = pc_id_i; + end + csr_save_wb_i: begin + exception_pc = pc_wb_i; + end + default:; + endcase + + // Any exception, including debug mode, causes a switch to M-mode + priv_lvl_d = PRIV_LVL_M; + + if (debug_csr_save_i) begin + // all interrupts are masked + // do not update cause, epc, tval, epc and status + dcsr_d.prv = priv_lvl_q; + dcsr_d.cause = debug_cause_i; + dcsr_en = 1'b1; + depc_d = exception_pc; + depc_en = 1'b1; + end else if (!debug_mode_i) begin + // In debug mode, "exceptions do not update any registers. That + // includes cause, epc, tval, dpc and mstatus." 
[Debug Spec v0.13.2, p.39] + mtval_en = 1'b1; + mtval_d = csr_mtval_i; + mstatus_en = 1'b1; + mstatus_d.mie = 1'b0; // disable interrupts + // save current status + mstatus_d.mpie = mstatus_q.mie; + mstatus_d.mpp = priv_lvl_q; + mepc_en = 1'b1; + mepc_d = exception_pc; + mcause_en = 1'b1; + mcause_d = {csr_mcause_i}; + // save previous status for recoverable NMI + mstack_en = 1'b1; + + if (!mcause_d[5]) begin + cpuctrl_we = 1'b1; + + cpuctrl_d.sync_exc_seen = 1'b1; + if (cpuctrl_q.sync_exc_seen) begin + double_fault_seen_o = 1'b1; + cpuctrl_d.double_fault_seen = 1'b1; + end + end + end + end // csr_save_cause_i + + csr_restore_dret_i: begin // DRET + priv_lvl_d = dcsr_q.prv; + end // csr_restore_dret_i + + csr_restore_mret_i: begin // MRET + priv_lvl_d = mstatus_q.mpp; + mstatus_en = 1'b1; + mstatus_d.mie = mstatus_q.mpie; // re-enable interrupts + + // merge in upstream change 9/7/2022 // LEC_NOT_COMPATIBLE + if (mstatus_q.mpp != PRIV_LVL_M) begin + mstatus_d.mprv = 1'b0; + end + + cpuctrl_we = 1'b1; + cpuctrl_d.sync_exc_seen = 1'b0; + + if (nmi_mode_i) begin + // when returning from an NMI restore state from mstack CSR + mstatus_d.mpie = mstack_q.mpie; + mstatus_d.mpp = mstack_q.mpp; + mepc_en = 1'b1; + mepc_d = mstack_epc_q; + mcause_en = 1'b1; + mcause_d = mstack_cause_q; + end else begin + // otherwise just set mstatus.MPIE/MPP + // See RISC-V Privileged Specification, version 1.11, Section 3.1.6.1 + mstatus_d.mpie = 1'b1; + mstatus_d.mpp = PRIV_LVL_U; + end + end // csr_restore_mret_i + + default:; + endcase + end + + // Update current priv level + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + priv_lvl_q <= PRIV_LVL_M; + end else begin + priv_lvl_q <= priv_lvl_d; + end + end + + // Send current priv level to the decoder + assign priv_mode_id_o = priv_lvl_q; + // Load/store instructions must factor in MPRV for PMP checking + assign priv_mode_lsu_o = mstatus_q.mprv ? 
mstatus_q.mpp : priv_lvl_q; // completes priv_mode_lsu_o: use MPP when MPRV is set

// CSR operation logic: derive the value to be written from the requested
// op (write / set-bits / clear-bits) and the current read data.
always_comb begin
  unique case (csr_op_i)
    CSR_OP_WRITE: csr_wdata_int = csr_wdata_i;
    CSR_OP_SET:   csr_wdata_int = csr_wdata_i | csr_rdata_o;
    CSR_OP_CLEAR: csr_wdata_int = ~csr_wdata_i & csr_rdata_o;
    CSR_OP_READ:  csr_wdata_int = csr_wdata_i;
    default:      csr_wdata_int = csr_wdata_i;
  endcase
end

assign csr_wr = (csr_op_i inside {CSR_OP_WRITE, CSR_OP_SET, CSR_OP_CLEAR});

// only write CSRs during one clock cycle

// enforcing the CHERI CSR access policy.
// - exceptions for ASR violation is generated in the controller.
// - we never allow writes to any CSR if ASR=0
// - no need to gate csr_rdata for ASR violation since the instruction will be faulted anyway

// logic read_ok;
// assign read_ok = ~CHERIoTEn || ~cheri_pmode_i || debug_mode_i || pcc_cap_q.perms[PERM_SR] ||
//                  ((csr_addr_i>=CSR_MCYCLE) && (csr_addr_i<=CSR_CDBG_CTRL));

// A write requires: non-CHERI operation OR debug mode OR the PCC
// access-system-registers (ASR) permission; and the access must be legal.
assign csr_we_int = csr_wr & csr_op_en_i &
                    (~CHERIoTEn | ~cheri_pmode_i | debug_mode_i | pcc_cap_q.perms[PERM_SR]) &
                    ~illegal_csr_insn_o;

// assign csr_rdata_o = read_ok ? csr_rdata_int : 0;
assign csr_rdata_o = csr_rdata_int;

// directly output some registers
assign csr_mepc_o  = mepc_q;
assign csr_depc_o  = depc_q;
assign csr_mtvec_o = mtvec_q;

assign csr_mshwm_o  = mshwm_q;
assign csr_mshwmb_o = mshwmb_q;

assign csr_mstatus_mie_o   = mstatus_q.mie;
assign csr_mstatus_tw_o    = mstatus_q.tw;
assign debug_single_step_o = dcsr_q.step;
assign debug_ebreakm_o     = dcsr_q.ebreakm;
assign debug_ebreaku_o     = dcsr_q.ebreaku;

// Qualify incoming interrupt requests in mip CSR with mie CSR for controller and to re-enable
// clock upon WFI (must be purely combinational).
// Pending interrupts gated by the per-source enables in mie
assign irqs_o = mip & mie_q;
assign irq_pending_o = |irqs_o;

////////////////////////
// CSR instantiations //
////////////////////////

// MSTATUS
localparam status_t MSTATUS_RST_VAL = '{mie: 1'b0,
                                        mpie: 1'b1,
                                        mpp: PRIV_LVL_U,
                                        mprv: 1'b0,
                                        tw: 1'b0};

// adding set/clr of mie based on sentry type for CHERIoT
logic mstatus_en_combi;
status_t mstatus_d_combi;

// mstatus is also written when a CHERI sentry requests MIE set/clear
assign mstatus_en_combi = mstatus_en | cheri_csr_clr_mie_i | cheri_csr_set_mie_i;

always_comb begin
  mstatus_d_combi = mstatus_d;
  // Clear takes effect first, then set (set wins if both asserted)
  mstatus_d_combi.mie = (mstatus_d.mie & ~cheri_csr_clr_mie_i) | cheri_csr_set_mie_i;
end

cheriot_csr #(
  .Width ($bits(status_t)),
  .ShadowCopy(ShadowCSR),
  .ResetValue({MSTATUS_RST_VAL})
) u_mstatus_csr (
  .clk_i (clk_i),
  .rst_ni (rst_ni),
  .wr_data_i ({mstatus_d_combi}),
  .wr_en_i (mstatus_en_combi),
  .rd_data_o (mstatus_q),
  .rd_error_o(mstatus_err)
);

// Merge normal (mepc_d) and CHERI capability (cheri_csr_wdata_i) write paths
assign mepc_en_combi = mepc_en | mepc_en_cheri;
assign mepc_d_combi = ({32{mepc_en}} & mepc_d) | ({32{mepc_en_cheri}} & cheri_csr_wdata_i);

// MEPC
cheriot_csr #(
  .Width (32),
  .ShadowCopy(1'b0),
  .ResetValue('0)
) u_mepc_csr (
  .clk_i (clk_i),
  .rst_ni (rst_ni),
  .wr_data_i (mepc_d_combi),
  .wr_en_i (mepc_en_combi),
  .rd_data_o (mepc_q),
  .rd_error_o()
);

// MIE
assign mie_d.irq_software = csr_wdata_int[CSR_MSIX_BIT];
assign mie_d.irq_timer    = csr_wdata_int[CSR_MTIX_BIT];
assign mie_d.irq_external = csr_wdata_int[CSR_MEIX_BIT];
assign mie_d.irq_fast     = csr_wdata_int[CSR_MFIX_BIT_HIGH:CSR_MFIX_BIT_LOW];
cheriot_csr #(
  .Width ($bits(irqs_t)),
  .ShadowCopy(1'b0),
  .ResetValue('0)
) u_mie_csr (
  .clk_i (clk_i),
  .rst_ni (rst_ni),
  .wr_data_i ({mie_d}),
  .wr_en_i (mie_en),
  .rd_data_o (mie_q),
  .rd_error_o()
);

// MSCRATCH
cheriot_csr #(
  .Width (32),
  .ShadowCopy(1'b0),
  .ResetValue('0)
) u_mscratch_csr (
  .clk_i (clk_i),
  .rst_ni (rst_ni),
  .wr_data_i (csr_wdata_int),
  .wr_en_i (mscratch_en),
  .rd_data_o
(mscratch_q), + .rd_error_o() + ); + + // MCAUSE + cheriot_csr #( + .Width (6), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_mcause_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mcause_d), + .wr_en_i (mcause_en), + .rd_data_o (mcause_q), + .rd_error_o() + ); + + // MTVAL + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_mtval_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mtval_d), + .wr_en_i (mtval_en), + .rd_data_o (mtval_q), + .rd_error_o() + ); + + + assign mtvec_en_combi = mtvec_en | mtvec_en_cheri; + + // use only 2'b00 (direct mode) for CHERIoT + assign mtvec_d_combi = ({32{mtvec_en}} & mtvec_d) | ({32{mtvec_en_cheri}} & + {cheri_csr_wdata_i[31:2],2'b00}); + + // MTVEC + cheriot_csr #( + .Width (32), + .ShadowCopy(ShadowCSR), + .ResetValue({32'd1}) // retain this to make lec vs ibex pass + ) u_mtvec_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mtvec_d_combi), + .wr_en_i (mtvec_en_combi), + .rd_data_o (mtvec_q), + .rd_error_o(mtvec_err) + ); + + // DCSR + localparam dcsr_t DCSR_RESET_VAL = '{ + xdebugver: XDEBUGVER_STD, + cause: DBG_CAUSE_NONE, // 3'h0 + prv: PRIV_LVL_M, + default: '0 + }; + cheriot_csr #( + .Width ($bits(dcsr_t)), + .ShadowCopy(1'b0), + .ResetValue({DCSR_RESET_VAL}) + ) u_dcsr_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({dcsr_d}), + .wr_en_i (dcsr_en), + .rd_data_o (dcsr_q), + .rd_error_o() + ); + + assign depc_en_combi = depc_en | depc_en_cheri; + assign depc_d_combi = ({32{depc_en}} & depc_d) | ({32{depc_en_cheri}} & cheri_csr_wdata_i); + + // DEPC + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_depc_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (depc_d_combi), + .wr_en_i (depc_en_combi), + .rd_data_o (depc_q), + .rd_error_o() + ); + + assign dscratch0_en_combi = dscratch0_en | dscratch0_en_cheri; + assign dscratch0_d_combi = ({32{dscratch0_en}} & csr_wdata_int) | ({32{dscratch0_en_cheri}} & cheri_csr_wdata_i); + + // 
DSCRATCH0 + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_dscratch0_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (dscratch0_d_combi), + .wr_en_i (dscratch0_en_combi), + .rd_data_o (dscratch0_q), + .rd_error_o() + ); + + assign dscratch1_en_combi = dscratch1_en | dscratch1_en_cheri; + assign dscratch1_d_combi = ({32{dscratch1_en}} & csr_wdata_int) | ({32{dscratch1_en_cheri}} & cheri_csr_wdata_i); + + // DSCRATCH0 + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_dscratch1_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (dscratch1_d_combi), + .wr_en_i (dscratch1_en_combi), + .rd_data_o (dscratch1_q), + .rd_error_o() + ); + + // MSTACK + localparam status_stk_t MSTACK_RESET_VAL = '{mpie: 1'b1, mpp: PRIV_LVL_U}; + cheriot_csr #( + .Width ($bits(status_stk_t)), + .ShadowCopy(1'b0), + .ResetValue({MSTACK_RESET_VAL}) + ) u_mstack_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({mstack_d}), + .wr_en_i (mstack_en), + .rd_data_o (mstack_q), + .rd_error_o() + ); + + // MSTACK_EPC + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_mstack_epc_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mstack_epc_d), + .wr_en_i (mstack_en), + .rd_data_o (mstack_epc_q), + .rd_error_o() + ); + + // MSTACK_CAUSE + cheriot_csr #( + .Width (6), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_mstack_cause_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mstack_cause_d), + .wr_en_i (mstack_en), + .rd_data_o (mstack_cause_q), + .rd_error_o() + ); + + // MSHWM and HSHWMB + logic mshwm_en_combi; + assign mshwm_en_combi = mshwm_en | csr_mshwm_set_i; + assign mshwm_d = csr_mshwm_set_i ? 
csr_mshwm_new_i : {csr_wdata_int[31:4], 4'h0}; + + if (CHERIoTEn) begin: g_mshwm + cheriot_csr #( + .Width (32), + .ShadowCopy(ShadowCSR), + .ResetValue(32'd0) + ) u_mshwm_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (mshwm_d), + .wr_en_i (mshwm_en_combi), + .rd_data_o (mshwm_q), + .rd_error_o() + ); + + cheriot_csr #( + .Width (32), + .ShadowCopy(ShadowCSR), + .ResetValue(32'd0) + ) u_mshwmb_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({csr_wdata_int[31:4], 4'h0}), + .wr_en_i (mshwmb_en), + .rd_data_o (mshwmb_q), + .rd_error_o() + ); + + // cheri debug feature control + cheriot_csr #( + .Width (32), + .ShadowCopy(ShadowCSR), + .ResetValue(32'd0) + ) u_cdbg_ctrl_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({31'h0, csr_wdata_int[0]}), + .wr_en_i (cdbg_ctrl_en), + .rd_data_o (cdbg_ctrl_q), + .rd_error_o() + ); + + assign csr_dbg_tclr_fault_o = cdbg_ctrl_q[0]; + + end else begin + assign mshwm_q = 32'h0; + assign mshwmb_q = 32'h0; + + assign csr_dbg_tclr_fault_o = 1'b0; + end + + // ----------------- + // PMP registers + // ----------------- + + if (PMPEnable) begin : g_pmp_registers + // PMP reset values + `ifdef CHERIOT_CUSTOM_PMP_RESET_VALUES + `include "cheriot_pmp_reset.svh" + `else + `include "cheriot_pmp_reset_default.svh" + `endif + + pmp_mseccfg_t pmp_mseccfg_q, pmp_mseccfg_d; + logic pmp_mseccfg_we; + logic pmp_mseccfg_err; + pmp_cfg_t pmp_cfg [PMPNumRegions]; + logic [PMPNumRegions-1:0] pmp_cfg_locked; + pmp_cfg_t pmp_cfg_wdata [PMPNumRegions]; + logic [PMPAddrWidth-1:0] pmp_addr [PMPNumRegions]; + logic [PMPNumRegions-1:0] pmp_cfg_we; + logic [PMPNumRegions-1:0] pmp_cfg_err; + logic [PMPNumRegions-1:0] pmp_addr_we; + logic [PMPNumRegions-1:0] pmp_addr_err; + logic any_pmp_entry_locked; + + // Expanded / qualified register read data + for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_exp_rd_data + if (i < PMPNumRegions) begin : g_implemented_regions + // Add in zero padding for reserved fields + assign 
pmp_cfg_rdata[i] = {pmp_cfg[i].lock, 2'b00, pmp_cfg[i].mode, + pmp_cfg[i].exec, pmp_cfg[i].write, pmp_cfg[i].read}; + + // Address field read data depends on the current programmed mode and the granularity + // See RISC-V Privileged Specification, version 1.11, Section 3.6.1 + if (PMPGranularity == 0) begin : g_pmp_g0 + // If G == 0, read data is unmodified + assign pmp_addr_rdata[i] = pmp_addr[i]; + + end else if (PMPGranularity == 1) begin : g_pmp_g1 + // If G == 1, bit [G-1] reads as zero in TOR or OFF mode + always_comb begin + pmp_addr_rdata[i] = pmp_addr[i]; + if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin + pmp_addr_rdata[i][PMPGranularity-1:0] = '0; + end + end + + end else begin : g_pmp_g2 + // For G >= 2, bits are masked to one or zero depending on the mode + always_comb begin + // In NAPOT mode, bits [G-2:0] must read as one + pmp_addr_rdata[i] = {pmp_addr[i], {PMPGranularity - 1{1'b1}}}; + + if ((pmp_cfg[i].mode == PMP_MODE_OFF) || (pmp_cfg[i].mode == PMP_MODE_TOR)) begin + // In TOR or OFF mode, bits [G-1:0] must read as zero + pmp_addr_rdata[i][PMPGranularity-1:0] = '0; + end + end + end + + end else begin : g_other_regions + // Non-implemented regions read as zero + assign pmp_cfg_rdata[i] = '0; + assign pmp_addr_rdata[i] = '0; + end + end + + // Write data calculation + for (genvar i = 0; i < PMPNumRegions; i++) begin : g_pmp_csrs + // ------------------------- + // Instantiate cfg registers + // ------------------------- + assign pmp_cfg_we[i] = csr_we_int & ~pmp_cfg_locked[i] & + (csr_addr == (CSR_OFF_PMP_CFG + (i[11:0] >> 2))); + + // Select the correct WDATA (each CSR contains 4 CFG fields, each with 2 RES bits) + assign pmp_cfg_wdata[i].lock = csr_wdata_int[(i%4)*PMP_CFG_W+7]; + // NA4 mode is not selectable when G > 0, mode is treated as OFF + always_comb begin + unique case (csr_wdata_int[(i%4)*PMP_CFG_W+3+:2]) + 2'b00 : pmp_cfg_wdata[i].mode = PMP_MODE_OFF; + 2'b01 : pmp_cfg_wdata[i].mode = PMP_MODE_TOR; + 
2'b10 : pmp_cfg_wdata[i].mode = (PMPGranularity == 0) ? PMP_MODE_NA4: + PMP_MODE_OFF; + 2'b11 : pmp_cfg_wdata[i].mode = PMP_MODE_NAPOT; + default : pmp_cfg_wdata[i].mode = PMP_MODE_OFF; + endcase + end + assign pmp_cfg_wdata[i].exec = csr_wdata_int[(i%4)*PMP_CFG_W+2]; + // When MSECCFG.MML is unset, W = 1, R = 0 is a reserved combination, so force W to 0 if R == + // 0. Otherwise allow all possible values to be written. + assign pmp_cfg_wdata[i].write = pmp_mseccfg_q.mml ? csr_wdata_int[(i%4)*PMP_CFG_W+1] : + &csr_wdata_int[(i%4)*PMP_CFG_W+:2]; + assign pmp_cfg_wdata[i].read = csr_wdata_int[(i%4)*PMP_CFG_W]; + + cheriot_csr #( + .Width ($bits(pmp_cfg_t)), + .ShadowCopy(ShadowCSR), + .ResetValue(pmp_cfg_rst[i]) + ) u_pmp_cfg_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({pmp_cfg_wdata[i]}), + .wr_en_i (pmp_cfg_we[i]), + .rd_data_o (pmp_cfg[i]), + .rd_error_o(pmp_cfg_err[i]) + ); + + // MSECCFG.RLB allows the lock bit to be bypassed (allowing cfg writes when MSECCFG.RLB is + // set). 
+ assign pmp_cfg_locked[i] = pmp_cfg[i].lock & ~pmp_mseccfg_q.rlb; + + // -------------------------- + // Instantiate addr registers + // -------------------------- + if (i < PMPNumRegions - 1) begin : g_lower + assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg_locked[i] & + (~pmp_cfg_locked[i+1] | (pmp_cfg[i+1].mode != PMP_MODE_TOR)) & + (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0])); + end else begin : g_upper + assign pmp_addr_we[i] = csr_we_int & ~pmp_cfg_locked[i] & + (csr_addr == (CSR_OFF_PMP_ADDR + i[11:0])); + end + + cheriot_csr #( + .Width (PMPAddrWidth), + .ShadowCopy(ShadowCSR), + .ResetValue(pmp_addr_rst[i][33-:PMPAddrWidth]) + ) u_pmp_addr_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (csr_wdata_int[31-:PMPAddrWidth]), + .wr_en_i (pmp_addr_we[i]), + .rd_data_o (pmp_addr[i]), + .rd_error_o(pmp_addr_err[i]) + ); + + `ASSERT_INIT(PMPAddrRstLowBitsZero_A, pmp_addr_rst[i][33-PMPAddrWidth:0] == '0) + + assign csr_pmp_cfg_o[i] = pmp_cfg[i]; + assign csr_pmp_addr_o[i] = {pmp_addr_rdata[i], 2'b00}; + end + + assign pmp_mseccfg_we = csr_we_int & (csr_addr == CSR_MSECCFG); + + // MSECCFG.MML/MSECCFG.MMWP cannot be unset once set + assign pmp_mseccfg_d.mml = pmp_mseccfg_q.mml ? 1'b1 : csr_wdata_int[CSR_MSECCFG_MML_BIT]; + assign pmp_mseccfg_d.mmwp = pmp_mseccfg_q.mmwp ? 1'b1 : csr_wdata_int[CSR_MSECCFG_MMWP_BIT]; + + // pmp_cfg_locked factors in MSECCFG.RLB so any_pmp_entry_locked will only be set if MSECCFG.RLB + // is unset + assign any_pmp_entry_locked = |pmp_cfg_locked; + + // When any PMP entry is locked (A PMP entry has the L bit set and MSECCFG.RLB is unset), + // MSECCFG.RLB cannot be set again + assign pmp_mseccfg_d.rlb = any_pmp_entry_locked ? 
1'b0 : csr_wdata_int[CSR_MSECCFG_RLB_BIT]; + + cheriot_csr #( + .Width ($bits(pmp_mseccfg_t)), + .ShadowCopy(ShadowCSR), + .ResetValue(pmp_mseccfg_rst) + ) u_pmp_mseccfg ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (pmp_mseccfg_d), + .wr_en_i (pmp_mseccfg_we), + .rd_data_o (pmp_mseccfg_q), + .rd_error_o(pmp_mseccfg_err) + ); + + assign pmp_csr_err = (|pmp_cfg_err) | (|pmp_addr_err) | pmp_mseccfg_err; + assign pmp_mseccfg = pmp_mseccfg_q; + + end else begin : g_no_pmp_tieoffs + // Generate tieoffs when PMP is not configured + for (genvar i = 0; i < PMP_MAX_REGIONS; i++) begin : g_rdata + assign pmp_addr_rdata[i] = '0; + assign pmp_cfg_rdata[i] = '0; + end + for (genvar i = 0; i < PMPNumRegions; i++) begin : g_outputs + assign csr_pmp_cfg_o[i] = pmp_cfg_t'(1'b0); + assign csr_pmp_addr_o[i] = '0; + end + assign pmp_csr_err = 1'b0; + assign pmp_mseccfg = '0; + end + + assign csr_pmp_mseccfg_o = pmp_mseccfg; + + ////////////////////////// + // Performance monitor // + ////////////////////////// + + // update enable signals + always_comb begin : mcountinhibit_update + if (mcountinhibit_we == 1'b1) begin + // bit 1 must always be 0 + mcountinhibit_d = {csr_wdata_int[MHPMCounterNum+2:2], 1'b0, csr_wdata_int[0]}; + end else begin + mcountinhibit_d = mcountinhibit_q; + end + end + + // event selection (hardwired) & control + always_comb begin : gen_mhpmcounter_incr + + // Assign inactive counters (first to prevent latch inference) + for (int unsigned i = 0; i < 32; i++) begin : gen_mhpmcounter_incr_inactive + mhpmcounter_incr[i] = 1'b0; + end + + // When adding or altering performance counter meanings and default + // mappings please update dv/verilator/pcount/cpp/ibex_pcounts.cc + // appropriately. 
+ // + // active counters + mhpmcounter_incr[0] = 1'b1; // mcycle + mhpmcounter_incr[1] = 1'b0; // reserved + mhpmcounter_incr[2] = instr_ret_i; // minstret + mhpmcounter_incr[3] = dside_wait_i; // cycles waiting for data memory + mhpmcounter_incr[4] = iside_wait_i; // cycles waiting for instr fetches + mhpmcounter_incr[5] = mem_load_i; // num of loads + mhpmcounter_incr[6] = mem_store_i; // num of stores + mhpmcounter_incr[7] = jump_i; // num of jumps (unconditional) + mhpmcounter_incr[8] = branch_i; // num of branches (conditional) + mhpmcounter_incr[9] = branch_taken_i; // num of taken branches (conditional) + mhpmcounter_incr[10] = instr_ret_compressed_i; // num of compressed instr + mhpmcounter_incr[11] = mul_wait_i; // cycles waiting for multiply + mhpmcounter_incr[12] = div_wait_i; // cycles waiting for divide + end + + // event selector (hardwired, 0 means no event) + always_comb begin : gen_mhpmevent + + // activate all + for (int i = 0; i < 32; i++) begin : gen_mhpmevent_active + mhpmevent[i] = '0; + mhpmevent[i][i] = 1'b1; + end + + // deactivate + mhpmevent[1] = '0; // not existing, reserved + for (int unsigned i = 3 + MHPMCounterNum; i < 32; i++) begin : gen_mhpmevent_inactive + mhpmevent[i] = '0; + end + end + + // mcycle + cheriot_counter #( + .CounterWidth(64) + ) mcycle_counter_i ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .counter_inc_i(mhpmcounter_incr[0] & ~mcountinhibit[0]), + .counterh_we_i(mhpmcounterh_we[0]), + .counter_we_i(mhpmcounter_we[0]), + .counter_val_i(csr_wdata_int), + .counter_val_o(mhpmcounter[0]), + .counter_val_upd_o() + ); + + + // minstret + cheriot_counter #( + .CounterWidth(64), + .ProvideValUpd(1) + ) minstret_counter_i ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .counter_inc_i(mhpmcounter_incr[2] & ~mcountinhibit[2]), + .counterh_we_i(mhpmcounterh_we[2]), + .counter_we_i(mhpmcounter_we[2]), + .counter_val_i(csr_wdata_int), + .counter_val_o(minstret_raw), + .counter_val_upd_o(minstret_next) + ); + + // Where the writeback stage is 
present instruction in ID observing value of minstret must take + // into account any instruction in the writeback stage. If one is present the incremented value of + // minstret is used. A speculative version of the signal is used to aid timing. When the writeback + // stage sees an exception (so the speculative signal is incorrect) the ID stage will be flushed + // so the incorrect value doesn't matter. A similar behaviour is required for the compressed + // instruction retired counter below. When the writeback stage isn't present the speculative + // signals are always 0. + assign mhpmcounter[2] = instr_ret_spec_i & ~mcountinhibit[2] ? minstret_next : minstret_raw; + + // reserved: + assign mhpmcounter[1] = '0; + assign unused_mhpmcounter_we_1 = mhpmcounter_we[1]; + assign unused_mhpmcounterh_we_1 = mhpmcounterh_we[1]; + assign unused_mhpmcounter_incr_1 = mhpmcounter_incr[1]; + + // Iterate through optionally included counters (MHPMCounterNum controls how many are included) + for (genvar i = 0; i < 29; i++) begin : gen_cntrs + localparam int Cnt = i + 3; + + if (i < MHPMCounterNum) begin : gen_imp + logic [63:0] mhpmcounter_raw, mhpmcounter_next; + + cheriot_counter #( + .CounterWidth(MHPMCounterWidth), + .ProvideValUpd(Cnt == 10) + ) mcounters_variable_i ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .counter_inc_i(mhpmcounter_incr[Cnt] & ~mcountinhibit[Cnt]), + .counterh_we_i(mhpmcounterh_we[Cnt]), + .counter_we_i(mhpmcounter_we[Cnt]), + .counter_val_i(csr_wdata_int), + .counter_val_o(mhpmcounter_raw), + .counter_val_upd_o(mhpmcounter_next) + ); + + if (Cnt == 10) begin : gen_compressed_instr_cnt + // Special behaviour for reading compressed instruction retired counter, see comment on + // `mhpmcounter[2]` above for further information. + assign mhpmcounter[Cnt] = + instr_ret_compressed_spec_i & ~mcountinhibit[Cnt] ? 
mhpmcounter_next: + mhpmcounter_raw; + end else begin : gen_other_cnts + logic [63:0] unused_mhpmcounter_next; + // All other counters just see the raw counter value directly. + assign mhpmcounter[Cnt] = mhpmcounter_raw; + assign unused_mhpmcounter_next = mhpmcounter_next; + end + end else begin : gen_unimp + assign mhpmcounter[Cnt] = '0; + + if (Cnt == 10) begin : gen_no_compressed_instr_cnt + logic unused_instr_ret_compressed_spec_i; + assign unused_instr_ret_compressed_spec_i = instr_ret_compressed_spec_i; + end + end + end + + if (MHPMCounterNum < 29) begin : g_mcountinhibit_reduced + logic [29-MHPMCounterNum-1:0] unused_mhphcounter_we; + logic [29-MHPMCounterNum-1:0] unused_mhphcounterh_we; + logic [29-MHPMCounterNum-1:0] unused_mhphcounter_incr; + + assign mcountinhibit = {{29 - MHPMCounterNum{1'b1}}, mcountinhibit_q}; + // Lint tieoffs for unused bits + assign unused_mhphcounter_we = mhpmcounter_we[31:MHPMCounterNum+3]; + assign unused_mhphcounterh_we = mhpmcounterh_we[31:MHPMCounterNum+3]; + assign unused_mhphcounter_incr = mhpmcounter_incr[31:MHPMCounterNum+3]; + end else begin : g_mcountinhibit_full + assign mcountinhibit = mcountinhibit_q; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mcountinhibit_q <= '0; + end else begin + mcountinhibit_q <= mcountinhibit_d; + end + end + + ///////////////////////////// + // Debug trigger registers // + ///////////////////////////// + + if (DbgTriggerEn) begin : gen_trigger_regs + localparam int unsigned DbgHwNumLen = DbgHwBreakNum > 1 ? 
$clog2(DbgHwBreakNum) : 1; + localparam int unsigned MaxTselect = DbgHwBreakNum - 1; + + // Register values + logic [DbgHwNumLen-1:0] tselect_d, tselect_q; + logic tmatch_control_d; + logic [DbgHwBreakNum-1:0] tmatch_control_q; + logic [31:0] tmatch_value_d; + logic [31:0] tmatch_value_q[DbgHwBreakNum]; + logic selected_tmatch_control; + logic [31:0] selected_tmatch_value; + + // Write enables + logic tselect_we; + logic [DbgHwBreakNum-1:0] tmatch_control_we; + logic [DbgHwBreakNum-1:0] tmatch_value_we; + // Trigger comparison result + logic [DbgHwBreakNum-1:0] trigger_match; + + // Write select + assign tselect_we = csr_we_int & debug_mode_i & (csr_addr_i == CSR_TSELECT); + for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_we + assign tmatch_control_we[i] = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i & + (csr_addr_i == CSR_TDATA1); + assign tmatch_value_we[i] = (i[DbgHwNumLen-1:0] == tselect_q) & csr_we_int & debug_mode_i & + (csr_addr_i == CSR_TDATA2); + end + + // Debug interface tests the available number of triggers by writing and reading the trigger + // select register. Only allow changes to the register if it is within the supported region. + assign tselect_d = (csr_wdata_int < DbgHwBreakNum) ? 
csr_wdata_int[DbgHwNumLen-1:0] : + MaxTselect[DbgHwNumLen-1:0]; + + // tmatch_control is enabled when the execute bit is set + assign tmatch_control_d = csr_wdata_int[2]; + assign tmatch_value_d = csr_wdata_int[31:0]; + + // Registers + cheriot_csr #( + .Width (DbgHwNumLen), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_tselect_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (tselect_d), + .wr_en_i (tselect_we), + .rd_data_o (tselect_q), + .rd_error_o() + ); + + for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_tmatch_reg + cheriot_csr #( + .Width (1), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_tmatch_control_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (tmatch_control_d), + .wr_en_i (tmatch_control_we[i]), + .rd_data_o (tmatch_control_q[i]), + .rd_error_o() + ); + + cheriot_csr #( + .Width (32), + .ShadowCopy(1'b0), + .ResetValue('0) + ) u_tmatch_value_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i (tmatch_value_d), + .wr_en_i (tmatch_value_we[i]), + .rd_data_o (tmatch_value_q[i]), + .rd_error_o() + ); + end + + // Assign read data + // TSELECT - number of supported triggers defined by parameter DbgHwBreakNum + localparam int unsigned TSelectRdataPadlen = DbgHwNumLen >= 32 ? 
0 : (32 - DbgHwNumLen); + assign tselect_rdata = {{TSelectRdataPadlen{1'b0}}, tselect_q}; + + if (DbgHwBreakNum > 1) begin : g_dbg_tmatch_multiple_select + assign selected_tmatch_control = tmatch_control_q[tselect_q]; + assign selected_tmatch_value = tmatch_value_q[tselect_q]; + end else begin : g_dbg_tmatch_single_select + assign selected_tmatch_control = tmatch_control_q[0]; + assign selected_tmatch_value = tmatch_value_q[0]; + end + + // TDATA0 - only support simple address matching + assign tmatch_control_rdata = {4'h2, // type : address/data match + 1'b1, // dmode : access from D mode only + 6'h00, // maskmax : exact match only + 1'b0, // hit : not supported + 1'b0, // select : address match only + 1'b0, // timing : match before execution + 2'b00, // sizelo : match any access + 4'h1, // action : enter debug mode + 1'b0, // chain : not supported + 4'h0, // match : simple match + 1'b1, // m : match in m-mode + 1'b0, // 0 : zero + 1'b0, // s : not supported + 1'b1, // u : match in u-mode + selected_tmatch_control, // execute : match instruction address + 1'b0, // store : not supported + 1'b0}; // load : not supported + + // TDATA1 - address match value only + assign tmatch_value_rdata = selected_tmatch_value; + + // Breakpoint matching + // We match against the next address, as the breakpoint must be taken before execution + for (genvar i = 0; i < DbgHwBreakNum; i++) begin : g_dbg_trigger_match + assign trigger_match[i] = tmatch_control_q[i] & (pc_if_i[31:0] == tmatch_value_q[i]); + end + assign trigger_match_o = |trigger_match; + + end else begin : gen_no_trigger_regs + assign tselect_rdata = 'b0; + assign tmatch_control_rdata = 'b0; + assign tmatch_value_rdata = 'b0; + assign trigger_match_o = 'b0; + end + + ////////////////////////// + // CPU control register // + ////////////////////////// + + // Cast register write data + assign cpuctrl_wdata_raw = cpu_ctrl_t'(csr_wdata_int[$bits(cpu_ctrl_t)-1:0]); + + // Generate fixed time execution bit + if 
(DataIndTiming) begin : gen_dit + assign cpuctrl_wdata.data_ind_timing = cpuctrl_wdata_raw.data_ind_timing; + + end else begin : gen_no_dit + // tieoff for the unused bit + logic unused_dit; + assign unused_dit = cpuctrl_wdata_raw.data_ind_timing; + + // field will always read as zero if not configured + assign cpuctrl_wdata.data_ind_timing = 1'b0; + end + + assign data_ind_timing_o = cpuctrl_q.data_ind_timing; + + // Generate dummy instruction signals + if (DummyInstructions) begin : gen_dummy + assign cpuctrl_wdata.dummy_instr_en = cpuctrl_wdata_raw.dummy_instr_en; + assign cpuctrl_wdata.dummy_instr_mask = cpuctrl_wdata_raw.dummy_instr_mask; + + // Signal a write to the seed register + assign dummy_instr_seed_en_o = csr_we_int && (csr_addr == CSR_SECURESEED); + assign dummy_instr_seed_o = csr_wdata_int; + + end else begin : gen_no_dummy + // tieoff for the unused bit + logic unused_dummy_en; + logic [2:0] unused_dummy_mask; + assign unused_dummy_en = cpuctrl_wdata_raw.dummy_instr_en; + assign unused_dummy_mask = cpuctrl_wdata_raw.dummy_instr_mask; + + // field will always read as zero if not configured + assign cpuctrl_wdata.dummy_instr_en = 1'b0; + assign cpuctrl_wdata.dummy_instr_mask = 3'b000; + assign dummy_instr_seed_en_o = 1'b0; + assign dummy_instr_seed_o = '0; + end + + assign dummy_instr_en_o = cpuctrl_q.dummy_instr_en; + assign dummy_instr_mask_o = cpuctrl_q.dummy_instr_mask; + + // Generate icache enable bit + if (ICache) begin : gen_icache_enable + assign cpuctrl_wdata.icache_enable = cpuctrl_wdata_raw.icache_enable; + end else begin : gen_no_icache + // tieoff for the unused icen bit + logic unused_icen; + assign unused_icen = cpuctrl_wdata_raw.icache_enable; + + // icen field will always read as zero if ICache not configured + assign cpuctrl_wdata.icache_enable = 1'b0; + end + + assign cpuctrl_wdata.double_fault_seen = cpuctrl_wdata_raw.double_fault_seen; + assign cpuctrl_wdata.sync_exc_seen = cpuctrl_wdata_raw.sync_exc_seen; + + assign 
icache_enable_o = cpuctrl_q.icache_enable; + + cheriot_csr #( + .Width ($bits(cpu_ctrl_t)), + .ShadowCopy(ShadowCSR), + .ResetValue('0) + ) u_cpuctrl_csr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .wr_data_i ({cpuctrl_d}), + .wr_en_i (cpuctrl_we), + .rd_data_o (cpuctrl_q), + .rd_error_o(cpuctrl_err) + ); + + assign csr_shadow_err_o = mstatus_err | mtvec_err | pmp_csr_err | cpuctrl_err; + + //////////////// + // Assertions // + //////////////// + + `ASSERT(IbexCsrOpEnRequiresAccess, csr_op_en_i |-> csr_access_i) + + ////////////////////// + // Cheriot SCR's + ////////////////////// + + if (CHERIoTEn) begin: gen_scr + reg_cap_t pcc_exc_cap; + reg_cap_t mtdc_cap; + logic [31:0] mtdc_data; + reg_cap_t mscratchc_cap; + logic [31:0] mscratchc_data; // note this is separate from legacy mscratch + + + logic mtdc_en_cheri, mscratchc_en_cheri; + + always_comb begin + case (cheri_csr_addr_i) + CHERI_SCR_DEPCC: + begin + cheri_csr_rdata_o = debug_mode_i ? depc_q : 0; + cheri_csr_rcap_o = debug_mode_i ? depc_cap : NULL_REG_CAP; + end + CHERI_SCR_DSCRATCHC0: + begin + cheri_csr_rdata_o = debug_mode_i ? dscratch0_q : 0; + cheri_csr_rcap_o = debug_mode_i ? dscratch0_cap : NULL_REG_CAP; + end + CHERI_SCR_DSCRATCHC1: + begin + cheri_csr_rdata_o = debug_mode_i ? dscratch1_q : 0; + cheri_csr_rcap_o = debug_mode_i ? 
dscratch1_cap : NULL_REG_CAP; + end + CHERI_SCR_MTCC: + begin + cheri_csr_rdata_o = mtvec_q; + cheri_csr_rcap_o = mtvec_cap; + end + CHERI_SCR_MTDC: + begin + cheri_csr_rdata_o = mtdc_data; + cheri_csr_rcap_o = mtdc_cap; + end + CHERI_SCR_MSCRATCHC: + begin + cheri_csr_rdata_o = mscratchc_data; + cheri_csr_rcap_o = mscratchc_cap; + end + CHERI_SCR_MEPCC: + begin + cheri_csr_rdata_o = mepc_q; + cheri_csr_rcap_o = mepc_cap; + end + default: + begin + cheri_csr_rdata_o = 32'h0; + cheri_csr_rcap_o = NULL_REG_CAP; + end + endcase + end + + assign pcc_cap_o = pcc_cap_q; + + assign pcc_exc_cap = pcc2mepcc(pcc_cap_q, exception_pc, csr_mepcc_clrtag_i); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + pcc_cap_q <= PCC_RESET_CAP; + end else begin + pcc_cap_q <= pcc_cap_d; + end + end + + // PCC updating + // -- PC address range checking is always against the pcc_cap, which is only updated with + // CHER CJALR or exceptions. Legacy RV32 jumps/branches can change PC but not the PCC + // bounds/perms, so they are still limited by the orginal bounds in IF stage checking + always_comb begin + full_cap_t tf_cap; + reg_cap_t tr_cap; + logic [31:0] tr_addr; + + if (csr_save_cause_i) begin // Exception cases + tr_cap = mtvec_cap; + tr_addr = mtvec_q; + end else if (csr_restore_mret_i) begin + tr_cap = mepc_cap; + tr_addr = mepc_q; + end else if (csr_restore_dret_i & debug_mode_i) begin + tr_cap = depc_cap; + tr_addr = depc_q; + end else begin + tr_cap = NULL_REG_CAP; + tr_addr = 32'h0; + end + + tf_cap = reg2fullcap(tr_cap, tr_addr); + + // Exception cases + if (csr_save_cause_i | csr_restore_mret_i | (csr_restore_dret_i & debug_mode_i)) begin + pcc_cap_d = full2pcap(tf_cap); + end else if (cheri_branch_req_i) begin + pcc_cap_d = pcc_cap_i; + end else begin + pcc_cap_d = pcc_cap_q; + end + end + + // mtvec extended capability + assign mtvec_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MTCC) && (cheri_csr_op_i == CHERI_CSR_RW); + + 
always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) + mtvec_cap <= MTVEC_RESET_CAP; + else if (mtvec_en_cheri) + mtvec_cap <= cheri_csr_wcap_i; + end + + // mepc extended capability + assign mepc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MEPCC) && (cheri_csr_op_i == CHERI_CSR_RW); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) + mepc_cap <= MEPC_RESET_CAP; + else if (csr_save_cause_i & (~debug_csr_save_i) & (~debug_mode_i)) + mepc_cap <= pcc_exc_cap; + else if (cheri_pmode_i & mepc_en) // legacy cssrw; NMI recover + mepc_cap <= NULL_REG_CAP; + else if (mepc_en_cheri) + mepc_cap <= cheri_csr_wcap_i; + end + + // MTDC capability + assign mtdc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MTDC) && (cheri_csr_op_i == CHERI_CSR_RW); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mtdc_cap <= MTDC_RESET_CAP; + mtdc_data <= 32'h0; + end else if (mtdc_en_cheri) begin + mtdc_cap <= cheri_csr_wcap_i; + mtdc_data <= cheri_csr_wdata_i; + end + end + + // MSCRATCHC capability + assign mscratchc_en_cheri = cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_MSCRATCHC) && (cheri_csr_op_i == CHERI_CSR_RW); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mscratchc_cap <= MSCRATCHC_RESET_CAP; + mscratchc_data <= 32'h0; + end else if (mscratchc_en_cheri) begin + mscratchc_cap <= cheri_csr_wcap_i; + mscratchc_data <= cheri_csr_wdata_i; + end + end + + // depc extended capability + assign depc_en_cheri = debug_mode_i & cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_DEPCC) && (cheri_csr_op_i == CHERI_CSR_RW); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) + depc_cap <= NULL_REG_CAP; + else if (csr_save_cause_i & debug_csr_save_i) + depc_cap <= pcc_exc_cap; + else if (depc_en_cheri) + depc_cap <= cheri_csr_wcap_i; + end + + // dscratch0/1 extended capability + assign dscratch0_en_cheri = debug_mode_i & cheri_csr_op_en_i && 
(cheri_csr_addr_i == CHERI_SCR_DSCRATCHC0) && (cheri_csr_op_i == CHERI_CSR_RW); + assign dscratch1_en_cheri = debug_mode_i & cheri_csr_op_en_i && (cheri_csr_addr_i == CHERI_SCR_DSCRATCHC1) && (cheri_csr_op_i == CHERI_CSR_RW); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + dscratch0_cap <= NULL_REG_CAP; + dscratch1_cap <= NULL_REG_CAP; + end else if (dscratch0_en_cheri) + dscratch0_cap <= cheri_csr_wcap_i; + else if (dscratch1_en_cheri) + dscratch1_cap <= cheri_csr_wcap_i; + + end + + // fatal error condition (unrecoverable, need external reset) + // exception with invalid mepcc + logic cheri_fatal_err_q; + + assign cheri_fatal_err_o = cheri_fatal_err_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cheri_fatal_err_q <= 1'b0; + end else begin + if (cheri_pmode_i & csr_save_cause_i & ~mtvec_cap.valid) + cheri_fatal_err_q <= 1'b1; + end + end + + + end else begin: gen_no_scr + + assign cheri_csr_rdata_o = 32'h0; + assign cheri_csr_rcap_o = NULL_REG_CAP; + + assign pcc_cap_o = NULL_PCC_CAP; + assign pcc_cap_q = NULL_PCC_CAP; + + assign mtvec_en_cheri = 1'b0; + assign mepc_en_cheri = 1'b0; + assign depc_en_cheri = 1'b0; + assign dscratch0_en_cheri = 1'b0; + assign dscratch1_en_cheri = 1'b0; + + assign cheri_fatal_err_o = 1'b0; + + end + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv b/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv new file mode 100644 index 0000000..9dbe1b6 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_csr.sv
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Control / status register primitive
 *
 * Holds a single CSR value of parameterisable width with synchronous write
 * and asynchronous active-low reset. When ShadowCopy is enabled, a second
 * register bank stores the bitwise complement of every value written; the
 * two banks are continuously compared so that a fault flipping bits in
 * either bank is reported on rd_error_o.
 */

`include "prim_assert.sv"

module cheriot_csr #(
  parameter int unsigned    Width      = 32,
  parameter bit             ShadowCopy = 1'b0,
  parameter bit [Width-1:0] ResetValue = '0
) (
  input  logic             clk_i,
  input  logic             rst_ni,

  input  logic [Width-1:0] wr_data_i,
  input  logic             wr_en_i,
  output logic [Width-1:0] rd_data_o,

  output logic             rd_error_o
);

  // Primary register storage.
  logic [Width-1:0] data_q;

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      data_q <= ResetValue;
    end else if (wr_en_i) begin
      data_q <= wr_data_i;
    end
  end

  assign rd_data_o = data_q;

  if (ShadowCopy) begin : gen_shadow
    // Complemented copy of the register; written/reset in lock-step with
    // data_q so the two banks always disagree bit-for-bit when healthy.
    logic [Width-1:0] shadow_q;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        shadow_q <= ~ResetValue;
      end else if (wr_en_i) begin
        shadow_q <= ~wr_data_i;
      end
    end

    // Any divergence between the banks flags a storage error.
    assign rd_error_o = (data_q != ~shadow_q);

  end else begin : gen_no_shadow
    // No redundancy configured: error detection is not possible.
    assign rd_error_o = 1'b0;
  end

  // The write strobe must never be X/Z.
  `ASSERT_KNOWN(IbexCSREnValid, wr_en_i)

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv b/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv new file mode 100644 index 0000000..8b0fcdb --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_decoder.sv
@@ -0,0 +1,1432 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + + +/** + * Instruction decoder + * + * This module is fully combinatorial, clock and reset are used for + * assertions only. + */ + +`include "prim_assert.sv" + +module cheriot_decoder import cheri_pkg::*; #( + parameter bit RV32E = 0, + parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast, + parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone, + parameter bit BranchTargetALU = 0, + parameter bit CHERIoTEn = 1'b1, + parameter bit CheriPPLBC = 1'b0, + parameter bit CheriSBND2 = 1'b0 +) ( + input logic clk_i, + input logic rst_ni, + + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + + // to/from controller + output logic illegal_insn_o, // illegal instr encountered + output logic ebrk_insn_o, // trap instr encountered + output logic mret_insn_o, // return from exception instr + // encountered + output logic dret_insn_o, // return from debug instr encountered + output logic ecall_insn_o, // syscall instr encountered + output logic wfi_insn_o, // wait for interrupt instr encountered + output logic jump_set_o, // jump taken set signal + input logic branch_taken_i, // registered branch decision + output logic icache_inval_o, + + // from IF-ID pipeline register + input logic instr_first_cycle_i, // instruction read is in its first cycle + input logic [31:0] instr_rdata_i, // instruction read from memory/cache + input logic [31:0] instr_rdata_alu_i, // instruction read from memory/cache + // replicated to ease fan-out) + + input logic illegal_c_insn_i, // compressed instruction decode failed + + // immediates + output cheriot_pkg::imm_a_sel_e 
imm_a_mux_sel_o, // immediate selection for operand a + output cheriot_pkg::imm_b_sel_e imm_b_mux_sel_o, // immediate selection for operand b + output cheriot_pkg::op_a_sel_e bt_a_mux_sel_o, // branch target selection operand a + output cheriot_pkg::imm_b_sel_e bt_b_mux_sel_o, // branch target selection operand b + output logic [31:0] imm_i_type_o, + output logic [31:0] imm_s_type_o, + output logic [31:0] imm_b_type_o, + output logic [31:0] imm_u_type_o, + output logic [31:0] imm_j_type_o, + output logic [31:0] zimm_rs1_type_o, + + // register file + output cheriot_pkg::rf_wd_sel_e rf_wdata_sel_o, // RF write data selection + output logic rf_we_o, // write enable for regfile + output logic rf_we_or_load_o, + output logic [4:0] rf_raddr_a_o, + output logic [4:0] rf_raddr_b_o, + output logic [4:0] rf_waddr_o, + output logic rf_ren_a_o, // Instruction reads from RF addr A + output logic rf_ren_b_o, // Instruction reads from RF addr B + + // ALU + output cheriot_pkg::alu_op_e alu_operator_o, // ALU operation selection + output cheriot_pkg::op_a_sel_e alu_op_a_mux_sel_o, // operand a selection: reg value, PC, + // immediate or zero + output cheriot_pkg::op_b_sel_e alu_op_b_mux_sel_o, // operand b selection: reg value or + // immediate + output logic alu_multicycle_o, // ternary bitmanip instruction + + // MULT & DIV + output logic mult_en_o, // perform integer multiplication + output logic div_en_o, // perform integer division or remainder + output logic mult_sel_o, // as above but static, for data muxes + output logic div_sel_o, // as above but static, for data muxes + + output cheriot_pkg::md_op_e multdiv_operator_o, + output logic [1:0] multdiv_signed_mode_o, + + // CSRs + output logic csr_access_o, // access to CSR + output cheriot_pkg::csr_op_e csr_op_o, // operation to perform on CSR + output logic csr_cheri_always_ok_o, // CHERI safe-listed (no ASR needed) CSRs + + // LSU + output logic data_req_o, // start transaction to data memory + output logic 
cheri_data_req_o, // cheri lsu transaction + output logic data_we_o, // write enable + output logic [1:0] data_type_o, // size of transaction: byte, half + // word or word + output logic data_sign_extension_o, // sign extension for data read from + // memory + + // jump/branches + output logic jump_in_dec_o, // jump is being calculated in ALU + output logic branch_in_dec_o, + + // output to cheri EX + output logic instr_is_cheri_o, + output logic instr_is_legal_cheri_o, + output logic [11:0] cheri_imm12_o, + output logic [19:0] cheri_imm20_o, + output logic [20:0] cheri_imm21_o, + output logic [OPDW-1:0] cheri_operator_o, + output logic [4:0] cheri_cs2_dec_o, + output logic cheri_multicycle_dec_o +); + + import cheriot_pkg::*; + + localparam bit CheriLimit16Regs = CHERIoTEn; + + logic illegal_insn; + logic illegal_reg_rv32e; + logic illegal_reg_cheri; + logic csr_illegal; + logic rf_we; + + logic [31:0] instr; + logic [31:0] instr_alu; + logic [9:0] unused_instr_alu; + // Source/Destination register instruction index + logic [4:0] instr_rs1; + logic [4:0] instr_rs2; + logic [4:0] instr_rs3; + logic [4:0] instr_rd; + + logic use_rs3_d; + logic use_rs3_q; + + csr_op_e csr_op; + + opcode_e opcode; + opcode_e opcode_alu; + + logic cheri_opcode_en; + logic cheri_auipcc_en; + logic cheri_auicgp_en; + logic cheri_jalr_en; + logic cheri_jal_en; + logic cheri_cload_en; + logic cheri_cstore_en; + logic instr_is_legal_cheri; + logic cheri_rf_ren_a, cheri_rf_ren_b; + logic cheri_rf_we_dec; + + // To help timing the flops containing the current instruction are replicated to reduce fan-out. + // instr_alu is used to determine the ALU control logic and associated operand/imm select signals + // as the ALU is often on the more critical timing paths. instr is used for everything else. 
+ assign instr = instr_rdata_i; + assign instr_alu = instr_rdata_alu_i; + + ////////////////////////////////////// + // Register and immediate selection // + ////////////////////////////////////// + + // immediate extraction and sign extension + assign imm_i_type_o = { {20{instr[31]}}, instr[31:20] }; + assign imm_s_type_o = { {20{instr[31]}}, instr[31:25], instr[11:7] }; + assign imm_b_type_o = { {19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 }; + assign imm_u_type_o = { instr[31:12], 12'b0 }; + assign imm_j_type_o = { {12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 }; + + // immediate for CSR manipulation (zero extended) + assign zimm_rs1_type_o = { 27'b0, instr_rs1 }; // rs1 + + if (RV32B != RV32BNone) begin : gen_rs3_flop + // the use of rs3 is known one cycle ahead. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + use_rs3_q <= 1'b0; + end else begin + use_rs3_q <= use_rs3_d; + end + end + end else begin : gen_no_rs3_flop + logic unused_clk; + logic unused_rst_n; + + // Clock and reset unused when there's no rs3 flop + assign unused_clk = clk_i; + assign unused_rst_n = rst_ni; + + // always zero + assign use_rs3_q = use_rs3_d; + end + + // source registers + assign instr_rs1 = instr[19:15]; + assign instr_rs2 = instr[24:20]; + assign instr_rs3 = instr[31:27]; + + // read cx3 if AUICGP + // note for GDC (c3) we want to use the regular scheme to resovel data hazards, instead of using + // sideband signals to export CX3 from register file directly + logic [4:0] raddr_a, raddr_b; + assign raddr_a = cheri_auicgp_en ? 5'h3 : ((use_rs3_q & ~instr_first_cycle_i) ? 
instr_rs3 : instr_rs1); // rs3 / rs1 + assign raddr_b = instr_rs2; // rs2 + + // cheriot only uses 16 registers and repurposes the MSB addr bits + if (CheriLimit16Regs) begin + assign rf_raddr_a_o = cheri_pmode_i ?{1'b0, raddr_a[3:0]} : raddr_a; + assign rf_raddr_b_o = cheri_pmode_i ?{1'b0, raddr_b[3:0]} : raddr_b; + end else begin + assign rf_raddr_a_o = raddr_a; + assign rf_raddr_b_o = raddr_b; + end + + // destination register + assign instr_rd = instr[11:7]; + if (CheriLimit16Regs) begin + assign rf_waddr_o = cheri_pmode_i ? {1'b0, instr_rd[3:0]} : instr_rd; // rd + end else begin + assign rf_waddr_o = instr_rd; // rd + end + + //////////////////// + // Register check // + //////////////////// + + // rf_we from decoder doesn't cover memory load case (where regfile write signal comes from LSU response) + logic rf_we_or_load; + assign rf_we_or_load = rf_we | (opcode == OPCODE_LOAD); + + assign rf_we_or_load_o = rf_we_or_load; + + if (RV32E) begin : gen_rv32e_reg_check_active + //assign illegal_reg_rv32e = ((rf_raddr_a_o[4] & (alu_op_a_mux_sel_o == OP_A_REG_A)) | + // (rf_raddr_b_o[4] & (alu_op_b_mux_sel_o == OP_B_REG_B)) | + assign illegal_reg_rv32e = ((rf_raddr_a_o[4] & rf_ren_a_o) | + (rf_raddr_b_o[4] & rf_ren_b_o) | + (instr_rs3[4] & use_rs3_d & rf_ren_a_o) | + (rf_waddr_o[4] & rf_we_or_load)); + end else begin : gen_rv32e_reg_check_inactive + assign illegal_reg_rv32e = 1'b0; + end + + if (CheriLimit16Regs) begin : gen_cheri_reg_check_active + assign illegal_reg_cheri = cheri_pmode_i & + ((raddr_a[4] & rf_ren_a_o) | + (raddr_b[4] & rf_ren_b_o) | + (instr_rs3[4] & use_rs3_d & rf_ren_a_o) | + (instr_rd[4] & rf_we_or_load )); + end else begin : gen_cheri_reg_check_inactive + assign illegal_reg_cheri = 1'b0; + end + + /////////////////////// + // CSR operand check // + /////////////////////// + always_comb begin : csr_operand_check + csr_op_o = csr_op; + + // CSRRSI/CSRRCI must not write 0 to CSRs (uimm[4:0]=='0) + // CSRRS/CSRRC must not write from x0 to CSRs 
(rs1=='0) + if ((csr_op == CSR_OP_SET || csr_op == CSR_OP_CLEAR) && + instr_rs1 == '0) begin + csr_op_o = CSR_OP_READ; + end + end + + ///////////// + // Decoder // + ///////////// + + always_comb begin + jump_in_dec_o = 1'b0; + jump_set_o = 1'b0; + branch_in_dec_o = 1'b0; + icache_inval_o = 1'b0; + + multdiv_operator_o = MD_OP_MULL; + multdiv_signed_mode_o = 2'b00; + + rf_wdata_sel_o = RF_WD_EX; + rf_we = 1'b0; + rf_ren_a_o = 1'b0; + rf_ren_b_o = 1'b0; + + csr_access_o = 1'b0; + csr_illegal = 1'b0; + csr_op = CSR_OP_READ; + csr_cheri_always_ok_o = 1'b0; + + data_we_o = 1'b0; + data_type_o = 2'b00; + data_sign_extension_o = 1'b0; + data_req_o = 1'b0; + cheri_data_req_o = 1'b0; + + illegal_insn = 1'b0; + ebrk_insn_o = 1'b0; + mret_insn_o = 1'b0; + dret_insn_o = 1'b0; + ecall_insn_o = 1'b0; + wfi_insn_o = 1'b0; + + cheri_opcode_en = 1'b0; + cheri_cload_en = 1'b0; + cheri_cstore_en = 1'b0; + cheri_auipcc_en = 1'b0; + cheri_auicgp_en = 1'b0; + cheri_jalr_en = 1'b0; + cheri_jal_en = 1'b0; + + opcode = opcode_e'(instr[6:0]); + + unique case (opcode) + + /////////// + // Jumps // + /////////// + + OPCODE_JAL: begin // Jump and Link + if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin + // cheri_ex takes over JAL now as a single-cycle jump + cheri_jal_en = 1'b1; + illegal_insn = 1'b0; + rf_we = 1'b1; + end else begin + jump_in_dec_o = 1'b1; + + if (instr_first_cycle_i) begin + // Calculate jump target (and store PC + 4 if BranchTargetALU is configured) + rf_we = BranchTargetALU; + jump_set_o = 1'b1; + end else begin + // Calculate and store PC+4 + rf_we = 1'b1; + end + end + end + + OPCODE_JALR: begin // Jump and Link Register + if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin + // cheri_ex takes over JALR now as a single-cycle jump + cheri_jalr_en = (instr[14:12] == 3'b0); + rf_ren_a_o = 1'b1; + rf_we = 1'b1; + + if (instr[14:12] != 3'b0) begin + illegal_insn = 1'b1; + end + end else begin + jump_in_dec_o = 1'b1; + + if (instr_first_cycle_i) begin + // 
Calculate jump target (and store PC + 4 if BranchTargetALU is configured) + rf_we = BranchTargetALU; + jump_set_o = 1'b1; + end else begin + // Calculate and store PC+4 + rf_we = 1'b1; + end + if (instr[14:12] != 3'b0) begin + illegal_insn = 1'b1; + end + + rf_ren_a_o = 1'b1; + end + end + + OPCODE_BRANCH: begin // Branch + branch_in_dec_o = 1'b1; + // Check branch condition selection + unique case (instr[14:12]) + 3'b000, + 3'b001, + 3'b100, + 3'b101, + 3'b110, + 3'b111: illegal_insn = 1'b0; + default: illegal_insn = 1'b1; + endcase + + rf_ren_a_o = 1'b1; + rf_ren_b_o = 1'b1; + end + + //////////////// + // Load/store // + //////////////// + + OPCODE_STORE: begin + rf_ren_a_o = 1'b1; + rf_ren_b_o = 1'b1; + data_req_o = 1'b1; // keep this to pass LEC w/ ibex + data_we_o = 1'b1; + + if (instr[14]) begin + illegal_insn = 1'b1; + end else if (instr[13:12] == 2'b11) begin + if (CHERIoTEn & cheri_pmode_i) begin + cheri_cstore_en = ~illegal_c_insn_i; // csc + cheri_data_req_o = ~illegal_c_insn_i; + data_req_o = 1'b0; + illegal_insn = 1'b0; + end else begin + cheri_cstore_en = 1'b0; // csc + cheri_data_req_o = 1'b0; + illegal_insn = 1'b1; + end + end + + // store size + unique case (instr[13:12]) + 2'b00: data_type_o = 2'b10; // sb + 2'b01: data_type_o = 2'b01; // sh + 2'b10: data_type_o = 2'b00; // sw + default: data_type_o = 2'b00; + endcase + + end + + OPCODE_LOAD: begin + rf_ren_a_o = 1'b1; + data_req_o = 1'b1; + data_type_o = 2'b00; + + // sign/zero extension + data_sign_extension_o = ~instr[14]; + + // load size + unique case (instr[13:12]) + 2'b00: data_type_o = 2'b10; // lb(u) + 2'b01: data_type_o = 2'b01; // lh(u) + 2'b10: begin + data_type_o = 2'b00; // lw + if (instr[14]) begin + illegal_insn = 1'b1; // lwu does not exist + end + end + 2'b11: begin + // illegal_c_insn_i is added to fix the c.clcsp case + // (compressed decoder translate to cheri instruction but could still assert illegal_c_insn + // if rd == 0 + if (CHERIoTEn & cheri_pmode_i && ~instr[14] && 
~illegal_c_insn_i) begin + cheri_cload_en = 1'b1; + cheri_data_req_o = ~cheri_tsafe_en_i | CheriPPLBC; + data_req_o = 1'b0; // req generated by cheri_ex + illegal_insn = 1'b0; + end else begin // CHERIoT consider instr[14]=1 illegal + cheri_cload_en = 1'b0; + cheri_data_req_o = 1'b0; + illegal_insn = 1'b1; + end + end + default: begin + illegal_insn = 1'b1; + end + endcase + end + + ///////// + // ALU // + ///////// + + OPCODE_LUI: begin // Load Upper Immediate + rf_we = 1'b1; + end + + OPCODE_AUIPC: begin + if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin + cheri_auipcc_en = 1'b1; + illegal_insn = 1'b0; + rf_we = 1'b1; + end else begin + // OPCODE_AUIPC: begin // Add Upper Immediate to PC + rf_we = 1'b1; + end + end + + OPCODE_OP_IMM: begin // Register-Immediate ALU Operations + rf_ren_a_o = 1'b1; + rf_we = 1'b1; + + unique case (instr[14:12]) + 3'b000, + 3'b010, + 3'b011, + 3'b100, + 3'b110, + 3'b111: illegal_insn = 1'b0; + + 3'b001: begin + unique case (instr[31:27]) + 5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // slli + 5'b0_0100: begin // sloi + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end + 5'b0_1001, // bclri + 5'b0_0101, // bseti + // 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // binvi + 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? (instr[26:25] != 2'b00) : 1'b1; // binvi + 5'b0_0001: begin + if (instr[26] == 1'b0) begin // shfl + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end else begin + illegal_insn = 1'b1; + end + end + 5'b0_1100: begin + unique case(instr[26:20]) + 7'b000_0000, // clz + 7'b000_0001, // ctz + 7'b000_0010, // cpop + 7'b000_0100, // sext.b + 7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 
1'b0 : 1'b1; // sext.h + 7'b001_0000, // crc32.b + 7'b001_0001, // crc32.h + 7'b001_0010, // crc32.w + 7'b001_1000, // crc32c.b + 7'b001_1001, // crc32c.h + 7'b001_1010: begin // crc32c.w + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end + default: illegal_insn = 1'b1; + endcase + end + default : illegal_insn = 1'b1; + endcase + end + + 3'b101: begin + if (instr[26]) begin + illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // fsri + end else begin + unique case (instr[31:27]) + 5'b0_0000, // srli + 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // srai + + 5'b0_0100: begin // sroi + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end + 5'b0_1100, // rori + // 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bexti + 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? (instr[26:25] != 2'b00) : 1'b1; // bexti + + 5'b0_1101: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + illegal_insn = 1'b0; // grevi + end else if (RV32B == RV32BBalanced) begin + illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1; // rev8 + end else begin + illegal_insn = 1'b1; + end + end + 5'b0_0101: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + illegal_insn = 1'b0; // gorci + end else if (instr[24:20] == 5'b00111) begin + illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // orc.b + end else begin + illegal_insn = 1'b1; + end + end + 5'b0_0001: begin + if (instr[26] == 1'b0) begin // unshfl + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end else begin + illegal_insn = 1'b1; + end + end + + default: illegal_insn = 1'b1; + endcase + end + end + + default: illegal_insn = 1'b1; + endcase + end + + OPCODE_OP: begin // Register-Register ALU operation + rf_ren_a_o = 1'b1; + rf_ren_b_o = 1'b1; + rf_we = 1'b1; + if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin + illegal_insn = (RV32B != RV32BNone) ? 
1'b0 : 1'b1; // cmix / cmov / fsl / fsr + end else begin + unique case ({instr[31:25], instr[14:12]}) + // RV32I ALU operations + {7'b000_0000, 3'b000}, + {7'b010_0000, 3'b000}, + {7'b000_0000, 3'b010}, + {7'b000_0000, 3'b011}, + {7'b000_0000, 3'b100}, + {7'b000_0000, 3'b110}, + {7'b000_0000, 3'b111}, + {7'b000_0000, 3'b001}, + {7'b000_0000, 3'b101}, + {7'b010_0000, 3'b101}: illegal_insn = 1'b0; + + // RV32B zba + {7'b001_0000, 3'b010}, // sh1add + {7'b001_0000, 3'b100}, // sh2add + {7'b001_0000, 3'b110}, // sh3add + // RV32B zbb + {7'b010_0000, 3'b111}, // andn + {7'b010_0000, 3'b110}, // orn + {7'b010_0000, 3'b100}, // xnor + {7'b011_0000, 3'b001}, // rol + {7'b011_0000, 3'b101}, // ror + {7'b000_0101, 3'b100}, // min + {7'b000_0101, 3'b110}, // max + {7'b000_0101, 3'b101}, // minu + {7'b000_0101, 3'b111}, // maxu + {7'b000_0100, 3'b100}, // pack + {7'b010_0100, 3'b100}, // packu + {7'b000_0100, 3'b111}, // packh + // RV32B zbs + {7'b010_0100, 3'b001}, // bclr + {7'b001_0100, 3'b001}, // bset + {7'b011_0100, 3'b001}, // binv + {7'b010_0100, 3'b101}, // bext + // RV32B zbf + {7'b010_0100, 3'b111}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bfp + // RV32B zbp + {7'b011_0100, 3'b101}, // grev + {7'b001_0100, 3'b101}, // gorc + {7'b000_0100, 3'b001}, // shfl + {7'b000_0100, 3'b101}, // unshfl + {7'b001_0100, 3'b010}, // xperm.n + {7'b001_0100, 3'b100}, // xperm.b + {7'b001_0100, 3'b110}, // xperm.h + {7'b001_0000, 3'b001}, // slo + {7'b001_0000, 3'b101}, // sro + // RV32B zbc + {7'b000_0101, 3'b001}, // clmul + {7'b000_0101, 3'b010}, // clmulr + {7'b000_0101, 3'b011}: begin // clmulh + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end + // RV32B zbe + {7'b010_0100, 3'b110}, // bdecompress + {7'b000_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 
1'b0 : 1'b1; // bcompress + + // RV32M instructions + {7'b000_0001, 3'b000}: begin // mul + multdiv_operator_o = MD_OP_MULL; + multdiv_signed_mode_o = 2'b00; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b001}: begin // mulh + multdiv_operator_o = MD_OP_MULH; + multdiv_signed_mode_o = 2'b11; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b010}: begin // mulhsu + multdiv_operator_o = MD_OP_MULH; + multdiv_signed_mode_o = 2'b01; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b011}: begin // mulhu + multdiv_operator_o = MD_OP_MULH; + multdiv_signed_mode_o = 2'b00; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b100}: begin // div + multdiv_operator_o = MD_OP_DIV; + multdiv_signed_mode_o = 2'b11; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b101}: begin // divu + multdiv_operator_o = MD_OP_DIV; + multdiv_signed_mode_o = 2'b00; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b110}: begin // rem + multdiv_operator_o = MD_OP_REM; + multdiv_signed_mode_o = 2'b11; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + {7'b000_0001, 3'b111}: begin // remu + multdiv_operator_o = MD_OP_REM; + multdiv_signed_mode_o = 2'b00; + illegal_insn = (RV32M == RV32MNone) ? 1'b1 : 1'b0; + end + default: begin + illegal_insn = 1'b1; + end + endcase + end + end + + ///////////// + // Special // + ///////////// + + OPCODE_MISC_MEM: begin + unique case (instr[14:12]) + 3'b000: begin + // FENCE is treated as a NOP since all memory operations are already strictly ordered. + rf_we = 1'b0; + end + 3'b001: begin + // FENCE.I is implemented as a jump to the next PC, this gives the required flushing + // behaviour (iside prefetch buffer flushed and response to any outstanding iside + // requests will be ignored). + // If present, the ICache will also be flushed. 
+ jump_in_dec_o = 1'b1; + + rf_we = 1'b0; + + if (instr_first_cycle_i) begin + jump_set_o = 1'b1; + icache_inval_o = 1'b1; + end + end + default: begin + illegal_insn = 1'b1; + end + endcase + end + + OPCODE_SYSTEM: begin + if (instr[14:12] == 3'b000) begin + // non CSR related SYSTEM instructions + unique case (instr[31:20]) + 12'h000: // ECALL + // environment (system) call + ecall_insn_o = 1'b1; + + 12'h001: // ebreak + // debugger trap + ebrk_insn_o = 1'b1; + + 12'h302: // mret + mret_insn_o = 1'b1; + + 12'h7b2: // dret + dret_insn_o = 1'b1; + + 12'h105: // wfi + wfi_insn_o = 1'b1; + + default: + illegal_insn = 1'b1; + endcase + + // rs1 and rd must be 0 + if (instr_rs1 != 5'b0 || instr_rd != 5'b0) begin + illegal_insn = 1'b1; + end + end else begin + // instruction to read/modify CSR + csr_access_o = 1'b1; + rf_wdata_sel_o = RF_WD_CSR; + rf_we = 1'b1; + + if (~instr[14]) begin + rf_ren_a_o = 1'b1; + end + + unique case (instr[13:12]) + 2'b01: csr_op = CSR_OP_WRITE; + 2'b10: csr_op = CSR_OP_SET; + 2'b11: csr_op = CSR_OP_CLEAR; + default: csr_illegal = 1'b1; + endcase + + // always allow access to the following CSRs even without ASR permission + // -- 0xC01-0xC9F (unpriviledged counters) + // -- 0xB01-0xB9F (m-mode counters). + // note 0xb01 is undefined per rvi spec. CSR register logic will handle it. 
+ csr_cheri_always_ok_o = CHERIoTEn & cheri_pmode_i & + (((instr[31:28] == 4'hb) || (instr[31:28] == 4'hc)) && + ((instr[27] == 1'b0) || (instr[26:25] == 2'b00))); + + illegal_insn = csr_illegal; + end + end + + OPCODE_CHERI: begin + if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin + cheri_opcode_en = 1'b1; + rf_ren_a_o = cheri_rf_ren_a; + rf_ren_b_o = cheri_rf_ren_b; + rf_we = cheri_rf_we_dec; + illegal_insn = ~instr_is_legal_cheri; + end else begin + cheri_opcode_en = 1'b0; + rf_ren_a_o = 1'b0; + rf_ren_b_o = 1'b0; + rf_we = 1'b0; + illegal_insn = 1'b1; + end + end + + OPCODE_AUICGP: begin + if (CHERIoTEn & cheri_pmode_i & ~illegal_c_insn_i) begin + cheri_auicgp_en = 1'b1; + rf_ren_a_o = 1'b1; + rf_ren_b_o = 1'b0; + rf_we = 1'b1; + illegal_insn = 1'b0; + end else begin + cheri_opcode_en = 1'b0; + rf_ren_a_o = 1'b0; + rf_ren_b_o = 1'b0; + illegal_insn = 1'b1; + end + end + + default: begin + illegal_insn = 1'b1; + end + endcase + + // make sure illegal compressed instructions cause illegal instruction exceptions + if (illegal_c_insn_i) begin + illegal_insn = 1'b1; + end + + // make sure illegal instructions detected in the decoder do not propagate from decoder + // into register file, LSU, EX, WB, CSRs, PC + // NOTE: instructions can also be detected to be illegal inside the CSRs (upon accesses with + // insufficient privileges), or when accessing non-available registers in RV32E, + // these cases are not handled here + if (illegal_insn) begin + rf_we = 1'b0; + data_req_o = 1'b0; + data_we_o = 1'b0; + jump_in_dec_o = 1'b0; + jump_set_o = 1'b0; + branch_in_dec_o = 1'b0; + csr_access_o = 1'b0; + end + end + + ///////////////////////////// + // Decoder for ALU control // + ///////////////////////////// + + always_comb begin + alu_operator_o = ALU_SLTU; + alu_op_a_mux_sel_o = OP_A_IMM; + alu_op_b_mux_sel_o = OP_B_IMM; + + imm_a_mux_sel_o = IMM_A_ZERO; + imm_b_mux_sel_o = IMM_B_I; + + bt_a_mux_sel_o = OP_A_CURRPC; + bt_b_mux_sel_o = IMM_B_I; + + + opcode_alu = 
opcode_e'(instr_alu[6:0]); + + use_rs3_d = 1'b0; + alu_multicycle_o = 1'b0; + mult_sel_o = 1'b0; + div_sel_o = 1'b0; + + unique case (opcode_alu) + + /////////// + // Jumps // + /////////// + + OPCODE_JAL: begin // Jump and Link + if (BranchTargetALU) begin + bt_a_mux_sel_o = OP_A_CURRPC; + bt_b_mux_sel_o = IMM_B_J; + end + + // Jumps take two cycles without the BTALU + if (instr_first_cycle_i && !BranchTargetALU) begin + // Calculate jump target + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_J; + alu_operator_o = ALU_ADD; + end else begin + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_INCR_PC; + alu_operator_o = ALU_ADD; + end + end + + OPCODE_JALR: begin // Jump and Link Register + if (BranchTargetALU) begin + bt_a_mux_sel_o = OP_A_REG_A; + bt_b_mux_sel_o = IMM_B_I; + end + + // Jumps take two cycles without the BTALU + if (instr_first_cycle_i && !BranchTargetALU) begin + // Calculate jump target + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_I; + alu_operator_o = ALU_ADD; + end else begin + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_INCR_PC; + alu_operator_o = ALU_ADD; + end + end + + OPCODE_BRANCH: begin // Branch + // Check branch condition selection + unique case (instr_alu[14:12]) + 3'b000: alu_operator_o = ALU_EQ; + 3'b001: alu_operator_o = ALU_NE; + 3'b100: alu_operator_o = ALU_LT; + 3'b101: alu_operator_o = ALU_GE; + 3'b110: alu_operator_o = ALU_LTU; + 3'b111: alu_operator_o = ALU_GEU; + default: ; + endcase + + if (BranchTargetALU) begin + bt_a_mux_sel_o = OP_A_CURRPC; + // Not-taken branch will jump to next instruction (used in secure mode) + bt_b_mux_sel_o = branch_taken_i ? 
IMM_B_B : IMM_B_INCR_PC; + end + + // Without branch target ALU, a branch is a two-stage operation using the Main ALU in both + // stages + if (instr_first_cycle_i) begin + // First evaluate the branch condition + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_REG_B; + end else if (!BranchTargetALU) begin + // Then calculate jump target + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + // Not-taken branch will jump to next instruction (used in secure mode) + imm_b_mux_sel_o = branch_taken_i ? IMM_B_B : IMM_B_INCR_PC; + alu_operator_o = ALU_ADD; + end + end + + //////////////// + // Load/store // + //////////////// + + OPCODE_STORE: begin + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_REG_B; + alu_operator_o = ALU_ADD; + + if (!instr_alu[14]) begin + // offset from immediate + imm_b_mux_sel_o = IMM_B_S; + alu_op_b_mux_sel_o = OP_B_IMM; + end + end + + OPCODE_LOAD: begin + alu_op_a_mux_sel_o = OP_A_REG_A; + + // offset from immediate + alu_operator_o = ALU_ADD; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_I; + end + + ///////// + // ALU // + ///////// + + OPCODE_LUI: begin // Load Upper Immediate + alu_op_a_mux_sel_o = OP_A_IMM; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMM_A_ZERO; + imm_b_mux_sel_o = IMM_B_U; + alu_operator_o = ALU_ADD; + end + + // use CHERI version of AUIPCC when pmode == 1 + OPCODE_AUIPC: begin // Add Upper Immediate to PC + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_U; + alu_operator_o = ALU_ADD; + end + + OPCODE_OP_IMM: begin // Register-Immediate ALU Operations + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_I; + + unique case (instr_alu[14:12]) + 3'b000: alu_operator_o = ALU_ADD; // Add Immediate + 3'b010: alu_operator_o = ALU_SLT; // Set to one if Lower Than Immediate + 3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned + 3'b100: 
alu_operator_o = ALU_XOR; // Exclusive Or with Immediate + 3'b110: alu_operator_o = ALU_OR; // Or with Immediate + 3'b111: alu_operator_o = ALU_AND; // And with Immediate + + 3'b001: begin + if (RV32B != RV32BNone) begin + unique case (instr_alu[31:27]) + 5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate + // Shift Left Ones by Immediate + 5'b0_0100: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO; + end + 5'b0_1001: alu_operator_o = ALU_BCLR; // Clear bit specified by immediate + 5'b0_0101: alu_operator_o = ALU_BSET; // Set bit specified by immediate + 5'b0_1101: alu_operator_o = ALU_BINV; // Invert bit specified by immediate. + // Shuffle with Immediate Control Value + 5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL; + 5'b0_1100: begin + unique case (instr_alu[26:20]) + 7'b000_0000: alu_operator_o = ALU_CLZ; // clz + 7'b000_0001: alu_operator_o = ALU_CTZ; // ctz + 7'b000_0010: alu_operator_o = ALU_CPOP; // cpop + 7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b + 7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h + 7'b001_0000: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + alu_operator_o = ALU_CRC32_B; // crc32.b + alu_multicycle_o = 1'b1; + end + end + 7'b001_0001: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + alu_operator_o = ALU_CRC32_H; // crc32.h + alu_multicycle_o = 1'b1; + end + end + 7'b001_0010: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + alu_operator_o = ALU_CRC32_W; // crc32.w + alu_multicycle_o = 1'b1; + end + end + 7'b001_1000: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + alu_operator_o = ALU_CRC32C_B; // crc32c.b + alu_multicycle_o = 1'b1; + end + end + 7'b001_1001: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + alu_operator_o = ALU_CRC32C_H; // crc32c.h + alu_multicycle_o = 1'b1; + end + end + 7'b001_1010: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) 
begin + alu_operator_o = ALU_CRC32C_W; // crc32c.w + alu_multicycle_o = 1'b1; + end + end + default: ; + endcase + end + + default: ; + endcase + end else begin + alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate + end + end + + 3'b101: begin + if (RV32B != RV32BNone) begin + if (instr_alu[26] == 1'b1) begin + alu_operator_o = ALU_FSR; + alu_multicycle_o = 1'b1; + if (instr_first_cycle_i) begin + use_rs3_d = 1'b1; + end else begin + use_rs3_d = 1'b0; + end + end else begin + unique case (instr_alu[31:27]) + 5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate + 5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate + // Shift Right Ones by Immediate + 5'b0_0100: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO; + end + 5'b0_1001: alu_operator_o = ALU_BEXT; // Extract bit specified by immediate. + 5'b0_1100: begin + alu_operator_o = ALU_ROR; // Rotate Right by Immediate + alu_multicycle_o = 1'b1; + end + 5'b0_1101: alu_operator_o = ALU_GREV; // General Reverse with Imm Control Val + 5'b0_0101: alu_operator_o = ALU_GORC; // General Or-combine with Imm Control Val + // Unshuffle with Immediate Control Value + 5'b0_0001: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin + if (instr_alu[26] == 1'b0) alu_operator_o = ALU_UNSHFL; + end + end + default: ; + endcase + end + + end else begin + if (instr_alu[31:27] == 5'b0_0000) begin + alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate + end else if (instr_alu[31:27] == 5'b0_1000) begin + alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate + end + end + end + + default: ; + endcase + end + + OPCODE_OP: begin // Register-Register ALU operation + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_REG_B; + + if (instr_alu[26]) begin + if (RV32B != RV32BNone) begin + unique case ({instr_alu[26:25], instr_alu[14:12]}) + {2'b11, 3'b001}: begin + alu_operator_o = ALU_CMIX; // cmix + 
alu_multicycle_o = 1'b1; + if (instr_first_cycle_i) begin + use_rs3_d = 1'b1; + end else begin + use_rs3_d = 1'b0; + end + end + {2'b11, 3'b101}: begin + alu_operator_o = ALU_CMOV; // cmov + alu_multicycle_o = 1'b1; + if (instr_first_cycle_i) begin + use_rs3_d = 1'b1; + end else begin + use_rs3_d = 1'b0; + end + end + {2'b10, 3'b001}: begin + alu_operator_o = ALU_FSL; // fsl + alu_multicycle_o = 1'b1; + if (instr_first_cycle_i) begin + use_rs3_d = 1'b1; + end else begin + use_rs3_d = 1'b0; + end + end + {2'b10, 3'b101}: begin + alu_operator_o = ALU_FSR; // fsr + alu_multicycle_o = 1'b1; + if (instr_first_cycle_i) begin + use_rs3_d = 1'b1; + end else begin + use_rs3_d = 1'b0; + end + end + default: ; + endcase + end + end else begin + unique case ({instr_alu[31:25], instr_alu[14:12]}) + // RV32I ALU operations + {7'b000_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add + {7'b010_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub + {7'b000_0000, 3'b010}: alu_operator_o = ALU_SLT; // Set Lower Than + {7'b000_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned + {7'b000_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor + {7'b000_0000, 3'b110}: alu_operator_o = ALU_OR; // Or + {7'b000_0000, 3'b111}: alu_operator_o = ALU_AND; // And + {7'b000_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical + {7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical + {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic + + // RV32B ALU Operations + {7'b011_0000, 3'b001}: begin + if (RV32B != RV32BNone) begin + alu_operator_o = ALU_ROL; + alu_multicycle_o = 1'b1; + end + end + {7'b011_0000, 3'b101}: begin + if (RV32B != RV32BNone) begin + alu_operator_o = ALU_ROR; + alu_multicycle_o = 1'b1; + end + end + + {7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN; + {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; + {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; + 
{7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU; + + {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK; + {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU; + {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH; + + {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_XNOR; + {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN; + {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN; + + // RV32B zba + {7'b001_0000, 3'b010}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH1ADD; + {7'b001_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH2ADD; + {7'b001_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH3ADD; + + // RV32B zbs + {7'b010_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BCLR; + {7'b001_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BSET; + {7'b011_0100, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_BINV; + {7'b010_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_BEXT; + + // RV32B zbf + {7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP; + + // RV32B zbp + {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; + {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; + {7'b000_0100, 3'b001}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SHFL; + end + {7'b000_0100, 3'b101}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; + end + {7'b001_0100, 3'b010}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; + end + {7'b001_0100, 3'b100}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; + end + {7'b001_0100, 3'b110}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; + end + {7'b001_0000, 3'b001}: begin 
+ if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO; + end + {7'b001_0000, 3'b101}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO; + end + + // RV32B zbc + {7'b000_0101, 3'b001}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; + end + {7'b000_0101, 3'b010}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULR; + end + {7'b000_0101, 3'b011}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULH; + end + + // RV32B zbe + {7'b010_0100, 3'b110}: begin + if (RV32B == RV32BFull) begin + alu_operator_o = ALU_BDECOMPRESS; + alu_multicycle_o = 1'b1; + end + end + {7'b000_0100, 3'b110}: begin + if (RV32B == RV32BFull) begin + alu_operator_o = ALU_BCOMPRESS; + alu_multicycle_o = 1'b1; + end + end + + // RV32M instructions, all use the same ALU operation + {7'b000_0001, 3'b000}: begin // mul + alu_operator_o = ALU_ADD; + mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b001}: begin // mulh + alu_operator_o = ALU_ADD; + mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b010}: begin // mulhsu + alu_operator_o = ALU_ADD; + mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b011}: begin // mulhu + alu_operator_o = ALU_ADD; + mult_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b100}: begin // div + alu_operator_o = ALU_ADD; + div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b101}: begin // divu + alu_operator_o = ALU_ADD; + div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b110}: begin // rem + alu_operator_o = ALU_ADD; + div_sel_o = (RV32M == RV32MNone) ? 1'b0 : 1'b1; + end + {7'b000_0001, 3'b111}: begin // remu + alu_operator_o = ALU_ADD; + div_sel_o = (RV32M == RV32MNone) ? 
1'b0 : 1'b1; + end + + default: ; + endcase + end + end + + ///////////// + // Special // + ///////////// + + OPCODE_MISC_MEM: begin + unique case (instr_alu[14:12]) + 3'b000: begin + // FENCE is treated as a NOP since all memory operations are already strictly ordered. + alu_operator_o = ALU_ADD; // nop + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_IMM; + end + 3'b001: begin + // FENCE.I will flush the IF stage, prefetch buffer and ICache if present. + if (BranchTargetALU) begin + bt_a_mux_sel_o = OP_A_CURRPC; + bt_b_mux_sel_o = IMM_B_INCR_PC; + end else begin + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMM_B_INCR_PC; + alu_operator_o = ALU_ADD; + end + end + default: ; + endcase + end + + OPCODE_SYSTEM: begin + if (instr_alu[14:12] == 3'b000) begin + // non CSR related SYSTEM instructions + alu_op_a_mux_sel_o = OP_A_REG_A; + alu_op_b_mux_sel_o = OP_B_IMM; + end else begin + // instruction to read/modify CSR + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMM_A_Z; + imm_b_mux_sel_o = IMM_B_I; // CSR address is encoded in I imm + + if (instr_alu[14]) begin + // rs1 field is used as immediate + alu_op_a_mux_sel_o = OP_A_IMM; + end else begin + alu_op_a_mux_sel_o = OP_A_REG_A; + end + end + + end + default: ; + endcase + end + + // do not enable multdiv in case of illegal instruction exceptions + assign mult_en_o = illegal_insn_o ? 1'b0 : mult_sel_o; + assign div_en_o = illegal_insn_o ? 
1'b0 : div_sel_o;
+
+  // make sure instructions accessing non-available registers in RV32E cause illegal
+  // instruction exceptions
+  assign illegal_insn_o = illegal_insn | illegal_reg_rv32e | illegal_reg_cheri;
+
+  // do not propagate regfile write enable if non-available registers are accessed in RV32E
+  assign rf_we_o = rf_we & ~illegal_reg_rv32e & ~illegal_reg_cheri;
+
+  // Not all bits are used
+  assign unused_instr_alu = {instr_alu[19:15],instr_alu[11:7]};
+
+  assign instr_is_legal_cheri_o = instr_is_legal_cheri & ~illegal_reg_cheri;
+
+  // cheri decoder
+  if (CHERIoTEn) begin : gen_cheri_decoder
+    cheri_decoder # (
+      .CheriPPLBC  (CheriPPLBC),
+      .CheriSBND2  (CheriSBND2)
+    ) u_cheri_decoder (
+      .cheri_opcode_en_i      (cheri_opcode_en),
+      .cheri_tsafe_en_i       (cheri_tsafe_en_i),
+      .cheri_auipcc_en_i      (cheri_auipcc_en),
+      .cheri_auicgp_en_i      (cheri_auicgp_en),
+      .cheri_jalr_en_i        (cheri_jalr_en),
+      .cheri_jal_en_i         (cheri_jal_en),
+      .cheri_cload_en_i       (cheri_cload_en),
+      .cheri_cstore_en_i      (cheri_cstore_en),
+      .instr_rdata_i          (instr_rdata_i),
+      .instr_is_cheri_o       (instr_is_cheri_o),
+      .instr_is_legal_cheri_o (instr_is_legal_cheri),
+      .cheri_imm12_o          (cheri_imm12_o),
+      .cheri_imm20_o          (cheri_imm20_o),
+      .cheri_imm21_o          (cheri_imm21_o),
+      .cheri_operator_o       (cheri_operator_o),
+      .cheri_cs2_dec_o        (cheri_cs2_dec_o),
+      .cheri_rf_ren_a_o       (cheri_rf_ren_a),
+      .cheri_rf_ren_b_o       (cheri_rf_ren_b),
+      .cheri_rf_we_dec_o      (cheri_rf_we_dec),
+      .cheri_multicycle_dec_o (cheri_multicycle_dec_o)
+    );
+  end else begin
+    assign instr_is_cheri_o       = 1'b0;
+    assign instr_is_legal_cheri   = 1'b0;
+    assign cheri_imm12_o          = 12'h0;
+    assign cheri_imm20_o          = 20'h0;
+    assign cheri_imm21_o          = 21'h0;
+    assign cheri_operator_o       = 'h0;
+    assign cheri_cs2_dec_o        = 1'b0;
+    assign cheri_rf_ren_a         = 1'b0;
+    assign cheri_rf_ren_b         = 1'b0;
+    assign cheri_rf_we_dec        = 1'b0;
+    assign cheri_multicycle_dec_o = 1'b0;
+
+  end
+
+  ////////////////
+  // Assertions //
+  ////////////////
+
+  // Selectors must be known/valid.
+  `ASSERT(IbexRegImmAluOpKnown, (opcode == OPCODE_OP_IMM) |->
+      !$isunknown(instr[14:12]))
+endmodule  // decoder
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv b/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv new file mode 100644 index 0000000..897172d --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_dummy_instr.sv
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Dummy instruction module
 *
 * Provides pseudo-randomly inserted fake instructions for secure code obfuscation.
 *
 * An internal LFSR supplies both the interval between insertions (a counter
 * threshold) and the operands/type of each fake instruction. The inserted
 * instruction is an R-type (opcode 7'h33) with rd = x0, so it is discarded
 * architecturally while still occupying the pipeline.
 */

module cheriot_dummy_instr import cheriot_pkg::*; #(
  parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault,
  parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault
) (
  // Clock and reset
  input  logic        clk_i,
  input  logic        rst_ni,

  // Interface to CSRs
  input  logic        dummy_instr_en_i,       // feature enable
  input  logic [2:0]  dummy_instr_mask_i,     // masks top threshold bits to shorten period
  input  logic        dummy_instr_seed_en_i,  // strobe: mix a new seed into the LFSR
  input  logic [31:0] dummy_instr_seed_i,     // seed value from CSR

  // Interface to IF stage
  input  logic        fetch_valid_i,          // a real fetched instruction is available
  input  logic        id_in_ready_i,          // ID stage can accept an instruction
  output logic        insert_dummy_instr_o,   // insert a dummy instruction this cycle
  output logic [31:0] dummy_instr_data_o      // encoded dummy instruction
);

  localparam int unsigned TIMEOUT_CNT_W = 5;  // width of the insertion-interval counter
  localparam int unsigned OP_W          = 5;  // register-operand field width

  typedef enum logic [1:0] {
    DUMMY_ADD = 2'b00,
    DUMMY_MUL = 2'b01,
    DUMMY_DIV = 2'b10,
    DUMMY_AND = 2'b11
  } dummy_instr_e;

  // Layout of the pseudo-random bits pulled from the LFSR state.
  typedef struct packed {
    dummy_instr_e             instr_type;
    logic [OP_W-1:0]          op_b;
    logic [OP_W-1:0]          op_a;
    logic [TIMEOUT_CNT_W-1:0] cnt;
  } lfsr_data_t;
  localparam int unsigned LFSR_OUT_W = $bits(lfsr_data_t);

  lfsr_data_t               lfsr_data;
  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_incr, dummy_cnt_threshold;
  logic [TIMEOUT_CNT_W-1:0] dummy_cnt_d, dummy_cnt_q;
  logic                     dummy_cnt_en;
  logic                     lfsr_en;
  logic [LFSR_OUT_W-1:0]    lfsr_state;
  logic                     insert_dummy_instr;
  logic [6:0]               dummy_set;
  logic [2:0]               dummy_opcode;
  logic [31:0]              dummy_instr;
  logic [31:0]              dummy_instr_seed_q, dummy_instr_seed_d;

  // Shift the LFSR every time we insert an instruction
  assign lfsr_en = insert_dummy_instr & id_in_ready_i;

  // New seed is XOR-mixed with the previously captured seed rather than
  // overwriting it, so successive CSR writes accumulate entropy.
  assign dummy_instr_seed_d = dummy_instr_seed_q ^ dummy_instr_seed_i;

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      dummy_instr_seed_q <= '0;
    end else if (dummy_instr_seed_en_i) begin
      dummy_instr_seed_q <= dummy_instr_seed_d;
    end
  end

  prim_lfsr #(
    .LfsrDw      ( LfsrWidth       ),
    .StateOutDw  ( LFSR_OUT_W      ),
    .DefaultSeed ( RndCnstLfsrSeed ),
    .StatePermEn ( 1'b1            ),
    .StatePerm   ( RndCnstLfsrPerm )
  ) lfsr_i (
    .clk_i     ( clk_i                 ),
    .rst_ni    ( rst_ni                ),
    .seed_en_i ( dummy_instr_seed_en_i ),
    .seed_i    ( dummy_instr_seed_d    ),
    .lfsr_en_i ( lfsr_en               ),
    .entropy_i ( '0                    ),
    .state_o   ( lfsr_state            )
  );

  // Extract fields from LFSR
  assign lfsr_data = lfsr_data_t'(lfsr_state);

  // Set count threshold for inserting a new instruction. This is the pseudo-random value from the
  // LFSR with a mask applied (based on CSR config data) to shorten the period if required.
  assign dummy_cnt_threshold = lfsr_data.cnt & {dummy_instr_mask_i,{TIMEOUT_CNT_W-3{1'b1}}};
  assign dummy_cnt_incr      = dummy_cnt_q + {{TIMEOUT_CNT_W-1{1'b0}},1'b1};
  // Clear the counter everytime a new instruction is inserted
  assign dummy_cnt_d         = insert_dummy_instr ? '0 : dummy_cnt_incr;
  // Increment the counter for each executed instruction while dummy instructions are
  // enabled.
  assign dummy_cnt_en        = dummy_instr_en_i & id_in_ready_i &
                               (fetch_valid_i | insert_dummy_instr);

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      dummy_cnt_q <= '0;
    end else if (dummy_cnt_en) begin
      dummy_cnt_q <= dummy_cnt_d;
    end
  end

  // Insert a dummy instruction each time the counter hits the threshold
  assign insert_dummy_instr = dummy_instr_en_i & (dummy_cnt_q == dummy_cnt_threshold);

  // Encode instruction: dummy_set is the funct7 field, dummy_opcode the funct3
  // field of the generated R-type instruction (ADD / MUL / DIV / AND).
  always_comb begin
    unique case (lfsr_data.instr_type)
      DUMMY_ADD: begin
        dummy_set    = 7'b0000000;
        dummy_opcode = 3'b000;
      end
      DUMMY_MUL: begin
        dummy_set    = 7'b0000001;
        dummy_opcode = 3'b000;
      end
      DUMMY_DIV: begin
        dummy_set    = 7'b0000001;
        dummy_opcode = 3'b100;
      end
      DUMMY_AND: begin
        dummy_set    = 7'b0000000;
        dummy_opcode = 3'b111;
      end
      default: begin
        dummy_set    = 7'b0000000;
        dummy_opcode = 3'b000;
      end
    endcase
  end

  //                    SET        RS2             RS1             OP            RD
  // rd is fixed to x0 so the result is discarded.
  assign dummy_instr = {dummy_set, lfsr_data.op_b, lfsr_data.op_a, dummy_opcode, 5'h00, 7'h33};

  // Assign outputs
  assign insert_dummy_instr_o = insert_dummy_instr;
  assign dummy_instr_data_o   = dummy_instr;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv b/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv new file mode 100644 index 0000000..8eb30a5 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_ex_block.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Execution stage
 *
 * Execution block: Hosts ALU and MUL/DIV unit
 */
module cheriot_ex_block #(
  parameter cheriot_pkg::rv32m_e RV32M           = cheriot_pkg::RV32MFast,
  parameter cheriot_pkg::rv32b_e RV32B           = cheriot_pkg::RV32BNone,
  parameter bit                  BranchTargetALU = 0
) (
  input  logic                  clk_i,
  input  logic                  rst_ni,

  // ALU
  input  cheriot_pkg::alu_op_e  alu_operator_i,
  input  logic [31:0]           alu_operand_a_i,
  input  logic [31:0]           alu_operand_b_i,
  input  logic                  alu_instr_first_cycle_i,

  // Branch Target ALU
  // All of these signals are unused when BranchTargetALU == 0
  input  logic [31:0]           bt_a_operand_i,
  input  logic [31:0]           bt_b_operand_i,

  // Multiplier/Divider
  input  cheriot_pkg::md_op_e   multdiv_operator_i,
  input  logic                  mult_en_i,             // dynamic enable signal, for FSM control
  input  logic                  div_en_i,              // dynamic enable signal, for FSM control
  input  logic                  mult_sel_i,            // static decoder output, for data muxes
  input  logic                  div_sel_i,             // static decoder output, for data muxes
  input  logic  [1:0]           multdiv_signed_mode_i,
  input  logic [31:0]           multdiv_operand_a_i,
  input  logic [31:0]           multdiv_operand_b_i,
  input  logic                  multdiv_ready_id_i,
  input  logic                  data_ind_timing_i,

  // intermediate val reg (held in ID stage, shared between ALU and MUL/DIV)
  output logic [1:0]            imd_val_we_o,
  output logic [33:0]           imd_val_d_o[2],
  input  logic [33:0]           imd_val_q_i[2],

  // Outputs
  output logic [31:0]           alu_adder_result_ex_o, // to LSU
  output logic [31:0]           result_ex_o,
  output logic [31:0]           branch_target_o,       // to IF
  output logic                  branch_decision_o,     // to ID

  output logic                  ex_valid_o             // EX has valid output
);

  import cheriot_pkg::*;

  logic [31:0] alu_result, multdiv_result;

  logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a;
  logic [33:0] alu_adder_result_ext;
  logic        alu_cmp_result, alu_is_equal_result;
  logic        multdiv_valid;
  logic        multdiv_sel;
  logic [31:0] alu_imd_val_q[2];
  logic [31:0] alu_imd_val_d[2];
  logic [ 1:0] alu_imd_val_we;
  logic [33:0] multdiv_imd_val_d[2];
  logic [ 1:0] multdiv_imd_val_we;

  /*
    The multdiv_i output is never selected if RV32M=RV32MNone
    At synthesis time, all the combinational and sequential logic
    from the multdiv_i module are eliminated
  */
  if (RV32M != RV32MNone) begin : gen_multdiv_m
    assign multdiv_sel = mult_sel_i | div_sel_i;
  end else begin : gen_multdiv_no_m
    assign multdiv_sel = 1'b0;
  end

  // Intermediate Value Register Mux: MUL/DIV uses the full 34-bit registers,
  // the ALU only the lower 32 bits (zero-padded here).
  assign imd_val_d_o[0] = multdiv_sel ? multdiv_imd_val_d[0] : {2'b0, alu_imd_val_d[0]};
  assign imd_val_d_o[1] = multdiv_sel ? multdiv_imd_val_d[1] : {2'b0, alu_imd_val_d[1]};
  assign imd_val_we_o   = multdiv_sel ? multdiv_imd_val_we : alu_imd_val_we;

  assign alu_imd_val_q = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]};

  assign result_ex_o = multdiv_sel ? multdiv_result : alu_result;

  // branch handling
  assign branch_decision_o = alu_cmp_result;

  if (BranchTargetALU) begin : g_branch_target_alu
    // Dedicated adder computes the branch target in parallel with the main ALU.
    logic [32:0] bt_alu_result;
    logic        unused_bt_carry;

    assign bt_alu_result   = bt_a_operand_i + bt_b_operand_i;

    assign unused_bt_carry = bt_alu_result[32];
    assign branch_target_o = bt_alu_result[31:0];
  end else begin : g_no_branch_target_alu
    // Unused bt_operand signals cause lint errors, this avoids them
    logic [31:0] unused_bt_a_operand, unused_bt_b_operand;

    assign unused_bt_a_operand = bt_a_operand_i;
    assign unused_bt_b_operand = bt_b_operand_i;

    // Without the dedicated adder, the branch target comes from the main ALU adder.
    assign branch_target_o = alu_adder_result_ex_o;
  end

  /////////
  // ALU //
  /////////

  cheriot_alu #(
    .RV32B(RV32B)
  ) alu_i (
    .operator_i         (alu_operator_i),
    .operand_a_i        (alu_operand_a_i),
    .operand_b_i        (alu_operand_b_i),
    .instr_first_cycle_i(alu_instr_first_cycle_i),
    .imd_val_q_i        (alu_imd_val_q),
    .imd_val_we_o       (alu_imd_val_we),
    .imd_val_d_o        (alu_imd_val_d),
    .multdiv_operand_a_i(multdiv_alu_operand_a),
    .multdiv_operand_b_i(multdiv_alu_operand_b),
    .multdiv_sel_i      (multdiv_sel),
    .adder_result_o     (alu_adder_result_ex_o),
    .adder_result_ext_o (alu_adder_result_ext),
    .result_o           (alu_result),
    .comparison_result_o(alu_cmp_result),
    .is_equal_result_o  (alu_is_equal_result)
  );

  ////////////////
  // Multiplier //
  ////////////////

  if (RV32M == RV32MSlow) begin : gen_multdiv_slow
    cheriot_multdiv_slow multdiv_i (
      .clk_i             (clk_i),
      .rst_ni            (rst_ni),
      .mult_en_i         (mult_en_i),
      .div_en_i          (div_en_i),
      .mult_sel_i        (mult_sel_i),
      .div_sel_i         (div_sel_i),
      .operator_i        (multdiv_operator_i),
      .signed_mode_i     (multdiv_signed_mode_i),
      .op_a_i            (multdiv_operand_a_i),
      .op_b_i            (multdiv_operand_b_i),
      .alu_adder_ext_i   (alu_adder_result_ext),
      .alu_adder_i       (alu_adder_result_ex_o),
      .equal_to_zero_i   (alu_is_equal_result),
      .data_ind_timing_i (data_ind_timing_i),
      .valid_o           (multdiv_valid),
      .alu_operand_a_o   (multdiv_alu_operand_a),
      .alu_operand_b_o   (multdiv_alu_operand_b),
      .imd_val_q_i       (imd_val_q_i),
      .imd_val_d_o       (multdiv_imd_val_d),
      .imd_val_we_o      (multdiv_imd_val_we),
      .multdiv_ready_id_i(multdiv_ready_id_i),
      .multdiv_result_o  (multdiv_result)
    );
  end else if (RV32M == RV32MFast || RV32M == RV32MSingleCycle) begin : gen_multdiv_fast
    cheriot_multdiv_fast #(
      .RV32M(RV32M)
    ) multdiv_i (
      .clk_i             (clk_i),
      .rst_ni            (rst_ni),
      .mult_en_i         (mult_en_i),
      .div_en_i          (div_en_i),
      .mult_sel_i        (mult_sel_i),
      .div_sel_i         (div_sel_i),
      .operator_i        (multdiv_operator_i),
      .signed_mode_i     (multdiv_signed_mode_i),
      .op_a_i            (multdiv_operand_a_i),
      .op_b_i            (multdiv_operand_b_i),
      .alu_operand_a_o   (multdiv_alu_operand_a),
      .alu_operand_b_o   (multdiv_alu_operand_b),
      .alu_adder_ext_i   (alu_adder_result_ext),
      .alu_adder_i       (alu_adder_result_ex_o),
      .equal_to_zero_i   (alu_is_equal_result),
      .data_ind_timing_i (data_ind_timing_i),
      .imd_val_q_i       (imd_val_q_i),
      .imd_val_d_o       (multdiv_imd_val_d),
      .imd_val_we_o      (multdiv_imd_val_we),
      .multdiv_ready_id_i(multdiv_ready_id_i),
      .valid_o           (multdiv_valid),
      .multdiv_result_o  (multdiv_result)
    );
  end

  // Multiplier/divider may require multiple cycles. The ALU output is valid in the same cycle
  // unless the intermediate result register is being written (which indicates this isn't the
  // final cycle of ALU operation).
  assign ex_valid_o = multdiv_sel ? multdiv_valid : ~(|alu_imd_val_we);

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv b/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv new file mode 100644 index 0000000..463a9ec --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_fetch_fifo.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Fetch Fifo for 32 bit memory interface
 *
 * input port: send address and data to the FIFO
 * clear_i clears the FIFO for the following cycle, including any new request
 */

`include "prim_assert.sv"

module cheriot_fetch_fifo #(
  parameter int unsigned NUM_REQS = 2,
  parameter bit          ResetAll = 1'b0
) (
  input  logic                clk_i,
  input  logic                rst_ni,

  // control signals
  input  logic                clear_i,   // clears the contents of the FIFO
  output logic [NUM_REQS-1:0] busy_o,

  // input port
  input  logic                in_valid_i,
  input  logic [31:0]         in_addr_i,
  input  logic [31:0]         in_rdata_i,
  input  logic                in_err_i,

  input  logic                cheri_force_uc_i, // force unaligned compressed based on CHERI bounds check

  // output port
  output logic                out_valid_o,
  input  logic                out_ready_i,
  output logic [31:0]         out_addr_o,
  output logic [31:0]         out_rdata_o,
  output logic                out_err_o,
  output logic                out_err_plus2_o
);

  localparam int unsigned DEPTH = NUM_REQS+1;

  // index 0 is used for output
  logic [DEPTH-1:0] [31:0]  rdata_d,   rdata_q;
  logic [DEPTH-1:0]         err_d,     err_q;
  logic [DEPTH-1:0]         valid_d,   valid_q;
  logic [DEPTH-1:0]         lowest_free_entry;
  logic [DEPTH-1:0]         valid_pushed, valid_popped;
  logic [DEPTH-1:0]         entry_en;

  logic                     pop_fifo;
  logic             [31:0]  rdata, rdata_unaligned;
  logic                     err,   err_unaligned, err_plus2;
  logic                     valid, valid_unaligned;

  logic                     aligned_is_compressed, unaligned_is_compressed;

  logic                     addr_incr_two;
  logic [31:1]              instr_addr_next;
  logic [31:1]              instr_addr_d, instr_addr_q;
  logic                     instr_addr_en;
  logic                     unused_addr_in;

  /////////////////
  // Output port //
  /////////////////

  // Bypass: if entry 0 is empty, the incoming data is presented directly.
  assign rdata = valid_q[0] ? rdata_q[0] : in_rdata_i;
  assign err   = valid_q[0] ? err_q[0]   : in_err_i;
  assign valid = valid_q[0] | in_valid_i;

  // The FIFO contains word aligned memory fetches, but the instructions contained in each entry
  // might be half-word aligned (due to compressed instructions)
  // e.g.
  //              | 31               16 | 15               0 |
  // FIFO entry 0 | Instr 1 [15:0]      | Instr 0 [15:0]     |
  // FIFO entry 1 | Instr 2 [15:0]      | Instr 1 [31:16]    |
  //
  // The FIFO also has a direct bypass path, so a complete instruction might be made up of data
  // from the FIFO and new incoming data.
  //

  // Construct the output data for an unaligned instruction
  assign rdata_unaligned = valid_q[1] ? {rdata_q[1][15:0], rdata[31:16]} :
                                        {in_rdata_i[15:0], rdata[31:16]};

  // If entry[1] is valid, an error can come from entry[0] or entry[1], unless the
  // instruction in entry[0] is compressed (entry[1] is a new instruction)
  // If entry[1] is not valid, and entry[0] is, an error can come from entry[0] or the incoming
  // data, unless the instruction in entry[0] is compressed
  // If entry[0] is not valid, the error must come from the incoming data
  assign err_unaligned   = valid_q[1] ? ((err_q[1] & ~unaligned_is_compressed) | err_q[0]) :
                                        ((valid_q[0] & err_q[0]) |
                                         (in_err_i & (~valid_q[0] | ~unaligned_is_compressed)));

  // Record when an error is caused by the second half of an unaligned 32bit instruction.
  // Only needs to be correct when unaligned and if err_unaligned is set
  assign err_plus2       = valid_q[1] ? (err_q[1] & ~err_q[0]) :
                                        (in_err_i & valid_q[0] & ~err_q[0]);

  // An uncompressed unaligned instruction is only valid if both parts are available
  assign valid_unaligned = valid_q[1] ? 1'b1 :
                                        (valid_q[0] & in_valid_i);

  // If there is an error, rdata is unknown
`ifdef DII_SIM
  // Direct Instruction Injection simulation mode.
  // NOTE(review): instr_rdata_dii has no driver in this module — presumably
  // forced from the DII testbench; confirm against the simulation environment.
  logic [31:0] instr_rdata_dii;
  logic [31:0] instr_pc;
  logic        instr_ack;

  // for DII we directly force out_rdata_o (re-aligned instruction)
  // to keep the unaligned/aligned_is_compressed signals in sync
  // 32-bit instruction; instr_rdata_dii[31:0] = instr
  // 16-bit instruction: instr_rdata_dii[15:0] = compressed instruction
  //                     instr_rdata_dii[31:0] = don't care

  assign unaligned_is_compressed = out_addr_o[1] & cheri_force_uc_i | ((instr_rdata_dii[1:0] != 2'b11) & ~err);
  assign aligned_is_compressed   = ~out_addr_o[1] & (instr_rdata_dii[1:0] != 2'b11) & ~err;

  assign instr_ack = out_ready_i & out_valid_o;
  assign instr_pc  = out_addr_o;
`else
  // cheri_force_uc_i forces the unaligned halfword to be treated as compressed
  // (CHERI bounds check prevents fetching the second half).
  assign unaligned_is_compressed = cheri_force_uc_i | ((rdata[17:16] != 2'b11) & ~err);
  assign aligned_is_compressed   = (rdata[ 1: 0] != 2'b11) & ~err;
`endif

  ////////////////////////////////////////
  // Instruction aligner (if unaligned) //
  ////////////////////////////////////////

  always_comb begin
    if (out_addr_o[1]) begin
      // unaligned case

`ifdef DII_SIM
      out_rdata_o     = instr_rdata_dii;
`else
      out_rdata_o     = rdata_unaligned;
`endif
      out_err_o       = err_unaligned;
      out_err_plus2_o = err_plus2;

      if (unaligned_is_compressed) begin
        out_valid_o = valid;
      end else begin
        out_valid_o = valid_unaligned;
      end
    end else begin
      // aligned case
`ifdef DII_SIM
      out_rdata_o     = instr_rdata_dii;
`else
      out_rdata_o     = rdata;
`endif
      out_err_o       = err;
      out_err_plus2_o = 1'b0;
      out_valid_o     = valid;
    end
  end

  /////////////////////////
  // Instruction address //
  /////////////////////////

  // Update the address on branches and every time an instruction is driven
  assign instr_addr_en = clear_i | (out_ready_i & out_valid_o);

  // Increment the address by two every time a compressed instruction is popped
  assign addr_incr_two = instr_addr_q[1] ? unaligned_is_compressed :
                                           aligned_is_compressed;

  assign instr_addr_next = (instr_addr_q[31:1] +
                            // Increment address by 4 or 2
                            {29'd0,~addr_incr_two,addr_incr_two});

  assign instr_addr_d = clear_i ? in_addr_i[31:1] :
                                  instr_addr_next;

  if (ResetAll) begin : g_instr_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        instr_addr_q <= '0;
      end else if (instr_addr_en) begin
        instr_addr_q <= instr_addr_d;
      end
    end
  end else begin : g_instr_addr_nr
    // No reset on the address register itself; it is only consumed after a
    // clear/branch has loaded a valid address.
    always_ff @(posedge clk_i) begin
      if (instr_addr_en) begin
        instr_addr_q <= instr_addr_d;
      end
    end
  end

  // Output PC of current instruction
  assign out_addr_o = {instr_addr_q, 1'b0};

  // The LSB of the address is unused, since all addresses are halfword aligned
  assign unused_addr_in = in_addr_i[0];

  /////////////////
  // FIFO status //
  /////////////////

  // Indicate the fill level of fifo-entries. This is used to determine when a new request can be
  // made on the bus. The prefetch buffer only needs to know about the upper entries which overlap
  // with NUM_REQS.
  assign busy_o = valid_q[DEPTH-1:DEPTH-NUM_REQS];

  /////////////////////
  // FIFO management //
  /////////////////////

  // Since an entry can contain unaligned instructions, popping an entry can leave the entry valid
  assign pop_fifo = out_ready_i & out_valid_o & (~aligned_is_compressed | out_addr_o[1]);

  for (genvar i = 0; i < (DEPTH - 1); i++) begin : g_fifo_next
    // Calculate lowest free entry (write pointer)
    if (i == 0) begin : g_ent0
      assign lowest_free_entry[i] = ~valid_q[i];
    end else begin : g_ent_others
      assign lowest_free_entry[i] = ~valid_q[i] & valid_q[i-1];
    end

    // An entry is set when an incoming request chooses the lowest available entry
    assign valid_pushed[i] = (in_valid_i & lowest_free_entry[i]) |
                             valid_q[i];
    // Popping the FIFO shifts all entries down
    assign valid_popped[i] = pop_fifo ? valid_pushed[i+1] : valid_pushed[i];
    // All entries are wiped out on a clear
    assign valid_d[i] = valid_popped[i] & ~clear_i;

    // data flops are enabled if there is new data to shift into it, or
    assign entry_en[i] = (valid_pushed[i+1] & pop_fifo) |
                         // a new request is incoming and this is the lowest free entry
                         (in_valid_i & lowest_free_entry[i] & ~pop_fifo);

    // take the next entry or the incoming data
    assign rdata_d[i]  = valid_q[i+1] ? rdata_q[i+1] : in_rdata_i;
    assign err_d  [i]  = valid_q[i+1] ? err_q  [i+1] : in_err_i;
  end
  // The top entry is similar but with simpler muxing
  assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & valid_q[DEPTH-2];
  assign valid_pushed     [DEPTH-1] = valid_q[DEPTH-1] | (in_valid_i & lowest_free_entry[DEPTH-1]);
  assign valid_popped     [DEPTH-1] = pop_fifo ? 1'b0 : valid_pushed[DEPTH-1];
  assign valid_d [DEPTH-1]          = valid_popped[DEPTH-1] & ~clear_i;
  assign entry_en[DEPTH-1]          = in_valid_i & lowest_free_entry[DEPTH-1];
  assign rdata_d [DEPTH-1]          = in_rdata_i;
  assign err_d   [DEPTH-1]          = in_err_i;

  ////////////////////
  // FIFO registers //
  ////////////////////

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      valid_q <= '0;
    end else begin
      valid_q <= valid_d;
    end
  end

  for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs
    if (ResetAll) begin : g_rdata_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          rdata_q[i] <= '0;
          err_q[i]   <= '0;
        end else if (entry_en[i]) begin
          rdata_q[i] <= rdata_d[i];
          err_q[i]   <= err_d[i];
        end
      end
    end else begin : g_rdata_nr
      always_ff @(posedge clk_i) begin
        if (entry_en[i]) begin
          rdata_q[i] <= rdata_d[i];
          err_q[i]   <= err_d[i];
        end
      end
    end
  end

  ////////////////
  // Assertions //
  ////////////////

  // Must not push and pop simultaneously when FIFO full.
  `ASSERT(IbexFetchFifoPushPopFull,
      (in_valid_i && pop_fifo) |-> (!valid_q[DEPTH-1] || clear_i))

  // Must not push to FIFO when full.
  `ASSERT(IbexFetchFifoPushFull,
      (in_valid_i) |-> (!valid_q[DEPTH-1] || clear_i))

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv b/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv new file mode 100644 index 0000000..91ab025 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_icache.sv
@@ -0,0 +1,1155 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Instruction cache + * + * Provides an instruction cache along with cache management, instruction buffering and prefetching + */ + +`include "prim_assert.sv" + +module cheriot_icache import cheriot_pkg::*; #( + parameter bit ICacheECC = 1'b0, + parameter bit ResetAll = 1'b0, + parameter int unsigned BusSizeECC = BUS_SIZE, + parameter int unsigned TagSizeECC = IC_TAG_SIZE, + parameter int unsigned LineSizeECC = IC_LINE_SIZE, + // Only cache branch targets + parameter bit BranchCache = 1'b0 +) ( + // Clock and reset + input logic clk_i, + input logic rst_ni, + + // Signal that the core would like instructions + input logic req_i, + + // Set the cache's address counter + input logic branch_i, + input logic branch_mispredict_i, + input logic [31:0] mispredict_addr_i, + input logic [31:0] addr_i, + + // IF stage interface: Pass fetched instructions to the core + input logic ready_i, + output logic valid_o, + output logic [31:0] rdata_o, + output logic [31:0] addr_o, + output logic err_o, + output logic err_plus2_o, + + // Instruction memory / interconnect interface: Fetch instruction data from memory + output logic instr_req_o, + input logic instr_gnt_i, + output logic [31:0] instr_addr_o, + input logic [BUS_SIZE-1:0] instr_rdata_i, + input logic instr_err_i, + input logic instr_rvalid_i, + + // RAM IO + output logic [IC_NUM_WAYS-1:0] ic_tag_req_o, + output logic ic_tag_write_o, + output logic [IC_INDEX_W-1:0] ic_tag_addr_o, + output logic [TagSizeECC-1:0] ic_tag_wdata_o, + input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS], + output logic [IC_NUM_WAYS-1:0] ic_data_req_o, + output logic ic_data_write_o, + output logic [IC_INDEX_W-1:0] ic_data_addr_o, + output logic [LineSizeECC-1:0] ic_data_wdata_o, + input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS], + input logic 
ic_scr_key_valid_i, + + // Cache status + input logic icache_enable_i, + input logic icache_inval_i, + output logic busy_o +); + + // Number of fill buffers (must be >= 2) + localparam int unsigned NUM_FB = 4; + // Request throttling threshold + localparam int unsigned FB_THRESHOLD = NUM_FB - 2; + + // Prefetch signals + logic [ADDR_W-1:0] lookup_addr_aligned; + logic [ADDR_W-1:0] prefetch_addr_d, prefetch_addr_q; + logic prefetch_addr_en; + logic branch_or_mispredict; + // Cache pipelipe IC0 signals + logic lookup_throttle; + logic lookup_req_ic0; + logic [ADDR_W-1:0] lookup_addr_ic0; + logic [IC_INDEX_W-1:0] lookup_index_ic0; + logic fill_req_ic0; + logic [IC_INDEX_W-1:0] fill_index_ic0; + logic [IC_TAG_SIZE-1:0] fill_tag_ic0; + logic [IC_LINE_SIZE-1:0] fill_wdata_ic0; + logic lookup_grant_ic0; + logic lookup_actual_ic0; + logic fill_grant_ic0; + logic tag_req_ic0; + logic [IC_INDEX_W-1:0] tag_index_ic0; + logic [IC_NUM_WAYS-1:0] tag_banks_ic0; + logic tag_write_ic0; + logic [TagSizeECC-1:0] tag_wdata_ic0; + logic data_req_ic0; + logic [IC_INDEX_W-1:0] data_index_ic0; + logic [IC_NUM_WAYS-1:0] data_banks_ic0; + logic data_write_ic0; + logic [LineSizeECC-1:0] data_wdata_ic0; + // Cache pipelipe IC1 signals + logic [TagSizeECC-1:0] tag_rdata_ic1 [IC_NUM_WAYS]; + logic [LineSizeECC-1:0] data_rdata_ic1 [IC_NUM_WAYS]; + logic [LineSizeECC-1:0] hit_data_ecc_ic1; + logic [IC_LINE_SIZE-1:0] hit_data_ic1; + logic lookup_valid_ic1; + logic [ADDR_W-1:IC_INDEX_HI+1] lookup_addr_ic1; + logic [IC_NUM_WAYS-1:0] tag_match_ic1; + logic tag_hit_ic1; + logic [IC_NUM_WAYS-1:0] tag_invalid_ic1; + logic [IC_NUM_WAYS-1:0] lowest_invalid_way_ic1; + logic [IC_NUM_WAYS-1:0] round_robin_way_ic1, round_robin_way_q; + logic [IC_NUM_WAYS-1:0] sel_way_ic1; + logic ecc_err_ic1; + logic ecc_write_req; + logic [IC_NUM_WAYS-1:0] ecc_write_ways; + logic [IC_INDEX_W-1:0] ecc_write_index; + // Fill buffer signals + logic [$clog2(NUM_FB)-1:0] fb_fill_level; + logic fill_cache_new; + logic 
fill_new_alloc; + logic fill_spec_req, fill_spec_done, fill_spec_hold; + logic [NUM_FB-1:0][NUM_FB-1:0] fill_older_d, fill_older_q; + logic [NUM_FB-1:0] fill_alloc_sel, fill_alloc; + logic [NUM_FB-1:0] fill_busy_d, fill_busy_q; + logic [NUM_FB-1:0] fill_done; + logic [NUM_FB-1:0] fill_in_ic1; + logic [NUM_FB-1:0] fill_stale_d, fill_stale_q; + logic [NUM_FB-1:0] fill_cache_d, fill_cache_q; + logic [NUM_FB-1:0] fill_hit_ic1, fill_hit_d, fill_hit_q; + logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_cnt_d, fill_ext_cnt_q; + logic [NUM_FB-1:0] fill_ext_hold_d, fill_ext_hold_q; + logic [NUM_FB-1:0] fill_ext_done_d, fill_ext_done_q; + logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_rvd_cnt_d, fill_rvd_cnt_q; + logic [NUM_FB-1:0] fill_rvd_done; + logic [NUM_FB-1:0] fill_ram_done_d, fill_ram_done_q; + logic [NUM_FB-1:0] fill_out_grant; + logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_out_cnt_d, fill_out_cnt_q; + logic [NUM_FB-1:0] fill_out_done; + logic [NUM_FB-1:0] fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req; + logic [NUM_FB-1:0] fill_data_sel, fill_data_reg; + logic [NUM_FB-1:0] fill_data_hit, fill_data_rvd; + logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off; + logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_beat, fill_rvd_beat; + logic [NUM_FB-1:0] fill_ext_arb, fill_ram_arb, fill_out_arb; + logic [NUM_FB-1:0] fill_rvd_arb; + logic [NUM_FB-1:0] fill_entry_en; + logic [NUM_FB-1:0] fill_addr_en; + logic [NUM_FB-1:0] fill_way_en; + logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_data_en; + logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_err_d, fill_err_q; + logic [ADDR_W-1:0] fill_addr_q [NUM_FB]; + logic [IC_NUM_WAYS-1:0] fill_way_q [NUM_FB]; + logic [IC_LINE_SIZE-1:0] fill_data_d [NUM_FB]; + logic [IC_LINE_SIZE-1:0] fill_data_q [NUM_FB]; + logic [ADDR_W-1:BUS_W] fill_ext_req_addr; + logic [ADDR_W-1:0] fill_ram_req_addr; + logic [IC_NUM_WAYS-1:0] fill_ram_req_way; + logic [IC_LINE_SIZE-1:0] fill_ram_req_data; + logic [IC_LINE_SIZE-1:0] fill_out_data; + logic 
[IC_LINE_BEATS-1:0] fill_out_err; + // External req signals + logic instr_req; + logic [ADDR_W-1:BUS_W] instr_addr; + // Data output signals + logic skid_complete_instr; + logic skid_ready; + logic output_compressed; + logic skid_valid_d, skid_valid_q, skid_en; + logic [15:0] skid_data_d, skid_data_q; + logic skid_err_q; + logic output_valid; + logic addr_incr_two; + logic output_addr_en; + logic [ADDR_W-1:1] output_addr_incr; + logic [ADDR_W-1:1] output_addr_d, output_addr_q; + logic [15:0] output_data_lo, output_data_hi; + logic data_valid, output_ready; + logic [IC_LINE_SIZE-1:0] line_data; + logic [IC_LINE_BEATS-1:0] line_err; + logic [31:0] line_data_muxed; + logic line_err_muxed; + logic [31:0] output_data; + logic output_err; + // Invalidations + logic start_inval, inval_done; + logic inval_lock, inval_req_d, inval_req_q; + logic reset_inval_q; + logic inval_prog_d, inval_prog_q; + logic [IC_INDEX_W-1:0] inval_index_d, inval_index_q; + + ////////////////////////// + // Instruction prefetch // + ////////////////////////// + + assign branch_or_mispredict = branch_i | branch_mispredict_i; + + assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}}; + + // The prefetch address increments by one cache line for each granted request. + // This address is also updated if there is a branch that is not granted, since the target + // address (addr_i) is only valid for one cycle while branch_i is high. + + // The captured branch target address is not forced to be aligned since the offset in the cache + // line must also be recorded for later use by the fill buffers. + assign prefetch_addr_d = + lookup_grant_ic0 ? (lookup_addr_aligned + + {{ADDR_W-IC_LINE_W-1{1'b0}}, 1'b1, {IC_LINE_W{1'b0}}}) : + branch_i ? 
addr_i : + mispredict_addr_i; + + assign prefetch_addr_en = branch_or_mispredict | lookup_grant_ic0; + + if (ResetAll) begin : g_prefetch_addr_ra + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + prefetch_addr_q <= '0; + end else if (prefetch_addr_en) begin + prefetch_addr_q <= prefetch_addr_d; + end + end + end else begin : g_prefetch_addr_nr + always_ff @(posedge clk_i) begin + if (prefetch_addr_en) begin + prefetch_addr_q <= prefetch_addr_d; + end + end + end + + //////////////////////// + // Pipeline stage IC0 // + //////////////////////// + + // Cache lookup + assign lookup_throttle = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]); + + assign lookup_req_ic0 = req_i & ~&fill_busy_q & (branch_or_mispredict | ~lookup_throttle) & + ~ecc_write_req; + assign lookup_addr_ic0 = branch_i ? addr_i : + branch_mispredict_i ? mispredict_addr_i : + prefetch_addr_q; + assign lookup_index_ic0 = lookup_addr_ic0[IC_INDEX_HI:IC_LINE_W]; + + // Cache write + assign fill_req_ic0 = (|fill_ram_req); + assign fill_index_ic0 = fill_ram_req_addr[IC_INDEX_HI:IC_LINE_W]; + assign fill_tag_ic0 = {(~inval_prog_q & ~ecc_write_req), + fill_ram_req_addr[ADDR_W-1:IC_INDEX_HI+1]}; + assign fill_wdata_ic0 = fill_ram_req_data; + + // Arbitrated signals - lookups have highest priority + assign lookup_grant_ic0 = lookup_req_ic0; + assign fill_grant_ic0 = fill_req_ic0 & ~lookup_req_ic0 & ~inval_prog_q & + ~ecc_write_req; + // Qualified lookup grant to mask ram signals in IC1 if access was not made + assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q & + ~icache_inval_i & ~inval_lock & ~start_inval; + + // Tagram + assign tag_req_ic0 = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req; + assign tag_index_ic0 = inval_prog_q ? inval_index_q : + ecc_write_req ? ecc_write_index : + fill_grant_ic0 ? fill_index_ic0 : + lookup_index_ic0; + assign tag_banks_ic0 = ecc_write_req ? ecc_write_ways : + fill_grant_ic0 ? 
fill_ram_req_way : + {IC_NUM_WAYS{1'b1}}; + assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req; + + // Dataram + assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0; + assign data_index_ic0 = tag_index_ic0; + assign data_banks_ic0 = tag_banks_ic0; + assign data_write_ic0 = tag_write_ic0; + + // Append ECC checkbits to write data if required + if (ICacheECC) begin : gen_ecc_wdata + + // Tagram ECC + // Reuse the same ecc encoding module for larger cache sizes by padding with zeros + logic [21:0] tag_ecc_input_padded; + logic [27:0] tag_ecc_output_padded; + logic [22-IC_TAG_SIZE:0] unused_tag_ecc_output; + + assign tag_ecc_input_padded = {{22-IC_TAG_SIZE{1'b0}},fill_tag_ic0}; + assign unused_tag_ecc_output = tag_ecc_output_padded[21:IC_TAG_SIZE-1]; + + prim_secded_inv_28_22_enc tag_ecc_enc ( + .data_i (tag_ecc_input_padded), + .data_o (tag_ecc_output_padded) + ); + + assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[IC_TAG_SIZE-1:0]}; + + // Dataram ECC + for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks + prim_secded_inv_39_32_enc data_ecc_enc ( + .data_i (fill_wdata_ic0[bank*BUS_SIZE+:BUS_SIZE]), + .data_o (data_wdata_ic0[bank*BusSizeECC+:BusSizeECC]) + ); + end + + end else begin : gen_noecc_wdata + assign tag_wdata_ic0 = fill_tag_ic0; + assign data_wdata_ic0 = fill_wdata_ic0; + end + + //////////////// + // IC0 -> IC1 // + //////////////// + + // Tag RAMs outputs + assign ic_tag_req_o = {IC_NUM_WAYS{tag_req_ic0}} & tag_banks_ic0; + assign ic_tag_write_o = tag_write_ic0; + assign ic_tag_addr_o = tag_index_ic0; + assign ic_tag_wdata_o = tag_wdata_ic0; + + // Tag RAMs inputs + assign tag_rdata_ic1 = ic_tag_rdata_i; + + // Data RAMs outputs + assign ic_data_req_o = {IC_NUM_WAYS{data_req_ic0}} & data_banks_ic0; + assign ic_data_write_o = data_write_ic0; + assign ic_data_addr_o = data_index_ic0; + assign ic_data_wdata_o = data_wdata_ic0; + + // Data RAMs inputs + assign data_rdata_ic1 = ic_data_rdata_i; 
  // Register the IC0 lookup into the IC1 pipeline stage (one cycle behind the RAM access)
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      lookup_valid_ic1 <= 1'b0;
    end else begin
      lookup_valid_ic1 <= lookup_actual_ic0;
    end
  end

  // Capture the looked-up tag address and the one-hot fill buffer it allocated.
  // ResetAll selects between fully-resettable and enable-only flop variants.
  if (ResetAll) begin : g_lookup_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        lookup_addr_ic1 <= '0;
        fill_in_ic1     <= '0;
      end else if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1     <= fill_alloc_sel;
      end
    end
  end else begin : g_lookup_addr_nr
    always_ff @(posedge clk_i) begin
      if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1     <= fill_alloc_sel;
      end
    end
  end

  ////////////////////////
  // Pipeline stage IC1 //
  ////////////////////////

  // Tag matching
  // A way matches when its stored tag equals {valid bit = 1, lookup tag};
  // a way is invalid when its top (valid) bit is clear.
  for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_match
    assign tag_match_ic1[way]   = (tag_rdata_ic1[way][IC_TAG_SIZE-1:0] ==
                                   {1'b1,lookup_addr_ic1[ADDR_W-1:IC_INDEX_HI+1]});
    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][IC_TAG_SIZE-1];
  end

  assign tag_hit_ic1 = |tag_match_ic1;

  // Hit data mux
  // One-hot OR-mux: relies on tag_match_ic1 being one-hot for correct data
  always_comb begin
    hit_data_ecc_ic1 = 'b0;
    for (int way = 0; way < IC_NUM_WAYS; way++) begin
      if (tag_match_ic1[way]) begin
        hit_data_ecc_ic1 |= data_rdata_ic1[way];
      end
    end
  end

  // Way selection for allocations to the cache (onehot signals)
  // 1 first invalid way
  // 2 global round-robin (pseudorandom) way
  assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
  assign round_robin_way_ic1[0]    = round_robin_way_q[IC_NUM_WAYS-1];
  for (genvar way = 1; way < IC_NUM_WAYS; way++) begin : gen_lowest_way
    assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
    assign round_robin_way_ic1[way]    = round_robin_way_q[way-1];
  end

  // One-hot rotating pointer, advanced on every valid lookup; seeded to way 0
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      round_robin_way_q <= {{IC_NUM_WAYS-1{1'b0}}, 1'b1};
    end else if (lookup_valid_ic1) begin
      round_robin_way_q <= round_robin_way_ic1;
    end
  end

  // Prefer filling an invalid way; otherwise evict the round-robin victim
  assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
                                          round_robin_way_q;

  // ECC checking logic
  if (ICacheECC) begin : gen_data_ecc_checking
    logic [IC_NUM_WAYS-1:0]     tag_err_ic1;
    logic [IC_LINE_BEATS*2-1:0] data_err_ic1;
    logic                       ecc_correction_write_d, ecc_correction_write_q;
    logic [IC_NUM_WAYS-1:0]     ecc_correction_ways_d, ecc_correction_ways_q;
    logic [IC_INDEX_W-1:0]      lookup_index_ic1, ecc_correction_index_q;

    // Tag ECC checking
    for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_ecc
      logic [1:0]  tag_err_bank_ic1;
      logic [27:0] tag_rdata_padded_ic1;

      // Expand the tag rdata with extra padding if the tag size is less than the maximum
      assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TagSizeECC-1-:6],
                                     {22-IC_TAG_SIZE{1'b0}},
                                     tag_rdata_ic1[way][IC_TAG_SIZE-1:0]};

      // Decoder used for error detection only; corrected data output is unused
      prim_secded_inv_28_22_dec data_ecc_dec (
        .data_i     (tag_rdata_padded_ic1),
        .data_o     (),
        .syndrome_o (),
        .err_o      (tag_err_bank_ic1)
      );
      assign tag_err_ic1[way] = |tag_err_bank_ic1;
    end

    // Data ECC checking
    // Note - could generate for all ways and mux after
    for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
      prim_secded_inv_39_32_dec data_ecc_dec (
        .data_i     (hit_data_ecc_ic1[bank*BusSizeECC+:BusSizeECC]),
        .data_o     (),
        .syndrome_o (),
        .err_o      (data_err_ic1[bank*2+:2])
      );

      // Strip the ECC checkbits from the hit data
      assign hit_data_ic1[bank*BUS_SIZE+:BUS_SIZE] =
          hit_data_ecc_ic1[bank*BusSizeECC+:BUS_SIZE];

    end

    assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));

    // Error correction
    // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
    // spurious hits. Also prevents the same line being allocated twice when there was a true
    // hit and a spurious hit.
    assign ecc_correction_ways_d  = {IC_NUM_WAYS{|tag_err_ic1}} |
                                    (tag_match_ic1 & {IC_NUM_WAYS{|data_err_ic1}});
    assign ecc_correction_write_d = ecc_err_ic1;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        ecc_correction_write_q <= 1'b0;
      end else begin
        ecc_correction_write_q <= ecc_correction_write_d;
      end
    end

    // The index is required in IC1 only when ECC is configured so is registered here
    if (ResetAll) begin : g_lookup_ind_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          lookup_index_ic1 <= '0;
        end else if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end else begin : g_lookup_ind_nr
      always_ff @(posedge clk_i) begin
        if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end

    // Store the ways with errors to be invalidated
    if (ResetAll) begin : g_ecc_correction_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          ecc_correction_ways_q  <= '0;
          ecc_correction_index_q <= '0;
        end else if (ecc_err_ic1) begin
          ecc_correction_ways_q  <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end else begin : g_ecc_correction_nr
      always_ff @(posedge clk_i) begin
        if (ecc_err_ic1) begin
          ecc_correction_ways_q  <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end

    // Registered correction request drives an invalidating tag write back in IC0
    assign ecc_write_req   = ecc_correction_write_q;
    assign ecc_write_ways  = ecc_correction_ways_q;
    assign ecc_write_index = ecc_correction_index_q;

  end else begin : gen_no_data_ecc
    assign ecc_err_ic1     = 1'b0;
    assign ecc_write_req   = 1'b0;
    assign ecc_write_ways  = '0;
    assign ecc_write_index = '0;
    assign hit_data_ic1    = hit_data_ecc_ic1;
  end

  ///////////////////////////////
  // Cache allocation decision //
  ///////////////////////////////

  if (BranchCache) begin : gen_caching_logic

    // Cache branch target + a number of subsequent lines
    localparam int unsigned CACHE_AHEAD = 2;
    localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
    logic                   cache_cnt_dec;
    logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;

    // Counter reloads to CACHE_AHEAD on a branch, decrements on each granted lookup
    assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
    assign cache_cnt_d   = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
                                      (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        cache_cnt_q <= '0;
      end else begin
        cache_cnt_q <= cache_cnt_d;
      end
    end

    // Allocate only near branches, and never while invalidation is pending/ongoing
    assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
                            ~icache_inval_i & ~inval_lock & ~inval_prog_q;

  end else begin : gen_cache_all

    // Cache all missing fetches
    assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
  end

  //////////////////////////
  // Fill buffer tracking //
  //////////////////////////

  // Count busy, non-stale fill buffers (used externally as a fullness indicator)
  always_comb begin
    fb_fill_level = '0;
    for (int i = 0; i < NUM_FB; i++) begin
      if (fill_busy_q[i] & ~fill_stale_q[i]) begin
        fb_fill_level += {{$clog2(NUM_FB) - 1{1'b0}}, 1'b1};
      end
    end
  end

  // Allocate a new buffer for every granted lookup
  assign fill_new_alloc = lookup_grant_ic0;
  // Track whether a speculative external request was made from IC0, and whether it was granted
  // Speculative requests are only made for branches, or if the cache is disabled
  assign fill_spec_req  = (~icache_enable_i | branch_or_mispredict) & ~|fill_ext_req;
  assign fill_spec_done = fill_spec_req & instr_gnt_i;
  assign fill_spec_hold = fill_spec_req & ~instr_gnt_i;

  for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs

    /////////////////////////////
    // Fill buffer allocations //
    /////////////////////////////

    // Allocate the lowest available buffer
    if (fb == 0) begin : gen_fb_zero
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
    end else begin : gen_fb_rest
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
    end

    assign fill_alloc[fb]  = fill_alloc_sel[fb] & fill_new_alloc;
    assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);

    // Track which other fill buffers are older than this one (for age-based arbitration)
    // TODO sparsify
    assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;

    // A fill buffer can release once all its actions are completed
    // all data written to the cache (unless hit or error)
    assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
                            (|fill_err_q[fb])) &
                           // all data output unless stale due to intervening branch
                           (fill_out_done[fb] | fill_stale_q[fb] | branch_or_mispredict) &
                           // all external requests completed
                           fill_rvd_done[fb];

    /////////////////////////////////
    // Fill buffer status tracking //
    /////////////////////////////////

    // Track staleness (requests become stale when a branch intervenes)
    assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_or_mispredict | fill_stale_q[fb]);
    // Track whether or not this request should allocate to the cache
    // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
    assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
                              (fill_cache_q[fb] & fill_busy_q[fb] &
                               icache_enable_i & ~icache_inval_i & ~inval_lock);
    // Record whether the request hit in the cache
    assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
    assign fill_hit_d[fb]   = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);

    ///////////////////////////////////////////
    // Fill buffer external request tracking //
    ///////////////////////////////////////////

    // Make an external request
    assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done_d[fb];

    // Count the number of completed external requests (each line requires IC_LINE_BEATS requests)
    assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
        {{IC_LINE_BEATS_W{1'b0}},fill_spec_done} :
        (fill_ext_cnt_q[fb] + {{IC_LINE_BEATS_W{1'b0}},
                               fill_ext_arb[fb] & instr_gnt_i});
    // External request must be held until granted
    assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
                                 (fill_ext_arb[fb] & ~instr_gnt_i);
    // External requests are completed when the counter is filled or when the request is cancelled
    assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][IC_LINE_BEATS_W] |
                                  // external requests are considered complete if the request hit
                                  fill_hit_ic1[fb] | fill_hit_q[fb] |
                                  // cancel if the line won't be cached and, it is stale
                                  (~fill_cache_q[fb] & (branch_or_mispredict | fill_stale_q[fb] |
                                  // or we're already at the end of the line
                                                        fill_ext_beat[fb][IC_LINE_BEATS_W]))) &
                                 // can't cancel while we are waiting for a grant on the bus
                                 ~fill_ext_hold_q[fb] & fill_busy_q[fb];
    // Track whether this fill buffer expects to receive beats of data
    assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
    // Count the number of rvalid beats received
    assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
                                (fill_rvd_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
    // External data is complete when all issued external requests have received their data
    assign fill_rvd_done[fb] = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
                               (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);

    //////////////////////////////////////
    // Fill buffer data output tracking //
    //////////////////////////////////////

    // Send data to the IF stage for requests that are not stale, have not completed their
    // data output, and have data available to send.
    // Data is available if:
    //  - The request hit in the cache
    //  - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
    //  - Data is available from the bus this cycle (fill_rvd_arb)
    assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                              (fill_hit_ic1[fb] | fill_hit_q[fb] |
                               (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);

    // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
    assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready;

    // Count the beats of data output to the IF stage
    assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[IC_LINE_W-1:BUS_W]} :
                                (fill_out_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
    // Data output complete when the counter fills
    assign fill_out_done[fb] = fill_out_cnt_q[fb][IC_LINE_BEATS_W];

    //////////////////////////////////////
    // Fill buffer ram request tracking //
    //////////////////////////////////////

    // make a fill request once all data beats received
    assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][IC_LINE_BEATS_W] &
                              // unless the request hit, was non-allocating or got an error
                              ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
                              // or the request was already completed
                              ~fill_ram_done_q[fb];

    // Record when a cache allocation request has been completed
    assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);

    //////////////////////////////
    // Fill buffer line offsets //
    //////////////////////////////

    // When we branch into the middle of a line, the output count will not start from zero. This
    // beat count is used to know which incoming rdata beats are relevant.
    assign fill_ext_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_ext_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_ext_off[fb]  = fill_ext_beat[fb][IC_LINE_BEATS_W-1:0];
    assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_rvd_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_rvd_off[fb]  = fill_rvd_beat[fb][IC_LINE_BEATS_W-1:0];

    /////////////////////////////
    // Fill buffer arbitration //
    /////////////////////////////

    // Age based arbitration - all these signals are one-hot
    assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
    assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 &
                              ~|(fill_ram_req & fill_older_q[fb]);
    // Calculate which fill buffer is the oldest one which still needs to output data to IF
    assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
                                  fill_older_q[fb]);
    // Arbitrate the request which has data available to send, and is the oldest outstanding
    assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
    // Assign incoming rvalid data to the oldest fill buffer expecting it
    assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] &
                              ~|(fill_rvd_exp & fill_older_q[fb]);

    /////////////////////////////
    // Fill buffer data muxing //
    /////////////////////////////

    // Output data muxing controls
    // 1. Select data from the fill buffer data register
    assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
                               ~fill_out_done[fb] & fill_data_sel[fb] &
                               // The incoming data is already ahead of the output count
                               ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
                                (|fill_err_q[fb]));
    // 2. Select IC1 hit data
    assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
    // 3. Select incoming instr_rdata_i
    assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                               ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                               // The incoming data lines up with the output count
                               (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];


    ///////////////////////////
    // Fill buffer registers //
    ///////////////////////////

    // Fill buffer general enable
    assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_busy_q[fb]     <= 1'b0;
        fill_older_q[fb]    <= '0;
        fill_stale_q[fb]    <= 1'b0;
        fill_cache_q[fb]    <= 1'b0;
        fill_hit_q[fb]      <= 1'b0;
        fill_ext_cnt_q[fb]  <= '0;
        fill_ext_hold_q[fb] <= 1'b0;
        fill_ext_done_q[fb] <= 1'b0;
        fill_rvd_cnt_q[fb]  <= '0;
        fill_ram_done_q[fb] <= 1'b0;
        fill_out_cnt_q[fb]  <= '0;
      end else if (fill_entry_en[fb]) begin
        fill_busy_q[fb]     <= fill_busy_d[fb];
        fill_older_q[fb]    <= fill_older_d[fb];
        fill_stale_q[fb]    <= fill_stale_d[fb];
        fill_cache_q[fb]    <= fill_cache_d[fb];
        fill_hit_q[fb]      <= fill_hit_d[fb];
        fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];
        fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
        fill_ext_done_q[fb] <= fill_ext_done_d[fb];
        fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];
        fill_ram_done_q[fb] <= fill_ram_done_d[fb];
        fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];
      end
    end

    ////////////////////////////////////////
    // Fill buffer address / data storage //
    ////////////////////////////////////////

    assign fill_addr_en[fb] = fill_alloc[fb];
    assign fill_way_en[fb]  = (lookup_valid_ic1 & fill_in_ic1[fb]);

    if (ResetAll) begin : g_fill_addr_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_addr_q[fb] <= '0;
        end else if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end else begin : g_fill_addr_nr
      always_ff @(posedge clk_i) begin
        if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end

    // Record the victim way chosen in IC1 for this buffer's eventual RAM fill
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_way_q[fb] <= '0;
      end else if (fill_way_en[fb]) begin
        fill_way_q[fb] <= sel_way_ic1;
      end
    end

    // Data either comes from the cache or the bus. If there was an ECC error, we must take
    // the incoming bus data since the cache hit data is corrupted.
    assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1 :
                                                {IC_LINE_BEATS{instr_rdata_i}};

    for (genvar b = 0; b < IC_LINE_BEATS; b++) begin : gen_data_buf
      // Error tracking (per beat)
      assign fill_err_d[fb][b] = (fill_rvd_arb[fb] & instr_err_i &
                                  (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0])) |
                                 // Hold the error once recorded
                                 (fill_busy_q[fb] & fill_err_q[fb][b]);

      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_err_q[fb][b] <= '0;
        end else if (fill_entry_en[fb]) begin
          fill_err_q[fb][b] <= fill_err_d[fb][b];
        end
      end

      // Enable the relevant part of the data register (or all for cache hits)
      // Ignore incoming rvalid data when we already have cache hit data
      assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
                                   (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                                    (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0]));

      if (ResetAll) begin : g_fill_data_ra
        always_ff @(posedge clk_i or negedge rst_ni) begin
          if (!rst_ni) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= '0;
          end else if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end else begin : g_fill_data_nr
        always_ff @(posedge clk_i) begin
          if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end

    end
  end

  ////////////////////////////////
  // Fill buffer one-hot muxing //
  ////////////////////////////////

  // External req info
  // One-hot OR-muxes: the *_arb signals are one-hot by construction above
  always_comb begin
    fill_ext_req_addr = '0;
    for (int i = 0; i < NUM_FB; i++) begin
      if (fill_ext_arb[i]) begin
        fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:IC_LINE_W], fill_ext_off[i]};
      end
    end
  end

  // Cache req info
  always_comb begin
    fill_ram_req_addr = '0;
    fill_ram_req_way  = '0;
    fill_ram_req_data = '0;
    for (int i = 0; i < NUM_FB; i++) begin
      if (fill_ram_arb[i]) begin
        fill_ram_req_addr |= fill_addr_q[i];
        fill_ram_req_way  |= fill_way_q[i];
        fill_ram_req_data |= fill_data_q[i];
      end
    end
  end

  // IF stage output data
  always_comb begin
    fill_out_data = '0;
    fill_out_err  = '0;
    for (int i = 0; i < NUM_FB; i++) begin
      if (fill_data_reg[i]) begin
        fill_out_data |= fill_data_q[i];
        // Ignore any speculative errors accumulated on cache hits
        fill_out_err  |= (fill_err_q[i] & ~{IC_LINE_BEATS{fill_hit_q[i]}});
      end
    end
  end

  ///////////////////////
  // External requests //
  ///////////////////////

  assign instr_req  = ((~icache_enable_i | branch_or_mispredict) & lookup_grant_ic0) |
                      (|fill_ext_req);

  assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
                                      lookup_addr_ic0[ADDR_W-1:BUS_W];

  assign instr_req_o  = instr_req;
  // Bus requests are always aligned to the bus width
  assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};

  ////////////////////////
  // Output data muxing //
  ////////////////////////

  // Mux between line-width data sources
  assign line_data = |fill_data_hit ? hit_data_ic1 : fill_out_data;
  assign line_err  = |fill_data_hit ? {IC_LINE_BEATS{1'b0}} : fill_out_err;

  // Mux the relevant beat of line data, based on the output address
  always_comb begin
    line_data_muxed = '0;
    line_err_muxed  = 1'b0;
    for (int unsigned i = 0; i < IC_LINE_BEATS; i++) begin
      // When data has been skidded, the output address is behind by one
      if ((output_addr_q[IC_LINE_W-1:BUS_W] + {{IC_LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
          i[IC_LINE_BEATS_W-1:0]) begin
        line_data_muxed |= line_data[i*32+:32];
        line_err_muxed  |= line_err[i];
      end
    end
  end

  // Mux between incoming rdata and the muxed line data
  assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
  assign output_err  = |fill_data_rvd ? instr_err_i : line_err_muxed;

  // Output data is valid (from any of the three possible sources). Note that fill_out_arb
  // must be used here rather than fill_out_req because data can become valid out of order
  // (e.g. cache hit data can become available ahead of an older outstanding miss).
  assign data_valid = |fill_out_arb;

  // Skid buffer data
  // The skid buffer holds the upper halfword for instructions straddling a word boundary
  assign skid_data_d = output_data[31:16];

  assign skid_en = data_valid & (ready_i | skid_ready);

  if (ResetAll) begin : g_skid_data_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        skid_data_q <= '0;
        skid_err_q  <= '0;
      end else if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q  <= output_err;
      end
    end
  end else begin : g_skid_data_nr
    always_ff @(posedge clk_i) begin
      if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q  <= output_err;
      end
    end
  end

  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
  // an error (no need to wait for the second half)
  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);

  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);

  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;

  // RISC-V encoding: low two bits != 2'b11 identifies a compressed (16-bit) instruction
  assign output_compressed = (rdata_o[1:0] != 2'b11);

  assign skid_valid_d =
      // Branches invalidate the skid buffer
      branch_or_mispredict ? 1'b0 :
      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
       // The skid buffer becomes valid when:
       // - we branch to an unaligned uncompressed instruction
       (data_valid &
        (((output_addr_q[1] & (~output_compressed | output_err)) |
          // - a compressed instruction misaligns the stream
          (~output_addr_q[1] & output_compressed & ~output_err & ready_i)))));

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      skid_valid_q <= 1'b0;
    end else begin
      skid_valid_q <= skid_valid_d;
    end
  end

  // Signal that valid data is available to the IF stage
  // Note that if the first half of an unaligned instruction reports an error, we do not need
  // to wait for the second half
  // Compressed instruction completely satisfied by skid buffer
  assign output_valid = skid_complete_instr |
                        // Output data available and, output stream aligned, or skid data available,
                        (data_valid & (~output_addr_q[1] | skid_valid_q |
                                       // or this is an error or an unaligned compressed instruction
                                       output_err | (output_data[17:16] != 2'b11)));

  // Update the address on branches and every time an instruction is driven
  assign output_addr_en = branch_or_mispredict | (ready_i & valid_o);

  // Increment the address by two every time a compressed instruction is popped
  assign addr_incr_two = output_compressed & ~err_o;

  // Next IF stage PC
  assign output_addr_incr = (output_addr_q[31:1] +
                             // Increment address by 4 or 2
                             {29'd0, ~addr_incr_two, addr_incr_two});

  // Redirect the address on branches or mispredicts
  assign output_addr_d = branch_i ? addr_i[31:1] :
                         branch_mispredict_i ? mispredict_addr_i[31:1] :
                                               output_addr_incr;

  if (ResetAll) begin : g_output_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        output_addr_q <= '0;
      end else if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end else begin : g_output_addr_nr
    always_ff @(posedge clk_i) begin
      if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end

  // Mux the data from BUS_SIZE to halfword
  // This muxing realigns data when instruction words are split across BUS_W e.g.
  //          word 1 |----|*h1*|
  //          word 0 |*h0*|----| --> |*h1*|*h0*|
  //                31   15   0    31   15   0
  always_comb begin
    output_data_lo = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_lo |= output_data[i*16+:16];
      end
    end
  end

  always_comb begin
    output_data_hi = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS - 1; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_hi |= output_data[(i+1)*16+:16];
      end
    end
    // Top halfword of the line wraps to the bottom of the next beat
    if (&output_addr_q[BUS_W-1:1]) begin
      output_data_hi |= output_data[15:0];
    end
  end

  assign valid_o     = output_valid & ~branch_mispredict_i;
  assign rdata_o     = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
  assign addr_o      = {output_addr_q, 1'b0};
  assign err_o       = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
  // Error caused by the second half of a misaligned uncompressed instruction
  // (only relevant when err_o is set)
  assign err_plus2_o = skid_valid_q & ~skid_err_q;

  ///////////////////
  // Invalidations //
  ///////////////////


  // We need to save the invalidation request inside a register. That way we can wait
  // until we have a valid scrambling key to do it. Since the key itself is needed for
  // starting to fill in the RAMs and read from them, ICache also needs to stop operating.
  // Hold the invalidation request until the full index sweep completes
  assign inval_req_d = (inval_req_q | icache_inval_i) & ~(inval_done & inval_prog_q);

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      inval_req_q <= 1'b0;
    end else begin
      inval_req_q <= inval_req_d;
    end
  end

  // This will act like a lock mechanism.
  // Main idea is to lock the invalidation request until we got a valid scrambling key.
  assign inval_lock = inval_req_d & ~ic_scr_key_valid_i;

  // Invalidate on reset, or when instructed. If an invalidation request is received while a
  // previous invalidation is ongoing, it does not need to be restarted. Do not start
  // this process until inval lock is removed meaning the scrambling key is valid.
  assign start_inval   = ~inval_lock & (~reset_inval_q | inval_req_q) & ~inval_prog_q ;
  assign inval_prog_d  = ~inval_lock & (start_inval | (inval_prog_q & ~inval_done));
  // The sweep is finished once the index counter saturates (all-ones)
  assign inval_done    = &inval_index_q;
  assign inval_index_d = start_inval ? '0 : (inval_index_q + {{IC_INDEX_W-1{1'b0}},1'b1});

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      inval_prog_q  <= 1'b0;
      reset_inval_q <= 1'b0;
    end else begin
      inval_prog_q  <= inval_prog_d;
      // reset_inval_q is 0 only on the first post-reset cycle, triggering the
      // automatic invalidation sweep after reset
      reset_inval_q <= 1'b1;
    end
  end

  if (ResetAll) begin : g_inval_index_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        inval_index_q <= '0;
      end else if (inval_prog_d) begin
        inval_index_q <= inval_index_d;
      end
    end
  end else begin : g_inval_index_nr
    always_ff @(posedge clk_i) begin
      if (inval_prog_d) begin
        inval_index_q <= inval_index_d;
      end
    end
  end

  /////////////////
  // Busy status //
  /////////////////

  // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
  // outstanding.
  assign busy_o = inval_req_q | (|(fill_busy_q & ~fill_rvd_done));

  ////////////////
  // Assertions //
  ////////////////

  `ASSERT_INIT(size_param_legal, (IC_LINE_SIZE > 32))

  // ECC primitives will need to be changed for different sizes
  `ASSERT_INIT(ecc_tag_param_legal, (IC_TAG_SIZE <= 27))
  `ASSERT_INIT(ecc_data_param_legal, !ICacheECC || (BUS_SIZE == 32))

  // Lookups in the tag ram should always give a known result
  `ASSERT_KNOWN(TagHitKnown,     lookup_valid_ic1 & tag_hit_ic1)
  `ASSERT_KNOWN(TagInvalidKnown, lookup_valid_ic1 & tag_invalid_ic1)

  // This is only used for the Yosys-based formal flow. Once we have working bind support, we can
  // get rid of it.
`ifdef FORMAL
 `ifdef YOSYS
  // Unfortunately, Yosys doesn't support passing unpacked arrays as ports. Explicitly pack up the
  // signals we need.
  logic [NUM_FB-1:0][ADDR_W-1:0] packed_fill_addr_q;
  always_comb begin
    for (int i = 0; i < NUM_FB; i++) begin
      packed_fill_addr_q[i][ADDR_W-1:0] = fill_addr_q[i];
    end
  end

 `include "formal_tb_frag.svh"
 `endif
`endif


endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv new file mode 100644 index 0000000..8305792 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_id_stage.sv
@@ -0,0 +1,1270 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +`ifdef RISCV_FORMAL + `define RVFI +`endif + +/** + * Instruction Decode Stage + * + * Decode stage of the core. It decodes the instructions and hosts the register + * file. + */ + +`include "prim_assert.sv" +`include "dv_fcov_macros.svh" + +module cheriot_id_stage import cheri_pkg::*; #( + parameter bit RV32E = 0, + parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast, + parameter cheriot_pkg::rv32b_e RV32B = cheriot_pkg::RV32BNone, + parameter bit DataIndTiming = 1'b0, + parameter bit BranchTargetALU = 0, + parameter bit WritebackStage = 0, + parameter bit BranchPredictor = 0, + parameter bit CHERIoTEn = 1'b1, + parameter bit CheriPPLBC = 1'b1, + parameter bit CheriSBND2 = 1'b0 +) ( + input logic clk_i, + input logic rst_ni, + + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + output logic ctrl_busy_o, + output logic illegal_insn_o, + + // Interface to IF stage + input logic instr_valid_i, + input logic [31:0] instr_rdata_i, // from IF-ID pipeline registers + input logic [31:0] instr_rdata_alu_i, // from IF-ID pipeline registers + input logic [15:0] instr_rdata_c_i, // from IF-ID pipeline registers + input logic instr_is_compressed_i, + input logic instr_bp_taken_i, + output logic instr_req_o, + output logic instr_first_cycle_id_o, + output logic instr_valid_clear_o, // kill instr in IF-ID reg + output logic id_in_ready_o, // ID stage is ready for next instr + output logic icache_inval_o, + + // Jumps and branches + input logic branch_decision_i, + + // IF and ID stage signals + output logic pc_set_o, + output cheriot_pkg::pc_sel_e pc_mux_o, 
+ output logic nt_branch_mispredict_o, + output logic [31:0] nt_branch_addr_o, + output cheriot_pkg::exc_pc_sel_e exc_pc_mux_o, + output cheriot_pkg::exc_cause_e exc_cause_o, + + input logic illegal_c_insn_i, + input logic instr_fetch_err_i, + input logic instr_fetch_err_plus2_i, + input logic instr_fetch_cheri_acc_vio_i, + input logic instr_fetch_cheri_bound_vio_i, + + input logic [31:0] pc_id_i, + + // Stalls + input logic ex_valid_i, // EX stage has valid output + input logic lsu_resp_valid_i, // LSU has valid output, or is done + // ALU + output cheriot_pkg::alu_op_e alu_operator_ex_o, + output logic [31:0] alu_operand_a_ex_o, + output logic [31:0] alu_operand_b_ex_o, + + // Multicycle Operation Stage Register + input logic [1:0] imd_val_we_ex_i, + input logic [33:0] imd_val_d_ex_i[2], + output logic [33:0] imd_val_q_ex_o[2], + + // Branch target ALU + output logic [31:0] bt_a_operand_o, + output logic [31:0] bt_b_operand_o, + + // MUL, DIV + output logic mult_en_ex_o, + output logic div_en_ex_o, + output logic mult_sel_ex_o, + output logic div_sel_ex_o, + output cheriot_pkg::md_op_e multdiv_operator_ex_o, + output logic [1:0] multdiv_signed_mode_ex_o, + output logic [31:0] multdiv_operand_a_ex_o, + output logic [31:0] multdiv_operand_b_ex_o, + output logic multdiv_ready_id_o, + + // CSR + output logic csr_access_o, + output cheriot_pkg::csr_op_e csr_op_o, + output logic csr_op_en_o, + output logic csr_save_if_o, + output logic csr_save_id_o, + output logic csr_save_wb_o, + output logic csr_restore_mret_id_o, + output logic csr_restore_dret_id_o, + output logic csr_save_cause_o, + output logic csr_mepcc_clrtag_o, + output logic [31:0] csr_mtval_o, + input cheriot_pkg::priv_lvl_e priv_mode_i, + input logic csr_mstatus_tw_i, + input logic illegal_csr_insn_i, + input logic data_ind_timing_i, + input logic csr_pcc_perm_sr_i, + + // Interface to load store unit + output logic lsu_req_o, + output logic lsu_we_o, + output logic [1:0] lsu_type_o, + output logic 
lsu_sign_ext_o, + output logic [31:0] lsu_wdata_o, + + input logic lsu_req_done_i, // Data req to LSU is complete and + // instruction can move to writeback + // (only relevant where writeback stage is + // present) + + input logic lsu_addr_incr_req_i, + input logic [31:0] lsu_addr_last_i, + + // Interrupt signals + input logic csr_mstatus_mie_i, + input logic irq_pending_i, + input cheriot_pkg::irqs_t irqs_i, + input logic irq_nm_i, + output logic nmi_mode_o, + + input logic lsu_load_err_i, + input logic lsu_store_err_i, + input logic lsu_err_is_cheri_i, + + // Debug Signal + output logic debug_mode_o, + output cheriot_pkg::dbg_cause_e debug_cause_o, + output logic debug_csr_save_o, + input logic debug_req_i, + input logic debug_single_step_i, + input logic debug_ebreakm_i, + input logic debug_ebreaku_i, + input logic trigger_match_i, + + // Write back signal + input logic [31:0] result_ex_i, + input logic [31:0] csr_rdata_i, + + // Register file read + output logic [4:0] rf_raddr_a_o, + input logic [31:0] rf_rdata_a_i, + output logic [4:0] rf_raddr_b_o, + input logic [31:0] rf_rdata_b_i, + output logic rf_ren_a_o, + output logic rf_ren_b_o, + + // Register file write (via writeback) + output logic [4:0] rf_waddr_id_o, + output logic [31:0] rf_wdata_id_o, + output logic rf_we_id_o, + output logic rf_rd_a_wb_match_o, + output logic rf_rd_b_wb_match_o, + input logic [31:0] rf_reg_rdy_i, + + // Register write information from writeback (for resolving data hazards) + input logic [4:0] rf_waddr_wb_i, + input logic [31:0] rf_wdata_fwd_wb_i, + input logic rf_write_wb_i, + + output logic en_wb_o, + output cheriot_pkg::wb_instr_type_e instr_type_wb_o, + output logic instr_perf_count_id_o, + input logic ready_wb_i, + input logic outstanding_load_wb_i, + input logic outstanding_store_wb_i, + + // Performance Counters + output logic perf_jump_o, // executing a jump instr + output logic perf_branch_o, // executing a branch instr + output logic perf_tbranch_o, // executing a 
taken branch instr + output logic perf_dside_wait_o, // instruction in ID/EX is awaiting memory + // access to finish before proceeding + output logic perf_mul_wait_o, + output logic perf_div_wait_o, + output logic instr_id_done_o, + + // cheri signals + output logic cheri_exec_id_o, + output logic instr_is_cheri_id_o, + output logic instr_is_rv32lsu_id_o, + output logic [11:0] cheri_imm12_o, + output logic [19:0] cheri_imm20_o, + output logic [20:0] cheri_imm21_o, + output logic [OPDW-1:0] cheri_operator_o, + output logic [4:0] cheri_cs2_dec_o, + output logic cheri_load_o, + output logic cheri_store_o, + + input logic cheri_ex_valid_i, + input logic cheri_ex_err_i, + input logic [11:0] cheri_ex_err_info_i, + input logic cheri_wb_err_i, + input logic [15:0] cheri_wb_err_info_i, + input logic cheri_branch_req_i, // from cheri EX + input logic [31:0] cheri_branch_target_i +); + + import cheriot_pkg::*; + + // Decoder/Controller, ID stage internal signals + logic illegal_insn_dec; + logic ebrk_insn; + logic mret_insn_dec; + logic dret_insn_dec; + logic ecall_insn_dec; + logic wfi_insn_dec; + + logic wb_exception; + logic unused_id_exception; + logic id_exception_nc; + + logic branch_in_dec; + logic branch_set, branch_set_raw, branch_set_raw_d; + logic branch_jump_set_done_q, branch_jump_set_done_d; + logic branch_not_set; + logic branch_taken; + logic jump_in_dec; + logic jump_set_dec; + logic jump_set, jump_set_raw; + + logic instr_first_cycle; + logic instr_executing_spec; + logic instr_executing; + logic instr_done; + logic controller_run; + logic stall_ld_hz; + logic stall_mem; + logic stall_multdiv; + logic stall_branch; + logic stall_jump; + logic stall_id; + logic stall_wb; + logic stall_cheri; + logic flush_id; + logic multicycle_done; + + // Immediate decoding and sign extension + logic [31:0] imm_i_type; + logic [31:0] imm_s_type; + logic [31:0] imm_b_type; + logic [31:0] imm_u_type; + logic [31:0] imm_j_type; + logic [31:0] zimm_rs1_type; + + logic [31:0] 
imm_a; // contains the immediate for operand b + logic [31:0] imm_b; // contains the immediate for operand b + + // Register file interface + + rf_wd_sel_e rf_wdata_sel; + logic rf_we_dec, rf_we_raw; + logic rf_ren_a, rf_ren_b; + logic rf_ren_a_dec, rf_ren_b_dec; + logic rf_we_or_load; + + // Read enables should only be asserted for valid and legal instructions + assign rf_ren_a = instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o & rf_ren_a_dec; + assign rf_ren_b = instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o & rf_ren_b_dec; + + assign rf_ren_a_o = rf_ren_a; + assign rf_ren_b_o = rf_ren_b; + + logic [31:0] rf_rdata_a_fwd; + logic [31:0] rf_rdata_b_fwd; + + logic cheri_lsu_req_dec; + logic cheri_multicycle_dec; + logic ex_valid_all; + + // ALU Control + alu_op_e alu_operator; + op_a_sel_e alu_op_a_mux_sel, alu_op_a_mux_sel_dec; + op_b_sel_e alu_op_b_mux_sel, alu_op_b_mux_sel_dec; + logic alu_multicycle_dec; + logic stall_alu; + + logic [33:0] imd_val_q[2]; + + op_a_sel_e bt_a_mux_sel; + imm_b_sel_e bt_b_mux_sel; + + imm_a_sel_e imm_a_mux_sel; + imm_b_sel_e imm_b_mux_sel, imm_b_mux_sel_dec; + + // Multiplier Control + logic mult_en_id, mult_en_dec; // use integer multiplier + logic div_en_id, div_en_dec; // use integer division or reminder + logic multdiv_en_dec; + md_op_e multdiv_operator; + logic [1:0] multdiv_signed_mode; + + // Data Memory Control + logic lsu_we; + logic [1:0] lsu_type; + logic lsu_sign_ext; + logic lsu_req, lsu_req_dec; + logic data_req_allowed; + + // CSR control + logic csr_pipe_flush; + logic csr_cheri_always_ok; + + logic [31:0] alu_operand_a; + logic [31:0] alu_operand_b; + + logic stall_cheri_trvk; + logic instr_is_legal_cheri; + + ///////////// + // LSU Mux // + ///////////// + + // Misaligned loads/stores result in two aligned loads/stores, compute second address + assign alu_op_a_mux_sel = lsu_addr_incr_req_i ? OP_A_FWD : alu_op_a_mux_sel_dec; + assign alu_op_b_mux_sel = lsu_addr_incr_req_i ? 
OP_B_IMM : alu_op_b_mux_sel_dec; + assign imm_b_mux_sel = lsu_addr_incr_req_i ? IMM_B_INCR_ADDR : imm_b_mux_sel_dec; + + /////////////////// + // Operand MUXES // + /////////////////// + + // Main ALU immediate MUX for Operand A + assign imm_a = (imm_a_mux_sel == IMM_A_Z) ? zimm_rs1_type : '0; + + // Main ALU MUX for Operand A + always_comb begin : alu_operand_a_mux + unique case (alu_op_a_mux_sel) + OP_A_REG_A: alu_operand_a = rf_rdata_a_fwd; + OP_A_FWD: alu_operand_a = lsu_addr_last_i; + OP_A_CURRPC: alu_operand_a = pc_id_i; + OP_A_IMM: alu_operand_a = imm_a; + default: alu_operand_a = pc_id_i; + endcase + end + + if (BranchTargetALU) begin : g_btalu_muxes + // Branch target ALU operand A mux + always_comb begin : bt_operand_a_mux + unique case (bt_a_mux_sel) + OP_A_REG_A: bt_a_operand_o = rf_rdata_a_fwd; + OP_A_CURRPC: bt_a_operand_o = pc_id_i; + default: bt_a_operand_o = pc_id_i; + endcase + end + + // Branch target ALU operand B mux + always_comb begin : bt_immediate_b_mux + unique case (bt_b_mux_sel) + IMM_B_I: bt_b_operand_o = imm_i_type; + IMM_B_B: bt_b_operand_o = imm_b_type; + IMM_B_J: bt_b_operand_o = imm_j_type; + IMM_B_INCR_PC: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4; + default: bt_b_operand_o = instr_is_compressed_i ? 32'h2 : 32'h4; + endcase + end + + // Reduced main ALU immediate MUX for Operand B + always_comb begin : immediate_b_mux + unique case (imm_b_mux_sel) + IMM_B_I: imm_b = imm_i_type; + IMM_B_S: imm_b = imm_s_type; + IMM_B_U: imm_b = imm_u_type; + IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 
32'h2 : 32'h4; + IMM_B_INCR_ADDR: imm_b = 32'h4; + default: imm_b = 32'h4; + endcase + end + `ASSERT(IbexImmBMuxSelValid, instr_valid_i |-> imm_b_mux_sel inside { + IMM_B_I, + IMM_B_S, + IMM_B_U, + IMM_B_INCR_PC, + IMM_B_INCR_ADDR}) + end else begin : g_nobtalu + op_a_sel_e unused_a_mux_sel; + imm_b_sel_e unused_b_mux_sel; + + assign unused_a_mux_sel = bt_a_mux_sel; + assign unused_b_mux_sel = bt_b_mux_sel; + assign bt_a_operand_o = '0; + assign bt_b_operand_o = '0; + + // Full main ALU immediate MUX for Operand B + always_comb begin : immediate_b_mux + unique case (imm_b_mux_sel) + IMM_B_I: imm_b = imm_i_type; + IMM_B_S: imm_b = imm_s_type; + IMM_B_B: imm_b = imm_b_type; + IMM_B_U: imm_b = imm_u_type; + IMM_B_J: imm_b = imm_j_type; + IMM_B_INCR_PC: imm_b = instr_is_compressed_i ? 32'h2 : 32'h4; + IMM_B_INCR_ADDR: imm_b = 32'h4; + default: imm_b = 32'h4; + endcase + end + `ASSERT(IbexImmBMuxSelValid, instr_valid_i |-> imm_b_mux_sel inside { + IMM_B_I, + IMM_B_S, + IMM_B_B, + IMM_B_U, + IMM_B_J, + IMM_B_INCR_PC, + IMM_B_INCR_ADDR}) + end + + // ALU MUX for Operand B + assign alu_operand_b = (alu_op_b_mux_sel == OP_B_IMM) ? 
imm_b : rf_rdata_b_fwd; + + ///////////////////////////////////////// + // Multicycle Operation Stage Register // + ///////////////////////////////////////// + + for (genvar i = 0; i < 2; i++) begin : gen_intermediate_val_reg + always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_val_reg + if (!rst_ni) begin + imd_val_q[i] <= '0; + end else if (imd_val_we_ex_i[i]) begin + imd_val_q[i] <= imd_val_d_ex_i[i]; + end + end + end + + assign imd_val_q_ex_o = imd_val_q; + + /////////////////////// + // Register File MUX // + /////////////////////// + + // Suppress register write if there is an illegal CSR access or instruction is not executing + assign rf_we_id_o = rf_we_raw & instr_executing & ~illegal_csr_insn_i; + + // Register file write data mux + always_comb begin : rf_wdata_id_mux + unique case (rf_wdata_sel) + RF_WD_EX: rf_wdata_id_o = result_ex_i; + RF_WD_CSR: rf_wdata_id_o = csr_rdata_i; + default: rf_wdata_id_o = result_ex_i; + endcase + end + + ///////////// + // Decoder // + ///////////// + + cheriot_decoder #( + .RV32E (RV32E), + .RV32M (RV32M), + .RV32B (RV32B), + .BranchTargetALU(BranchTargetALU), + .CHERIoTEn (CHERIoTEn), + .CheriPPLBC (CheriPPLBC), + .CheriSBND2 (CheriSBND2) + ) decoder_i ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + .cheri_pmode_i (cheri_pmode_i), + .cheri_tsafe_en_i (cheri_tsafe_en_i), + // controller + .illegal_insn_o(illegal_insn_dec), + .ebrk_insn_o (ebrk_insn), + .mret_insn_o (mret_insn_dec), + .dret_insn_o (dret_insn_dec), + .ecall_insn_o (ecall_insn_dec), + .wfi_insn_o (wfi_insn_dec), + .jump_set_o (jump_set_dec), + .branch_taken_i(branch_taken), + .icache_inval_o(icache_inval_o), + + // from IF-ID pipeline register + .instr_first_cycle_i(instr_first_cycle), + .instr_rdata_i (instr_rdata_i), + .instr_rdata_alu_i (instr_rdata_alu_i), + .illegal_c_insn_i (illegal_c_insn_i), + + // immediates + .imm_a_mux_sel_o(imm_a_mux_sel), + .imm_b_mux_sel_o(imm_b_mux_sel_dec), + .bt_a_mux_sel_o (bt_a_mux_sel), + .bt_b_mux_sel_o 
(bt_b_mux_sel), + + .imm_i_type_o (imm_i_type), + .imm_s_type_o (imm_s_type), + .imm_b_type_o (imm_b_type), + .imm_u_type_o (imm_u_type), + .imm_j_type_o (imm_j_type), + .zimm_rs1_type_o(zimm_rs1_type), + + // register file + .rf_wdata_sel_o(rf_wdata_sel), + .rf_we_o (rf_we_dec), + .rf_we_or_load_o(rf_we_or_load), + + .rf_raddr_a_o(rf_raddr_a_o), + .rf_raddr_b_o(rf_raddr_b_o), + .rf_waddr_o (rf_waddr_id_o), + .rf_ren_a_o (rf_ren_a_dec), + .rf_ren_b_o (rf_ren_b_dec), + + // ALU + .alu_operator_o (alu_operator), + .alu_op_a_mux_sel_o(alu_op_a_mux_sel_dec), + .alu_op_b_mux_sel_o(alu_op_b_mux_sel_dec), + .alu_multicycle_o (alu_multicycle_dec), + + // MULT & DIV + .mult_en_o (mult_en_dec), + .div_en_o (div_en_dec), + .mult_sel_o (mult_sel_ex_o), + .div_sel_o (div_sel_ex_o), + .multdiv_operator_o (multdiv_operator), + .multdiv_signed_mode_o(multdiv_signed_mode), + + // CSRs + .csr_access_o(csr_access_o), + .csr_op_o (csr_op_o), + .csr_cheri_always_ok_o (csr_cheri_always_ok), + + // LSU + .data_req_o (lsu_req_dec), + .cheri_data_req_o (cheri_lsu_req_dec), + .data_we_o (lsu_we), + .data_type_o (lsu_type), + .data_sign_extension_o(lsu_sign_ext), + + // jump/branches + .jump_in_dec_o (jump_in_dec), + .branch_in_dec_o(branch_in_dec), + + // cheri signals + .instr_is_cheri_o (instr_is_cheri_id_o), + .instr_is_legal_cheri_o (instr_is_legal_cheri), + .cheri_imm12_o (cheri_imm12_o), + .cheri_imm20_o (cheri_imm20_o), + .cheri_imm21_o (cheri_imm21_o), + .cheri_operator_o (cheri_operator_o), + .cheri_cs2_dec_o (cheri_cs2_dec_o), + .cheri_multicycle_dec_o (cheri_multicycle_dec) + ); + + // assign cheri_lsu_req_dec = cheri_load_o | cheri_store_o; + assign instr_is_rv32lsu_id_o = lsu_req_dec; // go to cheri_ex + + assign ex_valid_all = instr_is_cheri_id_o ? cheri_ex_valid_i : ex_valid_i; + + // If use "internal" CLBC, execution is sequential/multicyle. Otherwise use pipelined version. 
+ assign cheri_load_o = cheri_operator_o[CLOAD_CAP] & (~cheri_tsafe_en_i | CheriPPLBC); + + assign cheri_store_o = cheri_operator_o[CSTORE_CAP]; + + + ///////////////////////////////// + // CSR-related pipline flushes // + ///////////////////////////////// + always_comb begin : csr_pipeline_flushes + csr_pipe_flush = 1'b0; + + // A pipeline flush is needed to let the controller react after modifying certain CSRs: + // - When enabling interrupts, pending IRQs become visible to the controller only during + // the next cycle. If during that cycle the core disables interrupts again, it does not + // see any pending IRQs and consequently does not start to handle interrupts. + // - When modifying debug CSRs - TODO: Check if this is really needed + if (csr_op_en_o == 1'b1 && (csr_op_o == CSR_OP_WRITE || csr_op_o == CSR_OP_SET)) begin + if (csr_num_e'(instr_rdata_i[31:20]) == CSR_MSTATUS || + csr_num_e'(instr_rdata_i[31:20]) == CSR_MIE) begin + csr_pipe_flush = 1'b1; + end + end else if (csr_op_en_o == 1'b1 && csr_op_o != CSR_OP_READ) begin + if (csr_num_e'(instr_rdata_i[31:20]) == CSR_DCSR || + csr_num_e'(instr_rdata_i[31:20]) == CSR_DPC || + csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH0 || + csr_num_e'(instr_rdata_i[31:20]) == CSR_DSCRATCH1) begin + csr_pipe_flush = 1'b1; + end + end + end + + //////////////// + // Controller // + //////////////// + + assign illegal_insn_o = instr_valid_i & (illegal_insn_dec | illegal_csr_insn_i); + + cheriot_controller #( + .CHERIoTEn (CHERIoTEn), + .WritebackStage (WritebackStage), + .BranchPredictor(BranchPredictor) + ) controller_i ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .cheri_pmode_i (cheri_pmode_i), + .ctrl_busy_o(ctrl_busy_o), + + // decoder related signals + .illegal_insn_i (illegal_insn_o), + .ecall_insn_i (ecall_insn_dec), + .mret_insn_i (mret_insn_dec), + .dret_insn_i (dret_insn_dec), + .wfi_insn_i (wfi_insn_dec), + .ebrk_insn_i (ebrk_insn), + .csr_pipe_flush_i(csr_pipe_flush), + .csr_access_i (csr_access_o), + 
.csr_cheri_always_ok_i (csr_cheri_always_ok), + + // from IF-ID pipeline + .instr_valid_i (instr_valid_i), + .instr_i (instr_rdata_i), + .instr_compressed_i (instr_rdata_c_i), + .instr_is_compressed_i (instr_is_compressed_i), + .instr_bp_taken_i (instr_bp_taken_i), + .instr_fetch_err_i (instr_fetch_err_i), + .instr_fetch_err_plus2_i(instr_fetch_err_plus2_i), + .instr_fetch_cheri_acc_vio_i (instr_fetch_cheri_acc_vio_i), + .instr_fetch_cheri_bound_vio_i (instr_fetch_cheri_bound_vio_i), + + .pc_id_i (pc_id_i), + + // to IF-ID pipeline + .instr_valid_clear_o(instr_valid_clear_o), + .id_in_ready_o (id_in_ready_o), + .controller_run_o (controller_run), + + // to prefetcher + .instr_req_o (instr_req_o), + .pc_set_o (pc_set_o), + .pc_mux_o (pc_mux_o), + .nt_branch_mispredict_o(nt_branch_mispredict_o), + .exc_pc_mux_o (exc_pc_mux_o), + .exc_cause_o (exc_cause_o), + + // LSU + .lsu_addr_last_i(lsu_addr_last_i), + .load_err_i (lsu_load_err_i), + .store_err_i (lsu_store_err_i), + .lsu_err_is_cheri_i (lsu_err_is_cheri_i), + .wb_exception_o (wb_exception), + .id_exception_o (unused_id_exception), + .id_exception_nc_o (id_exception_nc), + + // jump/branch control + .branch_set_i (branch_set), + .branch_not_set_i (branch_not_set), + .jump_set_i (jump_set), + + // interrupt signals + .csr_mstatus_mie_i(csr_mstatus_mie_i), + .irq_pending_i (irq_pending_i), + .irqs_i (irqs_i), + .irq_nm_i (irq_nm_i), + .nmi_mode_o (nmi_mode_o), + + // CSR Controller Signals + .csr_save_if_o (csr_save_if_o), + .csr_save_id_o (csr_save_id_o), + .csr_save_wb_o (csr_save_wb_o), + .csr_restore_mret_id_o(csr_restore_mret_id_o), + .csr_restore_dret_id_o(csr_restore_dret_id_o), + .csr_save_cause_o (csr_save_cause_o), + .csr_mepcc_clrtag_o (csr_mepcc_clrtag_o), + .csr_mtval_o (csr_mtval_o), + .priv_mode_i (priv_mode_i), + .csr_mstatus_tw_i (csr_mstatus_tw_i), + .csr_pcc_perm_sr_i (csr_pcc_perm_sr_i), + + // Debug Signal + .debug_mode_o (debug_mode_o), + .debug_cause_o (debug_cause_o), + .debug_csr_save_o 
(debug_csr_save_o), + .debug_req_i (debug_req_i), + .debug_single_step_i(debug_single_step_i), + .debug_ebreakm_i (debug_ebreakm_i), + .debug_ebreaku_i (debug_ebreaku_i), + .trigger_match_i (trigger_match_i), + + .stall_id_i(stall_id), + .stall_wb_i(stall_wb), + .flush_id_o(flush_id), + .ready_wb_i(ready_wb_i), + + // Performance Counters + .perf_jump_o (perf_jump_o), + .perf_tbranch_o(perf_tbranch_o), + + .instr_is_cheri_i (instr_is_cheri_id_o) , + .cheri_ex_valid_i (cheri_ex_valid_i) , + .cheri_ex_err_i (cheri_ex_err_i) , + .cheri_ex_err_info_i (cheri_ex_err_info_i) , + .cheri_wb_err_i (cheri_wb_err_i) , + .cheri_wb_err_info_i (cheri_wb_err_info_i) , + .cheri_branch_req_i (cheri_branch_req_i) , // from cheri EX + .cheri_branch_target_i (cheri_branch_target_i) + ); + + assign multdiv_en_dec = mult_en_dec | div_en_dec; + + // note data_req_allowed is already part of instr_executing + assign lsu_req = instr_executing ? data_req_allowed & lsu_req_dec : 1'b0; + assign mult_en_id = instr_executing ? mult_en_dec : 1'b0; + assign div_en_id = instr_executing ? div_en_dec : 1'b0; + + assign lsu_req_o = lsu_req; + assign lsu_we_o = lsu_we; + assign lsu_type_o = lsu_type; + assign lsu_sign_ext_o = lsu_sign_ext; + assign lsu_wdata_o = rf_rdata_b_fwd; + // csr_op_en_o is set when CSR access should actually happen. + // csv_access_o is set when CSR access instruction is present and is used to compute whether a CSR + // access is illegal. A combinational loop would be created if csr_op_en_o was used along (as + // asserting it for an illegal csr access would result in a flush that would need to deassert it). + + // assign csr_op_en_o = csr_access_o & instr_executing & instr_id_done_o; + // improve timing for CHERIoT mode (instr_id_done has too much logic) + assign csr_op_en_o = csr_access_o & instr_executing & + (CHERIoTEn ? 
instr_first_cycle : instr_id_done_o); + + assign alu_operator_ex_o = alu_operator; + assign alu_operand_a_ex_o = alu_operand_a; + assign alu_operand_b_ex_o = alu_operand_b; + + assign mult_en_ex_o = mult_en_id; + assign div_en_ex_o = div_en_id; + + assign multdiv_operator_ex_o = multdiv_operator; + assign multdiv_signed_mode_ex_o = multdiv_signed_mode; + assign multdiv_operand_a_ex_o = rf_rdata_a_fwd; + assign multdiv_operand_b_ex_o = rf_rdata_b_fwd; + + //////////////////////// + // Branch set control // + //////////////////////// + + if (BranchTargetALU && !DataIndTiming) begin : g_branch_set_direct + // Branch set fed straight to controller with branch target ALU + // (condition pass/fail used same cycle as generated instruction request) + assign branch_set_raw = branch_set_raw_d; + end else begin : g_branch_set_flop + // Branch set flopped without branch target ALU, or in fixed time execution mode + // (condition pass/fail used next cycle where branch target is calculated) + logic branch_set_raw_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + branch_set_raw_q <= 1'b0; + // bug here (see the 07082022 report). should qualify this with instr_executing + // (same as id_fsm_q). let's wait for now and fix later QQQ + end else begin + branch_set_raw_q <= branch_set_raw_d; + end + end + + // Branches always take two cycles in fixed time execution mode, with or without the branch + // target ALU (to avoid a path from the branch decision into the branch target ALU operand + // muxing). + assign branch_set_raw = (BranchTargetALU && !data_ind_timing_i) ? branch_set_raw_d : + branch_set_raw_q; + + end + + // Track whether the current instruction in ID/EX has done a branch or jump set. 
+ assign branch_jump_set_done_d = (branch_set_raw | jump_set_raw | branch_jump_set_done_q) & + ~instr_valid_clear_o; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + branch_jump_set_done_q <= 1'b0; + end else begin + branch_jump_set_done_q <= branch_jump_set_done_d; + end + end + + // the _raw signals from the state machine may be asserted for multiple cycles when + // instr_executing_spec is asserted and instr_executing is not asserted. This may occur where + // a memory error is seen or a there are outstanding memory accesses (indicate a load or store is + // in the WB stage). The branch or jump speculatively begins the fetch but is held back from + // completing until it is certain the outstanding access hasn't seen a memory error. This logic + // ensures only the first cycle of a branch or jump set is sent to the controller to prevent + // needless extra IF flushes and fetches. + assign jump_set = jump_set_raw & ~branch_jump_set_done_q; + assign branch_set = branch_set_raw & ~branch_jump_set_done_q; + + // Branch condition is calculated in the first cycle and flopped for use in the second cycle + // (only used in fixed time execution mode to determine branch destination). + if (DataIndTiming) begin : g_sec_branch_taken + logic branch_taken_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + branch_taken_q <= 1'b0; + end else begin + branch_taken_q <= branch_decision_i; + end + end + + assign branch_taken = ~data_ind_timing_i | branch_taken_q; + + end else begin : g_nosec_branch_taken + + // Signal unused without fixed time execution mode - only taken branches will trigger + // branch_set_raw + assign branch_taken = 1'b1; + + end + + // Holding branch_set/jump_set high for more than one cycle should not cause a functional issue. + // However it could generate needless prefetch buffer flushes and instruction fetches. The ID/EX + // designs ensures that this never happens for non-predicted branches. 
+ `ASSERT(NeverDoubleBranch, branch_set & ~instr_bp_taken_i |=> ~branch_set) + `ASSERT(NeverDoubleJump, jump_set & ~instr_bp_taken_i |=> ~jump_set) + + ////////////////////////////// + // Branch not-taken address // + ////////////////////////////// + + if (BranchPredictor) begin : g_calc_nt_addr + assign nt_branch_addr_o = pc_id_i + (instr_is_compressed_i ? 32'd2 : 32'd4); + end else begin : g_n_calc_nt_addr + assign nt_branch_addr_o = 32'd0; + end + + /////////////// + // ID-EX FSM // + /////////////// + + typedef enum logic { FIRST_CYCLE, MULTI_CYCLE } id_fsm_e; + id_fsm_e id_fsm_q, id_fsm_d; + + always_ff @(posedge clk_i or negedge rst_ni) begin : id_pipeline_reg + if (!rst_ni) begin + id_fsm_q <= FIRST_CYCLE; + end else if (instr_executing) begin + id_fsm_q <= id_fsm_d; + end + end + + // ID/EX stage can be in two states, FIRST_CYCLE and MULTI_CYCLE. An instruction enters + // MULTI_CYCLE if it requires multiple cycles to complete regardless of stalls and other + // considerations. An instruction may be held in FIRST_CYCLE if it's unable to begin executing + // (this is controlled by instr_executing). 
+ + always_comb begin + id_fsm_d = id_fsm_q; + rf_we_raw = rf_we_dec; + stall_multdiv = 1'b0; + stall_jump = 1'b0; + stall_branch = 1'b0; + stall_alu = 1'b0; + stall_cheri = 1'b0; + branch_set_raw_d = 1'b0; + branch_not_set = 1'b0; + jump_set_raw = 1'b0; + perf_branch_o = 1'b0; + + if (instr_executing_spec) begin + unique case (id_fsm_q) + FIRST_CYCLE: begin + unique case (1'b1) + lsu_req_dec: begin + if (!WritebackStage) begin + // LSU operation + id_fsm_d = MULTI_CYCLE; + end else if(~lsu_req_done_i) begin + id_fsm_d = MULTI_CYCLE; + end + end + cheri_lsu_req_dec: begin + if (cheri_pmode_i) begin + if (!WritebackStage) begin + id_fsm_d = MULTI_CYCLE; + end else if(~lsu_req_done_i) begin // covers the lsu_cheri_err case (1cycle) + id_fsm_d = MULTI_CYCLE; + end + end + end + multdiv_en_dec: begin + // MUL or DIV operation + if (~ex_valid_i) begin + // When single-cycle multiply is configured mul can finish in the first cycle so + // only enter MULTI_CYCLE state if a result isn't immediately available + id_fsm_d = MULTI_CYCLE; + rf_we_raw = 1'b0; + stall_multdiv = 1'b1; + end + end + branch_in_dec: begin + // cond branch operation + // All branches take two cycles in fixed time execution mode, regardless of branch + // condition. + id_fsm_d = (data_ind_timing_i || (!BranchTargetALU && branch_decision_i)) ? + MULTI_CYCLE : FIRST_CYCLE; + stall_branch = (~BranchTargetALU & branch_decision_i) | data_ind_timing_i; + branch_set_raw_d = (branch_decision_i | data_ind_timing_i); + + if (BranchPredictor) begin + branch_not_set = ~branch_decision_i; + end + + perf_branch_o = 1'b1; + end + jump_in_dec: begin + // uncond branch operation + // BTALU means jumps only need one cycle + id_fsm_d = BranchTargetALU ? 
FIRST_CYCLE : MULTI_CYCLE; + stall_jump = ~BranchTargetALU; + jump_set_raw = jump_set_dec; + end + alu_multicycle_dec: begin + stall_alu = 1'b1; + id_fsm_d = MULTI_CYCLE; + rf_we_raw = 1'b0; + end + cheri_multicycle_dec: begin + if (cheri_pmode_i) begin + id_fsm_d = MULTI_CYCLE; + rf_we_raw = 1'b0; + stall_cheri = 1'b1; + end + end + default: begin + id_fsm_d = FIRST_CYCLE; + end + endcase + end + + MULTI_CYCLE: begin + if(multdiv_en_dec) begin + rf_we_raw = rf_we_dec & ex_valid_i; + end + + if (multicycle_done & ready_wb_i) begin + id_fsm_d = FIRST_CYCLE; + end else begin + stall_multdiv = multdiv_en_dec; + stall_branch = branch_in_dec; + stall_jump = jump_in_dec; + stall_cheri = cheri_multicycle_dec; + end + end + + default: begin + id_fsm_d = FIRST_CYCLE; + end + endcase + end + end + + // Note for the two-stage configuration ready_wb_i is always set + assign multdiv_ready_id_o = ready_wb_i; + + `ASSERT(StallIDIfMulticycle, (id_fsm_q == FIRST_CYCLE) & (id_fsm_d == MULTI_CYCLE) |-> stall_id) + + + // Stall ID/EX stage for reason that relates to instruction in ID/EX, update assertion below if + // modifying this. + assign stall_id = stall_ld_hz | stall_mem | stall_multdiv | stall_jump | stall_branch | stall_cheri | + stall_alu | stall_cheri_trvk; + + // Generally illegal instructions have no reason to stall, however they must still stall waiting + // for outstanding memory requests so exceptions related to them take priority over the illegal + // instruction exception. + `ASSERT(IllegalInsnStallMustBeMemStall, illegal_insn_o & stall_id |-> stall_mem & + ~(stall_ld_hz | stall_multdiv | stall_jump | stall_branch | stall_alu | stall_cheri_trvk)) + + assign instr_done = ~stall_id & ~flush_id & instr_executing; + + // Signal instruction in ID is in it's first cycle. It can remain in its + // first cycle if it is stalled. 
+ assign instr_first_cycle = instr_valid_i & (id_fsm_q == FIRST_CYCLE); + // Used by RVFI to know when to capture register read data + // Used by ALU to access RS3 if ternary instruction. + assign instr_first_cycle_id_o = instr_first_cycle; + + if (WritebackStage) begin : gen_stall_mem + // Register read address matches write address in WB + logic rf_rd_a_wb_match; + logic rf_rd_b_wb_match; + // Hazard between registers being read and written + logic rf_rd_a_hz; + logic rf_rd_b_hz; + + logic outstanding_memory_access; + + logic instr_kill; + + assign multicycle_done = (lsu_req_dec|cheri_lsu_req_dec) ? ~stall_mem : ex_valid_all; + + // Is a memory access ongoing that isn't finishing this cycle + assign outstanding_memory_access = (outstanding_load_wb_i | outstanding_store_wb_i) & + ~lsu_resp_valid_i; + + // Can start a new memory access if any previous one has finished or is finishing + assign data_req_allowed = ~outstanding_memory_access; + + // Instruction won't execute because: + // - There is a pending exception in writeback + // The instruction in ID/EX will be flushed and the core will jump to an exception handler + // - The controller isn't running instructions + // This either happens in preparation for a flush and jump to an exception handler e.g. in + // response to an IRQ or debug request or whilst the core is sleeping or resetting/fetching + // first instruction in which case any valid instruction in ID/EX should be ignored. 
+ // - There was an error on instruction fetch + + // cheri instr can only generate exception after execution + // exclude cheri EX exception from instr_kill improves timing + + assign instr_kill = instr_fetch_err_i | + wb_exception | + id_exception_nc | // exclude cheri EX exceptions + ~controller_run; + + // With writeback stage instructions must be prevented from executing if there is: + // - A load hazard + // - A pending memory access + // If it receives an error response this results in a precise exception from WB so ID/EX + // instruction must not execute until error response is known. + // - A load/store error + // This will cause a precise exception for the instruction in WB so ID/EX instruction must not + // execute + // + // instr_executing_spec is a speculative signal. It indicates an instruction can execute + // assuming there are no exceptions from writeback and any outstanding memory access won't + // receive an error. It is required so branch and jump requests don't factor in an incoming dmem + // error (that in turn would factor directly into imem requests leading to a feedthrough path). + // + // instr_executing is the full signal, it will only allow execution once any potential + // exceptions from writeback have been resolved. + assign instr_executing_spec = instr_valid_i & + ~instr_fetch_err_i & + controller_run & + ~stall_ld_hz & + ~stall_cheri_trvk; + + assign instr_executing = instr_valid_i & + ~instr_kill & + ~stall_ld_hz & + ~stall_cheri_trvk & + ~outstanding_memory_access; + + // allowing a cheri instruction to start execution - valid instruction not stalled by WB/hz + // note we can't use instr_kill here since it includes id_exception (cheri_ex_err), which causes a + // comb loop. 
+ + assign cheri_exec_id_o = cheri_pmode_i & instr_valid_i & + ~instr_fetch_err_i & + instr_is_legal_cheri & + controller_run & + ~wb_exception & + ~stall_ld_hz & + ~stall_cheri_trvk & + ~outstanding_memory_access; + + + `ASSERT(IbexExecutingSpecIfExecuting, instr_executing |-> instr_executing_spec) + + `ASSERT(IbexStallIfValidInstrNotExecuting, + instr_valid_i & ~instr_kill & ~instr_executing |-> stall_id) + + `ASSERT(IbexCannotRetireWithPendingExceptions, + instr_done |-> ~(wb_exception | outstanding_memory_access)) + + // Stall for reasons related to memory: + // * There is an outstanding memory access that won't resolve this cycle (need to wait to allow + // precise exceptions) + // * There is a load/store request not being granted or which is unaligned and waiting to issue + // a second request (needs to stay in ID for the address calculation) + + + // For pipeline timing/stalling, we treat cheri data load/stores the same as legacy RV32 load/stores + assign stall_mem = instr_valid_i & (outstanding_memory_access | + ((lsu_req_dec | cheri_lsu_req_dec) & ~lsu_req_done_i)); + + // If we stall a load in ID for any reason, it must not make an LSU request + // (otherwise we might issue two requests for the same instruction) + `ASSERT(IbexStallMemNoRequest, + instr_valid_i & lsu_req_dec & ~instr_done |-> ~lsu_req_done_i) + + assign rf_rd_a_wb_match = (rf_waddr_wb_i == rf_raddr_a_o) & |rf_raddr_a_o; + assign rf_rd_b_wb_match = (rf_waddr_wb_i == rf_raddr_b_o) & |rf_raddr_b_o; + + assign rf_rd_a_wb_match_o = rf_rd_a_wb_match; + assign rf_rd_b_wb_match_o = rf_rd_b_wb_match; + + // If instruction is reading register that load will be writing stall in + // ID until load is complete. No need to stall when reading zero register. + assign rf_rd_a_hz = rf_rd_a_wb_match & rf_ren_a; + assign rf_rd_b_hz = rf_rd_b_wb_match & rf_ren_b; + + // If instruction is reading a register that writeback is writing forward writeback data to read + // data. 
Note this doesn't factor in load data as it arrives too late, such hazards are + // resolved via a stall (see above). + assign rf_rdata_a_fwd = rf_rd_a_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_a_i; + assign rf_rdata_b_fwd = rf_rd_b_wb_match & rf_write_wb_i ? rf_wdata_fwd_wb_i : rf_rdata_b_i; + + assign stall_ld_hz = outstanding_load_wb_i & (rf_rd_a_hz | rf_rd_b_hz); + + logic rf_we_or_load_valid; + assign rf_we_or_load_valid = rf_we_or_load & instr_valid_i & ~instr_fetch_err_i & ~illegal_insn_o; + + + assign stall_cheri_trvk = (CHERIoTEn & cheri_pmode_i & CheriPPLBC) ? + ((rf_ren_a && ~rf_reg_rdy_i[rf_raddr_a_o]) | + (rf_ren_b && ~rf_reg_rdy_i[rf_raddr_b_o]) | + (rf_we_or_load_valid && ~rf_reg_rdy_i[rf_waddr_id_o])) : + 1'b0; + + assign instr_type_wb_o = ~lsu_req_dec ? WB_INSTR_OTHER : + lsu_we ? WB_INSTR_STORE : + WB_INSTR_LOAD; + + assign instr_id_done_o = en_wb_o & ready_wb_i; + + // Stall ID/EX as instruction in ID/EX cannot proceed to writeback yet + assign stall_wb = en_wb_o & ~ready_wb_i; + + assign perf_dside_wait_o = instr_valid_i & ~instr_kill & + (outstanding_memory_access | stall_ld_hz | stall_cheri_trvk); + end else begin : gen_no_stall_mem + + assign multicycle_done = (cheri_lsu_req_dec | lsu_req_dec) ? lsu_resp_valid_i : ex_valid_all; + + assign data_req_allowed = instr_first_cycle; + + // Without Writeback Stage always stall the first cycle of a load/store. 
+ // Then stall until it is complete + assign stall_mem = instr_valid_i & ((lsu_req_dec | cheri_lsu_req_dec) & (~lsu_resp_valid_i | instr_first_cycle)); + + // No load hazards without Writeback Stage + assign stall_ld_hz = 1'b0; + assign stall_cheri_trvk = 1'b0; // CheriPPLBC can't work with 2-stage pipeline configuration + + // Without writeback stage any valid instruction that hasn't seen an error will execute + assign instr_executing_spec = instr_valid_i & ~instr_fetch_err_i & controller_run; + assign instr_executing = instr_executing_spec; + assign cheri_exec_id_o = instr_executing; + + `ASSERT(IbexStallIfValidInstrNotExecuting, + instr_valid_i & ~instr_fetch_err_i & ~instr_executing & controller_run |-> stall_id) + + // No data forwarding without writeback stage so always take source register data direct from + // register file + assign rf_rdata_a_fwd = rf_rdata_a_i; + assign rf_rdata_b_fwd = rf_rdata_b_i; + + assign rf_rd_a_wb_match_o = 1'b0; + assign rf_rd_b_wb_match_o = 1'b0; + + // Unused Writeback stage only IO & wiring + // Assign inputs and internal wiring to unused signals to satisfy lint checks + // Tie-off outputs to constant values + logic unused_data_req_done_ex; + logic [4:0] unused_rf_waddr_wb; + logic unused_rf_write_wb; + logic unused_outstanding_load_wb; + logic unused_outstanding_store_wb; + logic unused_wb_exception; + logic [31:0] unused_rf_wdata_fwd_wb; + + assign unused_data_req_done_ex = lsu_req_done_i; + assign unused_rf_waddr_wb = rf_waddr_wb_i; + assign unused_rf_write_wb = rf_write_wb_i; + assign unused_outstanding_load_wb = outstanding_load_wb_i; + assign unused_outstanding_store_wb = outstanding_store_wb_i; + assign unused_wb_exception = wb_exception; + assign unused_rf_wdata_fwd_wb = rf_wdata_fwd_wb_i; + + assign instr_type_wb_o = WB_INSTR_OTHER; + assign stall_wb = 1'b0; + + assign perf_dside_wait_o = instr_executing & lsu_req_dec & ~lsu_resp_valid_i; + + assign instr_id_done_o = instr_done; + end + + // Signal which instructions 
to count as retired in minstret, all traps along with ebrk and + // ecall instructions are not counted. + assign instr_perf_count_id_o = ~ebrk_insn & ~ecall_insn_dec & ~illegal_insn_dec & + ~illegal_csr_insn_i & ~instr_fetch_err_i; + + // An instruction is ready to move to the writeback stage (or retire if there is no writeback + // stage) + assign en_wb_o = instr_done; + + assign perf_mul_wait_o = stall_multdiv & mult_en_dec; + assign perf_div_wait_o = stall_multdiv & div_en_dec; + + ////////// + // FCOV // + ////////// + + `DV_FCOV_SIGNAL_GEN_IF(logic, rf_rd_wb_hz, + (gen_stall_mem.rf_rd_a_hz | gen_stall_mem.rf_rd_b_hz) & instr_valid_i, WritebackStage) + `DV_FCOV_SIGNAL(logic, branch_taken, + instr_executing & (id_fsm_q == FIRST_CYCLE) & branch_decision_i) + `DV_FCOV_SIGNAL(logic, branch_not_taken, + instr_executing & (id_fsm_q == FIRST_CYCLE) & ~branch_decision_i) + + //////////////// + // Assertions // + //////////////// + + // Selectors must be known/valid. + `ASSERT_KNOWN_IF(IbexAluOpMuxSelKnown, alu_op_a_mux_sel, instr_valid_i) + `ASSERT(IbexAluAOpMuxSelValid, instr_valid_i |-> alu_op_a_mux_sel inside { + OP_A_REG_A, + OP_A_FWD, + OP_A_CURRPC, + OP_A_IMM}) + `ASSERT_KNOWN_IF(IbexBTAluAOpMuxSelKnown, bt_a_mux_sel, instr_valid_i) + `ASSERT(IbexBTAluAOpMuxSelValid, instr_valid_i |-> bt_a_mux_sel inside { + OP_A_REG_A, + OP_A_CURRPC}) + `ASSERT_KNOWN_IF(IbexBTAluBOpMuxSelKnown, bt_b_mux_sel, instr_valid_i) + `ASSERT(IbexBTAluBOpMuxSelValid, instr_valid_i |-> bt_b_mux_sel inside { + IMM_B_I, + IMM_B_B, + IMM_B_J, + IMM_B_INCR_PC}) + `ASSERT(IbexRegfileWdataSelValid, instr_valid_i |-> rf_wdata_sel inside { + RF_WD_EX, + RF_WD_CSR}) + `ASSERT_KNOWN(IbexWbStateKnown, id_fsm_q) + + // Branch decision must be valid when jumping. + `ASSERT_KNOWN_IF(IbexBranchDecisionValid, branch_decision_i, + instr_valid_i && !(illegal_csr_insn_i || instr_fetch_err_i)) + + // Instruction delivered to ID stage can not contain X. 
+ `ASSERT_KNOWN_IF(IbexIdInstrKnown, instr_rdata_i, + instr_valid_i && !(illegal_c_insn_i || instr_fetch_err_i)) + + // Instruction delivered to ID stage can not contain X. + `ASSERT_KNOWN_IF(IbexIdInstrALUKnown, instr_rdata_alu_i, + instr_valid_i && !(illegal_c_insn_i || instr_fetch_err_i)) + + // Multicycle enable signals must be unique. + `ASSERT(IbexMulticycleEnableUnique, + $onehot0({lsu_req_dec, multdiv_en_dec, branch_in_dec, jump_in_dec})) + + // Duplicated instruction flops must match + // === as DV environment can produce instructions with Xs in, so must use precise match that + // includes Xs + `ASSERT(IbexDuplicateInstrMatch, instr_valid_i |-> instr_rdata_i === instr_rdata_alu_i) + + `ifdef CHECK_MISALIGNED + `ASSERT(IbexMisalignedMemoryAccess, !lsu_addr_incr_req_i) + `endif + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv new file mode 100644 index 0000000..2829dd5 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_if_stage.sv
@@ -0,0 +1,807 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Instruction Fetch Stage + * + * Instruction fetch unit: Selection of the next PC, and buffering (sampling) of + * the read instruction. + */ + +`include "prim_assert.sv" + +module cheriot_if_stage import cheriot_pkg::*; import cheri_pkg::*; #( + parameter int unsigned DmHaltAddr = 32'h1A110800, + parameter int unsigned DmExceptionAddr = 32'h1A110808, + parameter bit DummyInstructions = 1'b0, + parameter bit ICache = 1'b0, + parameter bit ICacheECC = 1'b0, + parameter int unsigned BusSizeECC = BUS_SIZE, + parameter int unsigned TagSizeECC = IC_TAG_SIZE, + parameter int unsigned LineSizeECC = IC_LINE_SIZE, + parameter bit PCIncrCheck = 1'b0, + parameter bit ResetAll = 1'b0, + parameter lfsr_seed_t RndCnstLfsrSeed = RndCnstLfsrSeedDefault, + parameter lfsr_perm_t RndCnstLfsrPerm = RndCnstLfsrPermDefault, + parameter bit BranchPredictor = 1'b0, + parameter bit CHERIoTEn = 1'b1 +) ( + input logic clk_i, + input logic rst_ni, + + input logic cheri_pmode_i, + input logic [31:0] boot_addr_i, // also used for mtvec + input logic req_i, // instruction request control + input logic debug_mode_i, + + // instruction cache interface + output logic instr_req_o, + output logic [31:0] instr_addr_o, + input logic instr_gnt_i, + input logic instr_rvalid_i, + input logic [31:0] instr_rdata_i, + input logic instr_err_i, + + // ICache RAM IO + output logic [IC_NUM_WAYS-1:0] ic_tag_req_o, + output logic ic_tag_write_o, + output logic [IC_INDEX_W-1:0] ic_tag_addr_o, + output logic [TagSizeECC-1:0] ic_tag_wdata_o, + input logic [TagSizeECC-1:0] ic_tag_rdata_i [IC_NUM_WAYS], + output 
logic [IC_NUM_WAYS-1:0] ic_data_req_o, + output logic ic_data_write_o, + output logic [IC_INDEX_W-1:0] ic_data_addr_o, + output logic [LineSizeECC-1:0] ic_data_wdata_o, + input logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS], + input logic ic_scr_key_valid_i, + + // output of ID stage + output logic instr_valid_id_o, // instr in IF-ID is valid + output logic instr_new_id_o, // instr in IF-ID is new + output logic [31:0] instr_rdata_id_o, // instr for ID stage + output logic [31:0] instr_rdata_alu_id_o, // replicated instr for ID stage + // to reduce fan-out + output logic [15:0] instr_rdata_c_id_o, // compressed instr for ID stage + // (mtval), meaningful only if + // instr_is_compressed_id_o = 1'b1 + output logic instr_is_compressed_id_o, // compressed decoder thinks this + // is a compressed instr + output logic instr_bp_taken_o, // instruction was predicted to be + // a taken branch + output logic instr_fetch_err_o, // bus error on fetch + output logic instr_fetch_err_plus2_o, // bus error misaligned + output logic illegal_c_insn_id_o, // compressed decoder thinks this + // is an invalid instr + output logic instr_fetch_cheri_acc_vio_o, + output logic instr_fetch_cheri_bound_vio_o, + output logic dummy_instr_id_o, // Instruction is a dummy + output logic [31:0] pc_if_o, + output logic [31:0] pc_id_o, + input logic pmp_err_if_i, + input logic pmp_err_if_plus2_i, + + // control signals + input logic instr_valid_clear_i, // clear instr valid bit in IF-ID + input logic pc_set_i, // set the PC to a new value + input pc_sel_e pc_mux_i, // selector for PC multiplexer + input logic nt_branch_mispredict_i, // Not-taken branch in ID/EX was + // mispredicted (predicted taken) + input logic [31:0] nt_branch_addr_i, // Not-taken branch address in ID/EX + input exc_pc_sel_e exc_pc_mux_i, // selects ISR address + input exc_cause_e exc_cause, // selects ISR address for + // vectorized interrupt lines + input logic dummy_instr_en_i, + input logic [2:0] dummy_instr_mask_i, 
+ input logic dummy_instr_seed_en_i, + input logic [31:0] dummy_instr_seed_i, + input logic icache_enable_i, + input logic icache_inval_i, + + // jump and branch target + input logic [31:0] branch_target_ex_i, // branch/jump target address + + // CSRs + input logic [31:0] csr_mepc_i, // PC to restore after handling + // the interrupt/exception + input logic [31:0] csr_depc_i, // PC to restore after handling + // the debug request + input logic [31:0] csr_mtvec_i, // base PC to jump to on exception + output logic csr_mtvec_init_o, // tell CS regfile to init mtvec + + // pipeline stall + input logic id_in_ready_i, // ID stage is ready for new instr + + // misc signals + output logic pc_mismatch_alert_o, + output logic if_busy_o, // IF stage is busy fetching instr + input pcc_cap_t pcc_cap_i +); + + logic instr_valid_id_d, instr_valid_id_q; + logic instr_new_id_d, instr_new_id_q; + + // prefetch buffer related signals + logic prefetch_busy; + logic branch_req; + logic [31:0] fetch_addr_n; + logic unused_fetch_addr_n0; + + logic fetch_valid; + logic fetch_ready; + logic [31:0] fetch_rdata; + logic [31:0] fetch_addr; + logic fetch_err; + logic fetch_err_plus2; + + logic [31:0] instr_decompressed; + logic illegal_c_insn; + logic instr_is_compressed; + + logic if_instr_valid; + logic [31:0] if_instr_rdata; + logic [31:0] if_instr_addr; + logic if_instr_bus_err; + logic if_instr_pmp_err; + logic if_instr_err; + logic if_instr_err_plus2; + + logic [31:0] exc_pc; + + logic [5:0] irq_id; + logic unused_irq_bit; + + logic if_id_pipe_reg_we; // IF-ID pipeline reg write enable + + // Dummy instruction signals + logic stall_dummy_instr; + logic [31:0] instr_out; + logic instr_is_compressed_out; + logic illegal_c_instr_out; + logic instr_err_out; + + logic predict_branch_taken; + logic [31:0] predict_branch_pc; + + cheriot_pkg::pc_sel_e pc_mux_internal; + + logic [7:0] unused_boot_addr; + logic [7:0] unused_csr_mtvec; + + logic cheri_acc_vio, cheri_bound_vio; + logic 
cheri_force_uc; + + assign unused_boot_addr = boot_addr_i[7:0]; + assign unused_csr_mtvec = csr_mtvec_i[7:0]; + + // extract interrupt ID from exception cause + assign irq_id = {exc_cause}; + assign unused_irq_bit = irq_id[5]; // MSB distinguishes interrupts from exceptions + + // exception PC selection mux + always_comb begin : exc_pc_mux + unique case (exc_pc_mux_i) + EXC_PC_EXC: exc_pc = (csr_mtvec_i[0] | ~cheri_pmode_i)? { csr_mtvec_i[31:8], 8'h00 } : {csr_mtvec_i[31:2], 2'b00}; + EXC_PC_IRQ: exc_pc = (csr_mtvec_i[0] | ~cheri_pmode_i) ? { csr_mtvec_i[31:8], 1'b0, irq_id[4:0], 2'b00 } : {csr_mtvec_i[31:2], 2'b00}; + EXC_PC_DBD: exc_pc = DmHaltAddr; + EXC_PC_DBG_EXC: exc_pc = DmExceptionAddr; + default: exc_pc = { csr_mtvec_i[31:8], 8'h00 }; + endcase + end + + // The Branch predictor can provide a new PC which is internal to if_stage. Only override the mux + // select to choose this if the core isn't already trying to set a PC. + assign pc_mux_internal = + (BranchPredictor && predict_branch_taken && !pc_set_i) ? PC_BP : pc_mux_i; + + // fetch address selection mux + always_comb begin : fetch_addr_mux + unique case (pc_mux_internal) + PC_BOOT: fetch_addr_n = { boot_addr_i[31:8], 8'h80 }; + PC_JUMP: fetch_addr_n = branch_target_ex_i; + PC_EXC: fetch_addr_n = exc_pc; // set PC to exception handler + PC_ERET: fetch_addr_n = csr_mepc_i; // restore PC when returning from EXC + PC_DRET: fetch_addr_n = csr_depc_i; + // Without branch predictor will never get pc_mux_internal == PC_BP. We still handle no branch + // predictor case here to ensure redundant mux logic isn't synthesised. + PC_BP: fetch_addr_n = BranchPredictor ? 
predict_branch_pc : { boot_addr_i[31:8], 8'h80 }; + default: fetch_addr_n = { boot_addr_i[31:8], 8'h80 }; + endcase + end + + // tell CS register file to initialize mtvec on boot + assign csr_mtvec_init_o = (pc_mux_i == PC_BOOT) & pc_set_i; + if (ICache) begin : gen_icache + // Full I-Cache option + cheriot_icache #( + .ICacheECC (ICacheECC), + .ResetAll (ResetAll), + .BusSizeECC (BusSizeECC), + .TagSizeECC (TagSizeECC), + .LineSizeECC (LineSizeECC) + ) icache_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + + .req_i ( req_i ), + + .branch_i ( branch_req ), + .branch_mispredict_i ( nt_branch_mispredict_i ), + .mispredict_addr_i ( nt_branch_addr_i ), + .addr_i ( {fetch_addr_n[31:1], 1'b0} ), + + .ready_i ( fetch_ready ), + .valid_o ( fetch_valid ), + .rdata_o ( fetch_rdata ), + .addr_o ( fetch_addr ), + .err_o ( fetch_err ), + .err_plus2_o ( fetch_err_plus2 ), + + .instr_req_o ( instr_req_o ), + .instr_addr_o ( instr_addr_o ), + .instr_gnt_i ( instr_gnt_i ), + .instr_rvalid_i ( instr_rvalid_i ), + .instr_rdata_i ( instr_rdata_i ), + .instr_err_i ( instr_err_i ), + + .ic_tag_req_o ( ic_tag_req_o ), + .ic_tag_write_o ( ic_tag_write_o ), + .ic_tag_addr_o ( ic_tag_addr_o ), + .ic_tag_wdata_o ( ic_tag_wdata_o ), + .ic_tag_rdata_i ( ic_tag_rdata_i ), + .ic_data_req_o ( ic_data_req_o ), + .ic_data_write_o ( ic_data_write_o ), + .ic_data_addr_o ( ic_data_addr_o ), + .ic_data_wdata_o ( ic_data_wdata_o ), + .ic_data_rdata_i ( ic_data_rdata_i ), + .ic_scr_key_valid_i ( ic_scr_key_valid_i ), + + .icache_enable_i ( icache_enable_i ), + .icache_inval_i ( icache_inval_i ), + .busy_o ( prefetch_busy ) + ); + + end else begin : gen_prefetch_buffer + + // prefetch buffer, caches a fixed number of instructions + cheriot_prefetch_buffer #( + .ResetAll (ResetAll) + ) prefetch_buffer_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + + .req_i ( req_i ), + + .branch_i ( branch_req ), + .branch_mispredict_i ( nt_branch_mispredict_i ), + .mispredict_addr_i ( nt_branch_addr_i ), + .addr_i ( 
{fetch_addr_n[31:1], 1'b0} ), + + .ready_i ( fetch_ready ), + .valid_o ( fetch_valid ), + .rdata_o ( fetch_rdata ), + .addr_o ( fetch_addr ), + .err_o ( fetch_err ), + .err_plus2_o ( fetch_err_plus2 ), + + .cheri_force_uc_i ( cheri_force_uc ), + + .instr_req_o ( instr_req_o ), + .instr_addr_o ( instr_addr_o ), + .instr_gnt_i ( instr_gnt_i ), + .instr_rvalid_i ( instr_rvalid_i ), + .instr_rdata_i ( instr_rdata_i ), + .instr_err_i ( instr_err_i ), + + .busy_o ( prefetch_busy ) + ); + + // ICache tieoffs + logic unused_icen, unused_icinv, unused_scr_key_valid; + logic [TagSizeECC-1:0] unused_tag_ram_input [IC_NUM_WAYS]; + logic [LineSizeECC-1:0] unused_data_ram_input [IC_NUM_WAYS]; + assign unused_icen = icache_enable_i; + assign unused_icinv = icache_inval_i; + assign unused_tag_ram_input = ic_tag_rdata_i; + assign unused_data_ram_input = ic_data_rdata_i; + assign unused_scr_key_valid = ic_scr_key_valid_i; + assign ic_tag_req_o = 'b0; + assign ic_tag_write_o = 'b0; + assign ic_tag_addr_o = 'b0; + assign ic_tag_wdata_o = 'b0; + assign ic_data_req_o = 'b0; + assign ic_data_write_o = 'b0; + assign ic_data_addr_o = 'b0; + assign ic_data_wdata_o = 'b0; + +`ifndef SYNTHESIS + // If we don't instantiate an icache and this is a simulation then we have a problem because the + // simulator might discard the icache module entirely, including some DPI exports that it + // implies. This then causes problems for linking against C++ testbench code that expected them. 
+ // As a slightly ugly hack, let's define the DPI functions here (the real versions are defined + // in prim_util_get_scramble_params.svh) + export "DPI-C" function simutil_get_scramble_key; + export "DPI-C" function simutil_get_scramble_nonce; + function automatic int simutil_get_scramble_key(output bit [127:0] val); + return 0; + endfunction + function automatic int simutil_get_scramble_nonce(output bit [319:0] nonce); + return 0; + endfunction +`endif + end + + assign unused_fetch_addr_n0 = fetch_addr_n[0]; + + assign branch_req = pc_set_i | predict_branch_taken; + + assign pc_if_o = if_instr_addr; + assign if_busy_o = prefetch_busy; + + // PMP errors + // An error can come from the instruction address, or the next instruction address for unaligned, + // uncompressed instructions. + assign if_instr_pmp_err = pmp_err_if_i | + (if_instr_addr[1] & ~instr_is_compressed & pmp_err_if_plus2_i); + + // Combine bus errors and pmp errors + assign if_instr_err = if_instr_bus_err | if_instr_pmp_err | cheri_acc_vio | cheri_bound_vio; + + // Capture the second half of the address for errors on the second part of an instruction + // LEC_NOT_COMPATIBLE + assign if_instr_err_plus2 = ((if_instr_addr[1] & ~instr_is_compressed & pmp_err_if_plus2_i) | + fetch_err_plus2) & ~pmp_err_if_i; + + // pre-calculate headroom to improve memory read timing + logic [33:0] instr_hdrm; + logic hdrm_ge4, hdrm_ge2, hdrm_ok, base_ok; + logic allow_all; + + // allow_all is used to permit the pc wraparound case (pc = 0xffff_fffe, uncompressed instruction) + // - in this case fetch should be allowed if pcc bounds is specified as the entire 32-bit space. 
+ // - If we don't treat this as a special case the fetch would be flagged as an error since headroom < 4 + assign allow_all = (pcc_cap_i.base32==0) & (pcc_cap_i.top33==33'h1_0000_0000); + + assign instr_hdrm = {1'b0, pcc_cap_i.top33} - {2'b00, if_instr_addr}; + assign hdrm_ge4 = (|instr_hdrm[32:2]) & ~instr_hdrm[33]; // >= 4 + assign hdrm_ge2 = (|instr_hdrm[32:1]) & ~instr_hdrm[33]; // >= 2 + assign hdrm_ok = allow_all || (instr_is_compressed ? hdrm_ge2 : hdrm_ge4); + assign base_ok = ~(if_instr_addr < pcc_cap_i.base32); + + // only issue cheri_acc_vio on valid fetches + assign cheri_bound_vio = CHERIoTEn & cheri_pmode_i & ~debug_mode_i & (~base_ok || ~hdrm_ok); + + // In order to have constant timing (avoid side-channel leakage due to data-dependent behavior), + // if base vio or headroom < 4 (we are only authorized to fetch 2 bytes), force the fetch_fifo + // to treat the current rdata as an unaligned compressed instruction if pc[1]=1, and push it to + // ID stage without waiting for the 2nd part of 32-bit instruction. 
+ // + assign cheri_force_uc = CHERIoTEn & cheri_pmode_i & ~allow_all & (~base_ok | ~hdrm_ge4); + + // we still check seal/perm here to be safe, however by ISA those can't happen at fetch time + // since they are check elsewhere already + assign cheri_acc_vio = CHERIoTEn & cheri_pmode_i & ~debug_mode_i & + (~pcc_cap_i.perms[PERM_EX] || ~pcc_cap_i.valid || (pcc_cap_i.otype!=0)); + + // compressed instruction decoding, or more precisely compressed instruction + // expander + // + // since it does not matter where we decompress instructions, we do it here + // to ease timing closure + cheriot_compressed_decoder #( + .CHERIoTEn (CHERIoTEn) + ) compressed_decoder_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i (fetch_valid & ~fetch_err), + .instr_i (if_instr_rdata), + .cheri_pmode_i (cheri_pmode_i), + .instr_o (instr_decompressed), + .is_compressed_o(instr_is_compressed), + .illegal_instr_o(illegal_c_insn) + ); + + // Dummy instruction insertion + if (DummyInstructions) begin : gen_dummy_instr + logic insert_dummy_instr; + logic [31:0] dummy_instr_data; + + cheriot_dummy_instr #( + .RndCnstLfsrSeed (RndCnstLfsrSeed), + .RndCnstLfsrPerm (RndCnstLfsrPerm) + ) dummy_instr_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .dummy_instr_en_i (dummy_instr_en_i), + .dummy_instr_mask_i (dummy_instr_mask_i), + .dummy_instr_seed_en_i(dummy_instr_seed_en_i), + .dummy_instr_seed_i (dummy_instr_seed_i), + .fetch_valid_i (fetch_valid), + .id_in_ready_i (id_in_ready_i), + .insert_dummy_instr_o (insert_dummy_instr), + .dummy_instr_data_o (dummy_instr_data) + ); + + // Mux between actual instructions and dummy instructions + assign instr_out = insert_dummy_instr ? dummy_instr_data : instr_decompressed; + assign instr_is_compressed_out = insert_dummy_instr ? 1'b0 : instr_is_compressed; + assign illegal_c_instr_out = insert_dummy_instr ? 1'b0 : illegal_c_insn; + assign instr_err_out = insert_dummy_instr ? 1'b0 : if_instr_err; + + // Stall the IF stage if we insert a dummy instruction. 
The dummy will execute between whatever + // is currently in the ID stage and whatever is valid from the prefetch buffer this cycle. The + // PC of the dummy instruction will match whatever is next from the prefetch buffer. + assign stall_dummy_instr = insert_dummy_instr; + + // Register the dummy instruction indication into the ID stage + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + dummy_instr_id_o <= 1'b0; + end else if (if_id_pipe_reg_we) begin + dummy_instr_id_o <= insert_dummy_instr; + end + end + + end else begin : gen_no_dummy_instr + logic unused_dummy_en; + logic [2:0] unused_dummy_mask; + logic unused_dummy_seed_en; + logic [31:0] unused_dummy_seed; + + assign unused_dummy_en = dummy_instr_en_i; + assign unused_dummy_mask = dummy_instr_mask_i; + assign unused_dummy_seed_en = dummy_instr_seed_en_i; + assign unused_dummy_seed = dummy_instr_seed_i; + assign instr_out = instr_decompressed; + assign instr_is_compressed_out = instr_is_compressed; + assign illegal_c_instr_out = illegal_c_insn; + assign instr_err_out = if_instr_err; + assign stall_dummy_instr = 1'b0; + assign dummy_instr_id_o = 1'b0; + end + + // The ID stage becomes valid as soon as any instruction is registered in the ID stage flops. + // Note that the current instruction is squashed by the incoming pc_set_i signal. + // Valid is held until it is explicitly cleared (due to an instruction completing or an exception) + assign instr_valid_id_d = (if_instr_valid & id_in_ready_i & ~pc_set_i) | + (instr_valid_id_q & ~instr_valid_clear_i); + assign instr_new_id_d = if_instr_valid & id_in_ready_i; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + instr_valid_id_q <= 1'b0; + instr_new_id_q <= 1'b0; + end else begin + instr_valid_id_q <= instr_valid_id_d; + instr_new_id_q <= instr_new_id_d; + end + end + + assign instr_valid_id_o = instr_valid_id_q; + // Signal when a new instruction enters the ID stage (only used for RVFI signalling). 
+ assign instr_new_id_o = instr_new_id_q; + + // IF-ID pipeline registers, frozen when the ID stage is stalled + assign if_id_pipe_reg_we = instr_new_id_d; + + if (ResetAll) begin : g_instr_rdata_ra + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + instr_rdata_id_o <= '0; + instr_rdata_alu_id_o <= '0; + instr_fetch_err_o <= '0; + instr_fetch_err_plus2_o <= '0; + instr_rdata_c_id_o <= '0; + instr_is_compressed_id_o <= '0; + illegal_c_insn_id_o <= '0; + pc_id_o <= '0; + instr_fetch_cheri_acc_vio_o <= '0; + instr_fetch_cheri_bound_vio_o <= '0; + end else if (if_id_pipe_reg_we) begin + instr_rdata_id_o <= instr_out; + // To reduce fan-out and help timing from the instr_rdata_id flops they are replicated. + instr_rdata_alu_id_o <= instr_out; + instr_fetch_err_o <= instr_err_out; + instr_fetch_err_plus2_o <= if_instr_err_plus2; + instr_rdata_c_id_o <= if_instr_rdata[15:0]; + instr_is_compressed_id_o <= instr_is_compressed_out; + illegal_c_insn_id_o <= illegal_c_instr_out; + pc_id_o <= pc_if_o; + instr_fetch_cheri_acc_vio_o <= cheri_acc_vio; + instr_fetch_cheri_bound_vio_o <= cheri_bound_vio; + end + end + end else begin : g_instr_rdata_nr + always_ff @(posedge clk_i) begin + if (if_id_pipe_reg_we) begin + instr_rdata_id_o <= instr_out; + // To reduce fan-out and help timing from the instr_rdata_id flops they are replicated. 
+ instr_rdata_alu_id_o <= instr_out; + instr_fetch_err_o <= instr_err_out; + instr_fetch_err_plus2_o <= if_instr_err_plus2; + instr_rdata_c_id_o <= if_instr_rdata[15:0]; + instr_is_compressed_id_o <= instr_is_compressed_out; + illegal_c_insn_id_o <= illegal_c_instr_out; + pc_id_o <= pc_if_o; + instr_fetch_cheri_acc_vio_o <= cheri_acc_vio; + instr_fetch_cheri_bound_vio_o <= cheri_bound_vio; + end + end + end + + // Check for expected increments of the PC when security hardening enabled + if (PCIncrCheck) begin : g_secure_pc + logic [31:0] prev_instr_addr_incr, prev_instr_addr_incr_buf; + logic prev_instr_seq_q, prev_instr_seq_d; + + // Do not check for sequential increase after a branch, jump, exception, interrupt or debug + // request, all of which will set branch_req. Also do not check after reset or for dummys. + assign prev_instr_seq_d = (prev_instr_seq_q | instr_new_id_d) & + ~branch_req & ~if_instr_err & ~stall_dummy_instr; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + prev_instr_seq_q <= 1'b0; + end else begin + prev_instr_seq_q <= prev_instr_seq_d; + end + end + + assign prev_instr_addr_incr = pc_id_o + (instr_is_compressed_id_o ? 32'd2 : 32'd4); + + `ifdef FPGA + // Buffer anticipated next PC address to ensure optimiser cannot remove the check. 
+ prim_buf #(.Width(32)) u_prev_instr_addr_incr_buf ( + .in_i (prev_instr_addr_incr), + .out_o(prev_instr_addr_incr_buf) + ); + `else + assign prev_instr_addr_incr_buf = prev_instr_addr_incr; + `endif + + // Check that the address equals the previous address +2/+4 + assign pc_mismatch_alert_o = prev_instr_seq_q & (pc_if_o != prev_instr_addr_incr_buf); + + end else begin : g_no_secure_pc + assign pc_mismatch_alert_o = 1'b0; + end + + if (BranchPredictor) begin : g_branch_predictor + logic [31:0] instr_skid_data_q; + logic [31:0] instr_skid_addr_q; + logic instr_skid_bp_taken_q; + logic instr_skid_valid_q, instr_skid_valid_d; + logic instr_skid_en; + logic instr_bp_taken_q, instr_bp_taken_d; + + logic predict_branch_taken_raw; + + // ID stages needs to know if branch was predicted taken so it can signal mispredicts + if (ResetAll) begin : g_bp_taken_ra + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + instr_bp_taken_q <= '0; + end else if (if_id_pipe_reg_we) begin + instr_bp_taken_q <= instr_bp_taken_d; + end + end + end else begin : g_bp_taken_nr + always_ff @(posedge clk_i) begin + if (if_id_pipe_reg_we) begin + instr_bp_taken_q <= instr_bp_taken_d; + end + end + end + + // When branch prediction is enabled a skid buffer between the IF and ID/EX stage is introduced. + // If an instruction in IF is predicted to be a taken branch and ID/EX is not ready the + // instruction in IF is moved to the skid buffer which becomes the output of the IF stage until + // the ID/EX stage accepts the instruction. The skid buffer is required as otherwise the ID/EX + // ready signal is coupled to the instr_req_o output which produces a feedthrough path from + // data_gnt_i -> instr_req_o (which needs to be avoided as for some interconnects this will + // result in a combinational loop). 
+ + assign instr_skid_en = predict_branch_taken & ~pc_set_i & ~id_in_ready_i & ~instr_skid_valid_q; + + assign instr_skid_valid_d = (instr_skid_valid_q & ~id_in_ready_i & ~stall_dummy_instr) | + instr_skid_en; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + instr_skid_valid_q <= 1'b0; + end else begin + instr_skid_valid_q <= instr_skid_valid_d; + end + end + + if (ResetAll) begin : g_instr_skid_ra + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + instr_skid_bp_taken_q <= '0; + instr_skid_data_q <= '0; + instr_skid_addr_q <= '0; + end else if (instr_skid_en) begin + instr_skid_bp_taken_q <= predict_branch_taken; + instr_skid_data_q <= fetch_rdata; + instr_skid_addr_q <= fetch_addr; + end + end + end else begin : g_instr_skid_nr + always_ff @(posedge clk_i) begin + if (instr_skid_en) begin + instr_skid_bp_taken_q <= predict_branch_taken; + instr_skid_data_q <= fetch_rdata; + instr_skid_addr_q <= fetch_addr; + end + end + end + + cheriot_branch_predict branch_predict_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .fetch_rdata_i(fetch_rdata), + .fetch_pc_i (fetch_addr), + .fetch_valid_i(fetch_valid), + + .predict_branch_taken_o(predict_branch_taken_raw), + .predict_branch_pc_o (predict_branch_pc) + ); + + // If there is an instruction in the skid buffer there must be no branch prediction. + // Instructions are only placed in the skid after they have been predicted to be a taken branch + // so with the skid valid any prediction has already occurred. + // Do not branch predict on instruction errors. + assign predict_branch_taken = predict_branch_taken_raw & ~instr_skid_valid_q & ~fetch_err; + + assign if_instr_valid = fetch_valid | (instr_skid_valid_q & ~nt_branch_mispredict_i); + assign if_instr_rdata = instr_skid_valid_q ? instr_skid_data_q : fetch_rdata; + assign if_instr_addr = instr_skid_valid_q ? 
instr_skid_addr_q : fetch_addr; + + // Don't branch predict on instruction error so only instructions without errors end up in the + // skid buffer. + assign if_instr_bus_err = ~instr_skid_valid_q & fetch_err; + assign instr_bp_taken_d = instr_skid_valid_q ? instr_skid_bp_taken_q : predict_branch_taken; + + assign fetch_ready = id_in_ready_i & ~stall_dummy_instr & ~instr_skid_valid_q; + + assign instr_bp_taken_o = instr_bp_taken_q; + + `ASSERT(NoPredictSkid, instr_skid_valid_q |-> ~predict_branch_taken) + `ASSERT(NoPredictIllegal, predict_branch_taken |-> ~illegal_c_insn) + end else begin : g_no_branch_predictor + assign instr_bp_taken_o = 1'b0; + assign predict_branch_taken = 1'b0; + assign predict_branch_pc = 32'b0; + + assign if_instr_valid = fetch_valid; + assign if_instr_rdata = fetch_rdata; + assign if_instr_addr = fetch_addr; + assign if_instr_bus_err = fetch_err; + assign fetch_ready = id_in_ready_i & ~stall_dummy_instr; + end + + //////////////// + // Assertions // + //////////////// + + // Selectors must be known/valid. + `ASSERT_KNOWN(IbexExcPcMuxKnown, exc_pc_mux_i) + + if (BranchPredictor) begin : g_branch_predictor_asserts + `ASSERT_IF(IbexPcMuxValid, pc_mux_internal inside { + PC_BOOT, + PC_JUMP, + PC_EXC, + PC_ERET, + PC_DRET, + PC_BP}, + pc_set_i) + +`ifdef INC_ASSERT + /** + * Checks for branch prediction interface to fetch_fifo/icache + * + * The interface has two signals: + * - predicted_branch_i: When set with a branch (branch_i) indicates the branch is a predicted + * one, it should be ignored when a branch_i isn't set. + * - branch_mispredict_i: Indicates the previously predicted branch was mis-predicted and + * execution should resume with the not-taken side of the branch (i.e. continue with the PC + * that followed the predicted branch). 
This must be raised before the instruction that is + * made available following a predicted branch is accepted (Following a cycle with branch_i + * & predicted_branch_i, branch_mispredict_i can only be asserted before or on the same cycle + * as seeing fetch_valid & fetch_ready). When branch_mispredict_i is asserted, fetch_valid may + * be asserted in response. If fetch_valid is asserted on the same cycle as + * branch_mispredict_i this indicates the fetch_fifo/icache has the not-taken side of the + * branch immediately ready for use + */ + logic predicted_branch_live_q, predicted_branch_live_d; + logic [31:0] predicted_branch_nt_pc_q, predicted_branch_nt_pc_d; + logic [31:0] awaiting_instr_after_mispredict_q, awaiting_instr_after_mispredict_d; + logic [31:0] next_pc; + + logic mispredicted, mispredicted_d, mispredicted_q; + + assign next_pc = fetch_addr + (instr_is_compressed_out ? 32'd2 : 32'd4); + + logic predicted_branch; + + // pc_set_i takes precendence over branch prediction + assign predicted_branch = predict_branch_taken & ~pc_set_i; + + always_comb begin + predicted_branch_live_d = predicted_branch_live_q; + mispredicted_d = mispredicted_q; + + if (branch_req & predicted_branch) begin + predicted_branch_live_d = 1'b1; + mispredicted_d = 1'b0; + end else if (predicted_branch_live_q) begin + if (fetch_valid & fetch_ready) begin + predicted_branch_live_d = 1'b0; + end else if (nt_branch_mispredict_i) begin + mispredicted_d = 1'b1; + end + end + end + + always @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + predicted_branch_live_q <= 1'b0; + mispredicted_q <= 1'b0; + end else begin + predicted_branch_live_q <= predicted_branch_live_d; + mispredicted_q <= mispredicted_d; + end + end + + always @(posedge clk_i) begin + if (branch_req & predicted_branch) begin + predicted_branch_nt_pc_q <= next_pc; + end + end + + // Must only see mispredict after we've performed a predicted branch but before we've accepted + // any instruction (with fetch_ready 
& fetch_valid) that follows that predicted branch. + `ASSERT(MispredictOnlyImmediatelyAfterPredictedBranch, + nt_branch_mispredict_i |-> predicted_branch_live_q) + // Check that on mispredict we get the correct PC for the non-taken side of the branch when + // prefetch buffer/icache makes that PC available. + `ASSERT(CorrectPCOnMispredict, + predicted_branch_live_q & mispredicted_d & fetch_valid |-> + fetch_addr == predicted_branch_nt_pc_q) + // Must not signal mispredict over multiple cycles but it's possible to have back to back + // mispredicts for different branches (core signals mispredict, prefetch buffer/icache immediate + // has not-taken side of the mispredicted branch ready, which itself is a predicted branch, + // following cycle core signal that that branch has mispredicted). + `ASSERT(MispredictSingleCycle, + nt_branch_mispredict_i & ~(fetch_valid & fetch_ready) |=> ~nt_branch_mispredict_i) + // Note that we should never see a mispredict and an incoming branch on the same cycle. + // The mispredict also cancels any predicted branch so overall branch_req must be low. + `ASSERT(NoMispredBranch, nt_branch_mispredict_i |-> ~branch_req) +`endif + + end else begin : g_no_branch_predictor_asserts + `ASSERT_IF(IbexPcMuxValid, pc_mux_internal inside { + PC_BOOT, + PC_JUMP, + PC_EXC, + PC_ERET, + PC_DRET}, + pc_set_i) + end + + // Boot address must be aligned to 256 bytes. + `ASSERT(IbexBootAddrUnaligned, boot_addr_i[7:0] == 8'h00) + + // Address must not contain X when request is sent. + `ASSERT(IbexInstrAddrUnknown, instr_req_o |-> !$isunknown(instr_addr_o)) + + // Address must be word aligned when request is sent. + `ASSERT(IbexInstrAddrUnaligned, instr_req_o |-> (instr_addr_o[1:0] == 2'b00)) + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv b/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv new file mode 100644 index 0000000..ebbe74a --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_load_store_unit.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0


/**
 * Load Store Unit
 *
 * Load Store Unit, used to eliminate multiple access during processor stalls,
 * and to align bytes and halfwords.
 *
 * CHERIoT extensions on top of the baseline Ibex LSU:
 *  - 33-bit data path (bit [32] carries the extra memory bit used by the
 *    capability format; see reg2memcap_*/mem2regcap_* in cheri_pkg).
 *  - Capability (cap) accesses are issued as two back-to-back word transfers
 *    (CTX_* FSM states), always word aligned.
 *  - A second requester (TBRE/STKZ background engine) can be granted the LSU
 *    when the CPU is not using it (tbre_* ports).
 *  - CHERI permission/bound violations (lsu_cheri_err_i) are turned into an
 *    error response without issuing a bus request.
 */

`include "prim_assert.sv"
`include "dv_fcov_macros.svh"

module cheriot_load_store_unit import cheriot_pkg::*; import cheri_pkg::*; #(
  parameter bit CHERIoTEn   = 1'b1,  // enable CHERIoT capability support
  parameter bit MemCapFmt   = 1'b0,  // selects in-memory capability format (fmt0/fmt1)
  parameter bit CheriTBRE   = 1'b0,  // enable TBRE (background revocation engine) port
  parameter bit CheriCapIT8 = 1'b0   // selects IT8 capability encoding variants
) (
  input  logic         clk_i,
  input  logic         rst_ni,
  input  logic         cheri_pmode_i,           // CHERI purecap mode enable (dynamic)

  // data interface
  output logic         data_req_o,
  output logic         data_is_cap_o,           // current request is a capability access
  input  logic         data_gnt_i,
  input  logic         data_rvalid_i,
  input  logic         data_err_i,
  input  logic         data_pmp_err_i,

  output logic [31:0]  data_addr_o,
  output logic         data_we_o,
  output logic [3:0]   data_be_o,
  output logic [32:0]  data_wdata_o,            // 33-bit: [32] is the extra capability-format bit
  input  logic [32:0]  data_rdata_i,            // 33-bit: [32] is the extra capability-format bit

  // signals to/from ID/EX stage
  input  logic         lsu_we_i,                // write enable                     -> from ID/EX
  input  logic         lsu_is_cap_i,            // access is a capability (2-word) transfer
  input  logic         lsu_cheri_err_i,         // CHERI access violation detected in EX
  input  logic [1:0]   lsu_type_i,              // data type: word, half word, byte -> from ID/EX
  input  logic [32:0]  lsu_wdata_i,             // data to write to memory          -> from ID/EX
  input  reg_cap_t     lsu_wcap_i,              // capability metadata to store (cap writes)
  input  logic [3:0]   lsu_lc_clrperm_i,        // load-cap permission-clearing mask
  input  logic         lsu_sign_ext_i,          // sign extension                   -> from ID/EX
  input  logic         cpu_stall_by_stkz_i,     // CPU request stalled by stack-zeroization engine
  input  logic         cpu_grant_to_stkz_i,     // CPU yields the LSU to the stkz engine

  output reg_cap_t     lsu_rcap_o,              // loaded capability metadata       -> to ID/EX
  output logic [32:0]  lsu_rdata_o,             // requested data                   -> to ID/EX
  output logic         lsu_rdata_valid_o,
  input  logic         lsu_req_i,               // data request                     -> from ID/EX

  input  logic [31:0]  lsu_addr_i,              // address computed in ALU          -> from ID/EX

  output logic         lsu_addr_incr_req_o,     // request address increment for
                                                // misaligned accesses              -> to ID/EX
  output logic [31:0]  addr_last_o,             // address of last transaction      -> to controller
                                                // -> mtval
                                                // -> AGU for misaligned accesses

  output logic         lsu_req_done_o,          // Signals that data request is complete
                                                // (only need to await final data
                                                // response)                        -> to ID/EX
  output logic         lsu_resp_valid_o,        // LSU has response from transaction -> to ID/EX & WB
  output logic         lsu_resp_is_wr_o,

  // TBRE related signals
  input  logic         tbre_lsu_req_i,          // request from TBRE/STKZ engine
  input  logic         cpu_lsu_dec_i,           // CPU has decoded an LSU instruction (CPU wants the LSU)
  output logic         lsu_tbre_sel_o,          // request-side selection signal
  output logic         lsu_tbre_addr_incr_req_o,// request address increment for TBRE accesses
  output logic [32:0]  lsu_tbre_raw_lsw_o,      // raw first memory word of a cap load, for TBRE
  output logic         lsu_tbre_req_done_o,
  output logic         lsu_tbre_resp_valid_o,   // response from transaction        -> to TBRE
  output logic         lsu_tbre_resp_err_o,

  // exception signals
  output logic         load_err_o,
  output logic         store_err_o,
  output logic         lsu_err_is_cheri_o,      // error was a CHERI violation (mcause encoding)

  output logic         busy_o,
  output logic         busy_tbre_o,

  output logic         perf_load_o,
  output logic         perf_store_o
);

  logic [31:0]  data_addr;
  logic [31:0]  data_addr_w_aligned;
  logic [31:0]  addr_last_q, addr_last_d;

  logic         addr_update;
  logic         ctrl_update;
  logic         rdata_update;
  logic [31:8]  rdata_q;              // holds first-beat bytes of a misaligned load
  logic [1:0]   rdata_offset_q;
  logic [1:0]   data_type_q;
  logic         data_sign_ext_q;
  logic         data_we_q;

  logic [1:0]   data_offset;          // mux control for data to be written to memory

  logic [3:0]   data_be;
  logic [32:0]  data_wdata;

  logic [32:0]  data_rdata_ext;

  logic [32:0]  rdata_w_ext;          // word realignment for misaligned loads
  logic [31:0]  rdata_h_ext;          // sign extension for half words
  logic [31:0]  rdata_b_ext;          // sign extension for bytes

  logic         split_misaligned_access;
  logic         handle_misaligned_q, handle_misaligned_d; // high after receiving grant for first
                                                          // part of a misaligned access
  logic         pmp_err_q, pmp_err_d;
  logic         lsu_err_q, lsu_err_d;
  logic         data_or_pmp_err;

  // CHERIoT bookkeeping: response-side copies of request attributes, and TBRE arbitration state
  logic         resp_is_cap_q;        // outstanding response belongs to a cap access
  logic         cheri_err_d, cheri_err_q;
  logic [3:0]   resp_lc_clrperm_q;
  logic         cur_req_is_tbre;      // request currently being issued/serviced is from TBRE
  logic         req_is_tbre_q;
  logic         resp_is_tbre;
  logic         tbre_req_good;
  logic         outstanding_resp_q, resp_wait;
  logic         lsu_resp_valid;
  logic         lsu_go;               // decision made to accept/launch a request this cycle
  logic         addr_incr_req;
  logic         cpu_req_erred, cpu_req_valid;


  ls_fsm_e ls_fsm_cs, ls_fsm_ns;

  // separate FSM tracking the two response beats of a capability access
  cap_rx_fsm_t cap_rx_fsm_q, cap_rx_fsm_d;

  logic        cap_lsw_err_q;         // bus error seen on first (LSW) beat of a cap load
  logic [32:0] cap_lsw_q;             // first (LSW) beat of a cap load, held for merge with MSW

  assign data_addr   = lsu_addr_i;
  // caps are always word aligned, so force offset 0 for cap accesses
  assign data_offset = (cheri_pmode_i & lsu_is_cap_i) ? 2'b00 : data_addr[1:0];

  ///////////////////
  // BE generation //
  ///////////////////

  always_comb begin
    if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i)
      data_be = 4'b1111;        // caps are always word aligned
    else begin
      unique case (lsu_type_i) // Data type 00 Word, 01 Half word, 11,10 byte
        2'b00: begin // Writing a word
          if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
            unique case (data_offset)
              2'b00:   data_be = 4'b1111;
              2'b01:   data_be = 4'b1110;
              2'b10:   data_be = 4'b1100;
              2'b11:   data_be = 4'b1000;
              default: data_be = 4'b1111;
            endcase // case (data_offset)
          end else begin // second part of misaligned transaction
            unique case (data_offset)
              2'b00:   data_be = 4'b0000; // this is not used, but included for completeness
              2'b01:   data_be = 4'b0001;
              2'b10:   data_be = 4'b0011;
              2'b11:   data_be = 4'b0111;
              default: data_be = 4'b1111;
            endcase // case (data_offset)
          end
        end

        2'b01: begin // Writing a half word
          if (!handle_misaligned_q) begin // first part of potentially misaligned transaction
            unique case (data_offset)
              2'b00:   data_be = 4'b0011;
              2'b01:   data_be = 4'b0110;
              2'b10:   data_be = 4'b1100;
              2'b11:   data_be = 4'b1000;
              default: data_be = 4'b1111;
            endcase // case (data_offset)
          end else begin // second part of misaligned transaction
            data_be = 4'b0001;
          end
        end

        2'b10,
        2'b11: begin // Writing a byte
          unique case (data_offset)
            2'b00:   data_be = 4'b0001;
            2'b01:   data_be = 4'b0010;
            2'b10:   data_be = 4'b0100;
            2'b11:   data_be = 4'b1000;
            default: data_be = 4'b1111;
          endcase // case (data_offset)
        end

        default: data_be = 4'b1111;
      endcase // case (lsu_type_i)
    end // if lsu_cap_i
  end

  /////////////////////
  // WData alignment //
  /////////////////////

  // prepare data to be written to the memory
  // we handle misaligned accesses, half word and byte accesses here
  // For cap accesses: first beat carries the address word (or fmt1 LSW), the
  // second beat (issued from CTX_WAIT_GNT2) carries the encoded capability metadata.
  if (~MemCapFmt) begin : gen_memcap_wr_fmt0
    always_comb begin
      if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i && (ls_fsm_cs == CTX_WAIT_GNT2))
        data_wdata = CheriCapIT8 ? reg2memcap_it8_fmt0(lsu_wcap_i):
                                   reg2memcap_fmt0(lsu_wcap_i);
      else if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i)
        data_wdata = lsu_wdata_i;
      else begin
        unique case (data_offset)
          2'b00:   data_wdata = lsu_wdata_i[32:0];
          2'b01:   data_wdata = {1'b0, lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
          2'b10:   data_wdata = {1'b0, lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
          2'b11:   data_wdata = {1'b0, lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
          default: data_wdata = lsu_wdata_i[32:0];
        endcase // case (data_offset)
      end
    end
  end else begin : gen_memcap_wr_fmt1
    // fmt1 packs cap+address into one 66-bit value, split across the two beats
    logic [65:0] mem_capaddr;
    assign mem_capaddr = CheriCapIT8 ? reg2mem_it8_fmt1(lsu_wcap_i, lsu_wdata_i) :
                                       reg2mem_fmt1(lsu_wcap_i, lsu_wdata_i);

    always_comb begin
      if (CHERIoTEn & lsu_is_cap_i && (ls_fsm_cs == CTX_WAIT_GNT2))
        data_wdata = mem_capaddr[65:33];
      else if (CHERIoTEn & lsu_is_cap_i)
        data_wdata = mem_capaddr[32:0];
      else begin
        unique case (data_offset)
          2'b00:   data_wdata = lsu_wdata_i[32:0];
          2'b01:   data_wdata = {1'b0, lsu_wdata_i[23:0], lsu_wdata_i[31:24]};
          2'b10:   data_wdata = {1'b0, lsu_wdata_i[15:0], lsu_wdata_i[31:16]};
          2'b11:   data_wdata = {1'b0, lsu_wdata_i[ 7:0], lsu_wdata_i[31: 8]};
          default: data_wdata = lsu_wdata_i[32:0];
        endcase // case (data_offset)
      end
    end
  end
  /////////////////////
  // RData alignment //
  /////////////////////

  // register for unaligned rdata
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      rdata_q <= '0;
    end else if (rdata_update) begin
      rdata_q <= data_rdata_i[31:8];
    end
  end

  // registers for transaction control
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      rdata_offset_q  <= 2'h0;
      data_type_q     <= 2'h0;
      data_sign_ext_q <= 1'b0;
      data_we_q       <= 1'b0;
    end else if (ctrl_update) begin
      rdata_offset_q  <= data_offset;
      data_type_q     <= lsu_type_i;
      data_sign_ext_q <= lsu_sign_ext_i;
      data_we_q       <= lsu_we_i;
    end
  end

  // Store last address for mtval + AGU for misaligned transactions. Do not update in case of
  // errors, mtval needs the (first) failing address. Where an aligned access or the first half of
  // a misaligned access sees an error provide the calculated access address. For the second half of
  // a misaligned access provide the word aligned address of the second half.
  assign addr_last_d = addr_incr_req ? data_addr_w_aligned : data_addr;

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      addr_last_q <= '0;
    end else if (addr_update & ~cur_req_is_tbre) begin
      // TBRE transactions must not disturb the CPU's mtval/AGU address
      addr_last_q <= addr_last_d;
    end
  end

  // take care of misaligned words
  always_comb begin
    unique case (rdata_offset_q)
      2'b00:   rdata_w_ext = data_rdata_i[32:0];
      2'b01:   rdata_w_ext = {1'b0, data_rdata_i[ 7:0], rdata_q[31:8]};
      2'b10:   rdata_w_ext = {1'b0, data_rdata_i[15:0], rdata_q[31:16]};
      2'b11:   rdata_w_ext = {1'b0, data_rdata_i[23:0], rdata_q[31:24]};
      default: rdata_w_ext = data_rdata_i[32:0];
    endcase
  end

  ////////////////////
  // Sign extension //
  ////////////////////

  // sign extension for half words
  always_comb begin
    unique case (rdata_offset_q)
      2'b00: begin
        if (!data_sign_ext_q) begin
          rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
        end else begin
          rdata_h_ext = {{16{data_rdata_i[15]}}, data_rdata_i[15:0]};
        end
      end

      2'b01: begin
        if (!data_sign_ext_q) begin
          rdata_h_ext = {16'h0000, data_rdata_i[23:8]};
        end else begin
          rdata_h_ext = {{16{data_rdata_i[23]}}, data_rdata_i[23:8]};
        end
      end

      2'b10: begin
        if (!data_sign_ext_q) begin
          rdata_h_ext = {16'h0000, data_rdata_i[31:16]};
        end else begin
          rdata_h_ext = {{16{data_rdata_i[31]}}, data_rdata_i[31:16]};
        end
      end

      2'b11: begin
        // misaligned half word: low byte came from the first beat (rdata_q)
        if (!data_sign_ext_q) begin
          rdata_h_ext = {16'h0000, data_rdata_i[7:0], rdata_q[31:24]};
        end else begin
          rdata_h_ext = {{16{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]};
        end
      end

      default: rdata_h_ext = {16'h0000, data_rdata_i[15:0]};
    endcase // case (rdata_offset_q)
  end

  // sign extension for bytes
  always_comb begin
    unique case (rdata_offset_q)
      2'b00: begin
        if (!data_sign_ext_q) begin
          rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
        end else begin
          rdata_b_ext = {{24{data_rdata_i[7]}}, data_rdata_i[7:0]};
        end
      end

      2'b01: begin
        if (!data_sign_ext_q) begin
          rdata_b_ext = {24'h00_0000, data_rdata_i[15:8]};
        end else begin
          rdata_b_ext = {{24{data_rdata_i[15]}}, data_rdata_i[15:8]};
        end
      end

      2'b10: begin
        if (!data_sign_ext_q) begin
          rdata_b_ext = {24'h00_0000, data_rdata_i[23:16]};
        end else begin
          rdata_b_ext = {{24{data_rdata_i[23]}}, data_rdata_i[23:16]};
        end
      end

      2'b11: begin
        if (!data_sign_ext_q) begin
          rdata_b_ext = {24'h00_0000, data_rdata_i[31:24]};
        end else begin
          rdata_b_ext = {{24{data_rdata_i[31]}}, data_rdata_i[31:24]};
        end
      end

      default: rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]};
    endcase // case (rdata_offset_q)
  end

  // select word, half word or byte sign extended version
  always_comb begin
    unique case (data_type_q)
      2'b00:       data_rdata_ext = rdata_w_ext;
      2'b01:       data_rdata_ext = {1'b0, rdata_h_ext};
      2'b10,2'b11: data_rdata_ext = {1'b0, rdata_b_ext};
      default:     data_rdata_ext = rdata_w_ext;
    endcase // case (data_type_q)
  end

  /////////////
  // LSU FSM //
  /////////////

  // check for misaligned accesses that need to be split into two word-aligned accesses
  assign split_misaligned_access =
      ((lsu_type_i == 2'b00) && (data_offset != 2'b00)) || // misaligned word access
      ((lsu_type_i == 2'b01) && (data_offset == 2'b11));   // misaligned half-word access

  // CPU request is only valid if it has no CHERI violation and is not stalled by stkz
  assign cpu_req_valid = lsu_req_i & ~lsu_cheri_err_i & ~cpu_stall_by_stkz_i;
  assign cpu_req_erred = lsu_req_i & lsu_cheri_err_i;

  // FSM
  always_comb begin
    ls_fsm_ns = ls_fsm_cs;

    data_req_o          = 1'b0;
    addr_incr_req       = 1'b0;
    handle_misaligned_d = handle_misaligned_q;
    pmp_err_d           = pmp_err_q;
    lsu_err_d           = lsu_err_q;
    cheri_err_d         = cheri_err_q & cheri_pmode_i;

    addr_update  = 1'b0;
    ctrl_update  = 1'b0;
    rdata_update = 1'b0;

    perf_load_o  = 1'b0;
    perf_store_o = 1'b0;

    lsu_go = 1'b0;

    unique case (ls_fsm_cs)

      IDLE: begin
        pmp_err_d   = 1'b0;
        cheri_err_d = 1'b0;

        // resp_wait holds off new requests while a previous response is still outstanding
        if (CHERIoTEn & cheri_pmode_i & cpu_req_erred & ~resp_wait) begin
          // cheri access error case, don't issue data_req but send error response back to WB stage
          data_req_o  = 1'b0;
          cheri_err_d = 1'b1;
          ctrl_update = 1'b1;         // update ctrl/address so we can report error correctly
          addr_update = 1'b1;
          pmp_err_d   = 1'b0;
          lsu_err_d   = 1'b0;
          perf_load_o = 1'b0;
          lsu_go      = 1'b1;         // decision to move forward with a request
          ls_fsm_ns   = IDLE;
        end else if (CHERIoTEn & cheri_pmode_i & (cpu_req_valid | tbre_req_good) &
                     lsu_is_cap_i & ~resp_wait) begin
          // normal cap access case
          data_req_o   = 1'b1;
          cheri_err_d  = 1'b0;
          pmp_err_d    = data_pmp_err_i;
          lsu_err_d    = 1'b0;
          perf_load_o  = ~lsu_we_i;
          perf_store_o = lsu_we_i;
          lsu_go       = 1'b1;        // decision to move forward with a request

          if (data_gnt_i) begin
            ctrl_update = 1'b1;
            addr_update = 1'b1;
            ls_fsm_ns   = CTX_WAIT_GNT2;
          end else begin
            ls_fsm_ns   = CTX_WAIT_GNT1;
          end
        end else if ((cpu_req_valid | tbre_req_good) & ~resp_wait) begin
          // normal data access case
          data_req_o   = 1'b1;
          cheri_err_d  = 1'b0;
          pmp_err_d    = data_pmp_err_i;
          lsu_err_d    = 1'b0;
          perf_load_o  = ~lsu_we_i;
          perf_store_o = lsu_we_i;
          lsu_go       = 1'b1;        // decision to move forward with a request

          if (data_gnt_i) begin
            ctrl_update         = 1'b1;
            addr_update         = 1'b1;
            handle_misaligned_d = split_misaligned_access;
            ls_fsm_ns           = split_misaligned_access ? WAIT_RVALID_MIS : IDLE;
          end else begin
            ls_fsm_ns           = split_misaligned_access ? WAIT_GNT_MIS : WAIT_GNT;
          end
        end

      end

      WAIT_GNT_MIS: begin
        data_req_o = 1'b1;
        // data_pmp_err_i is valid during the address phase of a request. An error will block the
        // external request and so a data_gnt_i might never be signalled. The registered version
        // pmp_err_q is only updated for new address phases and so can be used in WAIT_GNT* and
        // WAIT_RVALID* states
        if (data_gnt_i || pmp_err_q ) begin
          addr_update         = 1'b1;
          ctrl_update         = 1'b1;
          handle_misaligned_d = 1'b1;
          ls_fsm_ns           = WAIT_RVALID_MIS;
        end
      end

      WAIT_RVALID_MIS: begin
        // push out second request
        data_req_o = 1'b1;
        // tell ID/EX stage to update the address
        addr_incr_req = 1'b1;

        // first part rvalid is received, or gets a PMP error
        if (data_rvalid_i || pmp_err_q) begin
          // Update the PMP error for the second part
          pmp_err_d = data_pmp_err_i;
          // Record the error status of the first part
          lsu_err_d = data_err_i | pmp_err_q;
          // Capture the first rdata for loads
          rdata_update = ~data_we_q;
          // If already granted, wait for second rvalid
          ls_fsm_ns = data_gnt_i ? IDLE : WAIT_GNT;
          // Update the address for the second part, if no error
          addr_update = data_gnt_i & ~(data_err_i | pmp_err_q);
          // clear handle_misaligned if second request is granted
          handle_misaligned_d = ~data_gnt_i;
        end else begin
          // first part rvalid is NOT received
          if (data_gnt_i) begin
            // second grant is received
            ls_fsm_ns = WAIT_RVALID_MIS_GNTS_DONE;
            handle_misaligned_d = 1'b0;
          end
        end
      end

      WAIT_GNT: begin
        // tell ID/EX stage to update the address
        addr_incr_req = handle_misaligned_q;
        data_req_o    = 1'b1;
        if (data_gnt_i || pmp_err_q) begin
          ctrl_update = 1'b1;
          // Update the address, unless there was an error
          addr_update = ~lsu_err_q;
          ls_fsm_ns   = IDLE;
          handle_misaligned_d = 1'b0;
        end
      end

      WAIT_RVALID_MIS_GNTS_DONE: begin
        // tell ID/EX stage to update the address (to make sure the
        // second address can be captured correctly for mtval and PMP checking)
        addr_incr_req = 1'b1;
        // Wait for the first rvalid, second request is already granted
        if (data_rvalid_i) begin
          // Update the pmp error for the second part
          pmp_err_d = data_pmp_err_i ;
          // The first part cannot see a PMP error in this state
          lsu_err_d = data_err_i;
          // Now we can update the address for the second part if no error
          addr_update = ~data_err_i;
          // Capture the first rdata for loads
          rdata_update = ~data_we_q;
          // Wait for second rvalid
          ls_fsm_ns = IDLE;
        end
      end

      // CTX_* states: second beat of a capability (2-word) access.
      // NOTE(review): these states are only reachable when cheri_pmode_i is set at
      // request time; the else-branches below are a safety net back to IDLE.
      CTX_WAIT_GNT1: begin
        // first beat of a cap access not yet granted
        if (cheri_pmode_i) begin
          addr_incr_req = 1'b0;
          data_req_o    = 1'b1;
          if (data_gnt_i) begin
            ls_fsm_ns   = CTX_WAIT_GNT2;
            ctrl_update = 1'b1;
            addr_update = 1'b1;
          end
        end else begin
          ls_fsm_ns = IDLE;
        end
      end

      CTX_WAIT_GNT2: begin
        // first beat granted; issue second beat (addr + 4 via addr_incr_req)
        if (cheri_pmode_i) begin
          addr_incr_req = 1'b1;
          data_req_o    = 1'b1;
          if (data_gnt_i && (data_rvalid_i || (cap_rx_fsm_q == CRX_WAIT_RESP2))) ls_fsm_ns = IDLE;
          else if (data_gnt_i) ls_fsm_ns = CTX_WAIT_RESP;
        end else begin
          ls_fsm_ns = IDLE;
        end
      end

      CTX_WAIT_RESP: begin // only needed if mem allows 2 active req
        if (cheri_pmode_i) begin
          addr_incr_req = 1'b1;     // stay 1 to reduce unnecessary addr toggling
          data_req_o    = 1'b0;
          if (data_rvalid_i) ls_fsm_ns = IDLE;
        end else begin
          ls_fsm_ns = IDLE;
        end
      end

      default: begin
        ls_fsm_ns = IDLE;
      end
    endcase
  end

  // Response-side FSM for capability accesses: tracks which of the two rvalid
  // beats is currently expected (RESP1 = LSW beat, RESP2 = metadata/MSW beat).
  always_comb begin
    cap_rx_fsm_d = cap_rx_fsm_q;

    case (cap_rx_fsm_q)
      CRX_IDLE:
        if (CHERIoTEn & cheri_pmode_i & lsu_is_cap_i && (ls_fsm_ns != IDLE)) cap_rx_fsm_d = CRX_WAIT_RESP1;
      CRX_WAIT_RESP1:
        if (data_rvalid_i) cap_rx_fsm_d = CRX_WAIT_RESP2;
      CRX_WAIT_RESP2:
        // back-to-back cap accesses go straight to RESP1 of the next transfer
        if (data_rvalid_i && lsu_is_cap_i && (ls_fsm_ns != IDLE)) cap_rx_fsm_d = CRX_WAIT_RESP1;
        else if (data_rvalid_i) cap_rx_fsm_d = CRX_IDLE;
      default:;
    endcase
  end

  // this is the decision of granting LSU to TBRE/STKZ
  assign tbre_req_good = CHERIoTEn & cheri_pmode_i & CheriTBRE & tbre_lsu_req_i &
                         (~cpu_lsu_dec_i | (cpu_lsu_dec_i & cpu_grant_to_stkz_i));

  // block new requests while a response is still outstanding (single outstanding transaction)
  assign resp_wait = CHERIoTEn & cheri_pmode_i & CheriTBRE & outstanding_resp_q & ~lsu_resp_valid;

  // we assume ctrl will be held till req_done asserted
  // (once req captured in IDLE, it can be deasserted)
  logic lsu_req_done;

  assign lsu_req_done = (lsu_go | (ls_fsm_cs != IDLE)) & (ls_fsm_ns == IDLE);

  // steer req_done / addr_incr handshakes to either the CPU or the TBRE side
  assign lsu_req_done_o      = lsu_req_done & (~cur_req_is_tbre);
  assign lsu_tbre_req_done_o = lsu_req_done & cur_req_is_tbre & cheri_pmode_i;

  assign lsu_addr_incr_req_o      = addr_incr_req & ~cur_req_is_tbre;
  assign lsu_tbre_addr_incr_req_o = addr_incr_req & cur_req_is_tbre;

  // in IDLE the arbitration decision is combinational; once a request is in
  // flight the registered owner (req_is_tbre_q) is used
  assign cur_req_is_tbre = CHERIoTEn & cheri_pmode_i & CheriTBRE & ((ls_fsm_cs == IDLE) ?
                           (tbre_req_good & ~resp_wait) : req_is_tbre_q);

  assign lsu_tbre_sel_o = cur_req_is_tbre;  // req ctrl signal mux select (to cheri_ex/tbre_wrapper)

  // registers for FSM
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      ls_fsm_cs           <= IDLE;
      handle_misaligned_q <= '0;
      pmp_err_q           <= '0;
      lsu_err_q           <= '0;
      resp_is_cap_q       <= 1'b0;
      resp_lc_clrperm_q   <= 4'h0;
      req_is_tbre_q       <= 1'b0;
      cheri_err_q         <= 1'b0;
      cap_rx_fsm_q        <= CRX_IDLE;
      cap_lsw_err_q       <= 1'b0;
      cap_lsw_q           <= 33'h0;
      outstanding_resp_q  <= 1'b0;
    end else begin
      ls_fsm_cs           <= ls_fsm_ns;
      handle_misaligned_q <= handle_misaligned_d;
      pmp_err_q           <= pmp_err_d;
      lsu_err_q           <= lsu_err_d;
      cheri_err_q         <= cheri_err_d;

      cap_rx_fsm_q        <= cap_rx_fsm_d;

      // resp_is_cap_q aligns with responses on the data interface, lsu_is_cap_i aligns with requests
      // we use lsu_go to qualify this update
      //  - note this implies that LSU only support a outstand request at a time
      //  - new request can't be issued (go) until resp_valid
      //  - also note resp_valid is gated by (ls_fsm_cs == IDLE)
      if (lsu_go) begin
        resp_is_cap_q     <= lsu_is_cap_i;
        resp_lc_clrperm_q <= lsu_lc_clrperm_i;
        req_is_tbre_q     <= cur_req_is_tbre;
      end

      // capture the first (LSW) beat of a cap load so it can be merged with the
      // second beat when forming the register-file capability
      if (CHERIoTEn & cheri_pmode_i && (cap_rx_fsm_q == CRX_WAIT_RESP1) && data_rvalid_i && (~data_we_q))
        cap_lsw_q <= data_rdata_i;

      if (CHERIoTEn & cheri_pmode_i && (cap_rx_fsm_q == CRX_WAIT_RESP1) && data_rvalid_i)
        cap_lsw_err_q <= data_err_i;

      if (lsu_go)
        outstanding_resp_q <= 1'b1;
      else if (lsu_resp_valid)
        outstanding_resp_q <= 1'b0;

    end
  end

  /////////////
  // Outputs //
  /////////////

  assign resp_is_tbre = req_is_tbre_q;

  logic all_resp;
  // aggregate error: bus error, PMP error, CHERI violation, or error on the
  // first beat of a cap access (cap_lsw_err_q)
  assign data_or_pmp_err = lsu_err_q | data_err_i | pmp_err_q | (cheri_pmode_i &
                           (cheri_err_q | (resp_is_cap_q & cap_lsw_err_q)));

  assign all_resp       = data_rvalid_i | pmp_err_q | (cheri_pmode_i & cheri_err_q);
  assign lsu_resp_valid = all_resp & (ls_fsm_cs == IDLE) ;

  assign lsu_resp_valid_o      = lsu_resp_valid & (~cheri_pmode_i | (~resp_is_tbre)) ;
  assign lsu_tbre_resp_valid_o = lsu_resp_valid & resp_is_tbre;
  assign lsu_resp_is_wr_o      = data_we_q;

  // this goes to wb as rf_we_lsu, so needs to be gated when data needs to go back to EX
  assign lsu_rdata_valid_o = (ls_fsm_cs == IDLE) & data_rvalid_i & ~data_or_pmp_err & ~data_we_q &
                             (~cheri_pmode_i | (~resp_is_tbre));

  // output to register file
  // Cap loads: merge first beat (cap_lsw_q) with second beat (data_rdata_i) via
  // the mem2regcap_* decode functions; a failed load yields NULL_REG_CAP.
  if (CHERIoTEn & ~MemCapFmt) begin : gen_memcap_rd_fmt0
    assign lsu_rdata_o = (cheri_pmode_i & resp_is_cap_q) ? cap_lsw_q : data_rdata_ext;
    assign lsu_rcap_o  = (resp_is_cap_q && data_rvalid_i && (cap_rx_fsm_q == CRX_WAIT_RESP2) && (~data_or_pmp_err)) ?
                         (CheriCapIT8 ? mem2regcap_it8_fmt0(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q) :
                                        mem2regcap_fmt0(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q)) : NULL_REG_CAP;
  end else if (CHERIoTEn) begin : gen_memcap_rd_fmt1
    assign lsu_rdata_o = (cheri_pmode_i & resp_is_cap_q) ? mem2regaddr_fmt1(data_rdata_ext, cap_lsw_q, lsu_rcap_o): data_rdata_ext;
    assign lsu_rcap_o  = (resp_is_cap_q && data_rvalid_i && (cap_rx_fsm_q == CRX_WAIT_RESP2) && (~data_or_pmp_err)) ?
                         (CheriCapIT8 ? mem2regcap_it8_fmt1(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q) :
                                        mem2regcap_fmt1(data_rdata_i, cap_lsw_q, resp_lc_clrperm_q)) : NULL_REG_CAP;
  end else begin : gen_no_cap_rd
    assign lsu_rdata_o = data_rdata_ext;
    assign lsu_rcap_o  = NULL_REG_CAP;
  end


  assign lsu_tbre_raw_lsw_o = cap_lsw_q;    // "raw" memory word to tbre

  // output data address must be word aligned
  assign data_addr_w_aligned = {data_addr[31:2], 2'b00};

  // output to data interface
  assign data_addr_o   = data_addr_w_aligned;

  assign data_wdata_o  = data_wdata;
  assign data_we_o     = lsu_we_i;
  assign data_be_o     = data_be;

  assign data_is_cap_o = lsu_is_cap_i;

  // output to ID stage: mtval + AGU for misaligned transactions
  assign addr_last_o = addr_last_q;

  // Signal a load or store error depending on the transaction type outstanding
  // (TBRE errors are reported separately via lsu_tbre_resp_err_o)
  assign load_err_o  = data_or_pmp_err & ~data_we_q & lsu_resp_valid & (~resp_is_tbre);
  assign store_err_o = data_or_pmp_err & data_we_q & lsu_resp_valid & (~resp_is_tbre);

  assign lsu_err_is_cheri_o  = cheri_pmode_i & cheri_err_q;   // send to controller for mcause encoding
  assign lsu_tbre_resp_err_o = cheri_pmode_i & data_or_pmp_err & lsu_resp_valid & resp_is_tbre;

  assign busy_o = (ls_fsm_cs != IDLE);
  // assign busy_tbre_o = (ls_fsm_cs != IDLE) & cur_req_is_tbre;
  assign busy_tbre_o = (ls_fsm_cs != IDLE) & cheri_pmode_i & resp_is_tbre;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv b/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv new file mode 100644 index 0000000..15815a0 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_lockstep.sv
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Ibex lockstep module
//
// This module instantiates a second (shadow) copy of the core logic and compares its outputs
// against those from the main core. The shadow core runs synchronously with the main core,
// delayed by LockstepOffset cycles: all core inputs are delayed by LockstepOffset cycles before
// being fed to the shadow core, and all main-core outputs are delayed by LockstepOffset + 1
// cycles (one extra cycle for the shadow output register) before being compared.
//
// Outputs:
//   alert_major_internal_o - lockstep mismatch or major alert from the shadow core
//   alert_major_bus_o      - ECC/integrity error on the instruction, data or TS-map buses
//   alert_minor_o          - minor alert from the shadow core
module cheriot_lockstep import cheriot_pkg::*; import cheri_pkg::*; #(
  parameter int unsigned LockstepOffset    = 2,
  parameter bit          PMPEnable         = 1'b0,
  parameter int unsigned PMPGranularity    = 0,
  parameter int unsigned PMPNumRegions     = 4,
  parameter int unsigned MHPMCounterNum    = 0,
  parameter int unsigned MHPMCounterWidth  = 40,
  parameter bit          RV32E             = 1'b0,
  parameter rv32m_e      RV32M             = RV32MFast,
  parameter rv32b_e      RV32B             = RV32BNone,
  parameter bit          BranchTargetALU   = 1'b0,
  parameter bit          WritebackStage    = 1'b0,
  parameter bit          ICache            = 1'b0,
  parameter bit          ICacheECC         = 1'b0,
  parameter int unsigned BusSizeECC        = BUS_SIZE,
  parameter int unsigned TagSizeECC        = IC_TAG_SIZE,
  parameter int unsigned LineSizeECC       = IC_LINE_SIZE,
  parameter bit          BranchPredictor   = 1'b0,
  parameter bit          DbgTriggerEn      = 1'b0,
  parameter int unsigned DbgHwBreakNum     = 1,
  parameter bit          ResetAll          = 1'b0,
  parameter lfsr_seed_t  RndCnstLfsrSeed   = RndCnstLfsrSeedDefault,
  parameter lfsr_perm_t  RndCnstLfsrPerm   = RndCnstLfsrPermDefault,
  parameter bit          SecureIbex        = 1'b0,
  parameter bit          DummyInstructions = 1'b0,
  parameter bit          RegFileECC        = 1'b0,
  parameter int unsigned RegFileDataWidth  = 32,
  parameter int unsigned DmHaltAddr        = 32'h1A110800,
  parameter int unsigned DmExceptionAddr   = 32'h1A110808,
  // CHERIoT paramters
  parameter bit          CHERIoTEn         = 1'b1,
  parameter int unsigned DataWidth         = 33,
  parameter int unsigned HeapBase          = 32'h2001_0000,
  parameter int unsigned TSMapBase         = 32'h2002_f000,
  parameter int unsigned TSMapSize         = 1024,
  parameter bit          MemCapFmt         = 1'b0,
  parameter bit          CheriPPLBC        = 1'b1,
  parameter bit          CheriSBND2        = 1'b0,
  parameter bit          CheriTBRE         = 1'b1,
  parameter int unsigned MMRegDinW         = 128,
  parameter int unsigned MMRegDoutW        = 64
) (
  input  logic                         clk_i,
  input  logic                         rst_ni,

  input  logic [31:0]                  hart_id_i,
  input  logic [31:0]                  boot_addr_i,
  input  logic                         cheri_pmode_i,
  input  logic                         cheri_tsafe_en_i,

  input  logic                         instr_req_i,
  input  logic                         instr_gnt_i,
  input  logic                         instr_rvalid_i,
  input  logic [31:0]                  instr_addr_i,
  input  logic [31:0]                  instr_rdata_i,
  input  logic [6:0]                   instr_rdata_intg_i,
  input  logic                         instr_err_i,

  input  logic                         data_req_i,
  input  logic                         data_gnt_i,
  input  logic                         data_rvalid_i,
  input  logic                         data_we_i,
  input  logic [3:0]                   data_be_i,
  input  logic [31:0]                  data_addr_i,
  input  logic [DataWidth-1:0]         data_wdata_i,
  input  logic                         data_is_cap_i,
  output logic [6:0]                   data_wdata_intg_o,
  input  logic [DataWidth-1:0]         data_rdata_i,
  input  logic [6:0]                   data_rdata_intg_i,
  input  logic                         data_err_i,

  input  logic                         dummy_instr_id_i,
  input  logic [4:0]                   rf_raddr_a_i,
  input  logic [4:0]                   rf_raddr_b_i,
  input  logic [4:0]                   rf_waddr_wb_i,
  input  logic                         rf_we_wb_i,
  input  logic [RegFileDataWidth-1:0]  rf_wdata_wb_ecc_i,
  input  logic [RegFileDataWidth-1:0]  rf_rdata_a_ecc_i,
  input  logic [RegFileDataWidth-1:0]  rf_rdata_b_ecc_i,

  input  reg_cap_t                     rf_wcap_wb_i,
  input  reg_cap_t                     rf_rcap_a_i,
  input  reg_cap_t                     rf_rcap_b_i,
  input  logic [31:0]                  rf_reg_rdy_i,
  input  logic                         rf_trsv_en_i,
  input  logic [4:0]                   rf_trsv_addr_i,
  input  logic [6:0]                   rf_trsv_par_i,
  input  logic [4:0]                   rf_trvk_addr_i,
  input  logic                         rf_trvk_en_i,
  input  logic                         rf_trvk_clrtag_i,
  input  logic [6:0]                   rf_trvk_par_i,
  input  logic                         tsmap_cs_i,
  input  logic [15:0]                  tsmap_addr_i,
  input  logic [31:0]                  tsmap_rdata_i,
  input  logic [6:0]                   tsmap_rdata_intg_i,
  input  logic [MMRegDinW-1:0]         mmreg_corein_i,
  input  logic [MMRegDoutW-1:0]        mmreg_coreout_i,

  input  logic [IC_NUM_WAYS-1:0]       ic_tag_req_i,
  input  logic                         ic_tag_write_i,
  input  logic [IC_INDEX_W-1:0]        ic_tag_addr_i,
  input  logic [TagSizeECC-1:0]        ic_tag_wdata_i,
  input  logic [TagSizeECC-1:0]        ic_tag_rdata_i [IC_NUM_WAYS],
  input  logic [IC_NUM_WAYS-1:0]       ic_data_req_i,
  input  logic                         ic_data_write_i,
  input  logic [IC_INDEX_W-1:0]        ic_data_addr_i,
  input  logic [LineSizeECC-1:0]       ic_data_wdata_i,
  input  logic [LineSizeECC-1:0]       ic_data_rdata_i [IC_NUM_WAYS],
  input  logic                         ic_scr_key_valid_i,

  input  logic                         irq_software_i,
  input  logic                         irq_timer_i,
  input  logic                         irq_external_i,
  input  logic [14:0]                  irq_fast_i,
  input  logic                         irq_nm_i,
  input  logic                         irq_pending_i,

  input  logic                         debug_req_i,
  input  crash_dump_t                  crash_dump_i,
  input  logic                         double_fault_seen_i,

  input  fetch_enable_t                fetch_enable_i,
  output logic                         alert_minor_o,
  output logic                         alert_major_internal_o,
  output logic                         alert_major_bus_o,
  input  logic                         icache_inval_i,
  input  logic                         core_busy_i,
  input  logic                         test_en_i,
  input  logic                         scan_rst_ni
);

  localparam int unsigned LockstepOffsetW = $clog2(LockstepOffset);
  // Core outputs are delayed for an extra cycle due to shadow output registers
  localparam int unsigned OutputsOffset = LockstepOffset + 1;

  //////////////////////
  // Reset generation //
  //////////////////////

  // Upon reset, the comparison is stopped and the shadow core is reset, both immediately. A
  // counter is started. After LockstepOffset clock cycles:
  // - The counter is stopped.
  // - The reset of the shadow core is synchronously released.
  // The comparison is started in the following clock cycle.

  logic [LockstepOffsetW-1:0] rst_shadow_cnt_d, rst_shadow_cnt_q, rst_shadow_cnt_incr;
  // Internally generated resets cause IMPERFECTSCH warnings
  /* verilator lint_off IMPERFECTSCH */
  logic                       rst_shadow_set_d, rst_shadow_set_q;
  logic                       rst_shadow_n, enable_cmp_q;
  /* verilator lint_on IMPERFECTSCH */

  assign rst_shadow_cnt_incr = rst_shadow_cnt_q + LockstepOffsetW'(1);

  // Saturate the counter at LockstepOffset - 1; the saturated value releases the shadow reset.
  assign rst_shadow_set_d = (rst_shadow_cnt_q == LockstepOffsetW'(LockstepOffset - 1));
  assign rst_shadow_cnt_d = rst_shadow_set_d ? rst_shadow_cnt_q : rst_shadow_cnt_incr;

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      rst_shadow_cnt_q <= '0;
      enable_cmp_q     <= '0;
    end else begin
      rst_shadow_cnt_q <= rst_shadow_cnt_d;
      // Comparison is enabled one cycle after the shadow reset is released.
      enable_cmp_q     <= rst_shadow_set_q;
    end
  end

  // The primitives below are used to place size-only constraints in order to prevent
  // synthesis optimizations and preserve anchor points for constraining backend tools.
  prim_flop #(
    .Width(1),
    .ResetValue(1'b0)
  ) u_prim_rst_shadow_set_flop (
    .clk_i (clk_i),
    .rst_ni(rst_ni),
    .d_i   (rst_shadow_set_d),
    .q_o   (rst_shadow_set_q)
  );

  // During scan (test_en_i), the shadow reset is driven directly from scan_rst_ni.
  prim_clock_mux2 #(
    .NoFpgaBufG(1'b1)
  ) u_prim_rst_shadow_n_mux2 (
    .clk0_i(rst_shadow_set_q),
    .clk1_i(scan_rst_ni),
    .sel_i (test_en_i),
    .clk_o (rst_shadow_n)
  );

  //////////////////
  // Input delays //
  //////////////////

  // All main-core inputs that must reach the shadow core are delayed through this structure.
  typedef struct packed {
    logic                        instr_gnt;
    logic                        instr_rvalid;
    logic [31:0]                 instr_rdata;
    logic                        instr_err;
    logic                        data_gnt;
    logic                        data_rvalid;
    logic [DataWidth-1:0]        data_rdata;
    logic                        data_err;
    logic [RegFileDataWidth-1:0] rf_rdata_a_ecc;
    logic [RegFileDataWidth-1:0] rf_rdata_b_ecc;
    logic                        irq_software;
    logic                        irq_timer;
    logic                        irq_external;
    logic [14:0]                 irq_fast;
    logic                        irq_nm;
    logic                        debug_req;
    fetch_enable_t               fetch_enable;
    logic                        ic_scr_key_valid;
    logic                        cheri_pmode;
    logic                        cheri_tsafe_en;
    reg_cap_t                    rf_rcap_a;
    reg_cap_t                    rf_rcap_b;
    logic [31:0]                 rf_reg_rdy;
    logic [31:0]                 tsmap_rdata;
    logic [MMRegDinW-1:0]        mmreg_corein;
  } delayed_inputs_t;

  delayed_inputs_t [LockstepOffset-1:0] shadow_inputs_q;
  delayed_inputs_t                      shadow_inputs_in;
  logic [6:0]                           instr_rdata_intg_q, data_rdata_intg_q;
  logic [6:0]                           tsmap_rdata_intg_q;
  // Packed arrays must be dealt with separately
  logic [TagSizeECC-1:0]  shadow_tag_rdata_q [IC_NUM_WAYS][LockstepOffset];
  logic [LineSizeECC-1:0] shadow_data_rdata_q [IC_NUM_WAYS][LockstepOffset];

  // Assign the inputs to the delay structure
  assign shadow_inputs_in.instr_gnt        = instr_gnt_i;
  assign shadow_inputs_in.instr_rvalid     = instr_rvalid_i;
  assign shadow_inputs_in.instr_rdata      = instr_rdata_i;
  assign shadow_inputs_in.instr_err        = instr_err_i;
  assign shadow_inputs_in.data_gnt         = data_gnt_i;
  assign shadow_inputs_in.data_rvalid      = data_rvalid_i;
  assign shadow_inputs_in.data_rdata       = data_rdata_i;
  assign shadow_inputs_in.data_err         = data_err_i;
  assign shadow_inputs_in.rf_rdata_a_ecc   = rf_rdata_a_ecc_i;
  assign shadow_inputs_in.rf_rdata_b_ecc   = rf_rdata_b_ecc_i;
  assign shadow_inputs_in.irq_software     = irq_software_i;
  assign shadow_inputs_in.irq_timer        = irq_timer_i;
  assign shadow_inputs_in.irq_external     = irq_external_i;
  assign shadow_inputs_in.irq_fast         = irq_fast_i;
  assign shadow_inputs_in.irq_nm           = irq_nm_i;
  assign shadow_inputs_in.debug_req        = debug_req_i;
  assign shadow_inputs_in.fetch_enable     = fetch_enable_i;
  assign shadow_inputs_in.ic_scr_key_valid = ic_scr_key_valid_i;
  assign shadow_inputs_in.cheri_pmode      = cheri_pmode_i;
  assign shadow_inputs_in.cheri_tsafe_en   = cheri_tsafe_en_i;
  assign shadow_inputs_in.rf_rcap_a        = rf_rcap_a_i;
  assign shadow_inputs_in.rf_rcap_b        = rf_rcap_b_i;
  assign shadow_inputs_in.rf_reg_rdy       = rf_reg_rdy_i;
  assign shadow_inputs_in.tsmap_rdata      = tsmap_rdata_i;
  assign shadow_inputs_in.mmreg_corein     = mmreg_corein_i;

  // Delay the inputs
  // NOTE(review): the shift loops below index shadow_tag_rdata_q / shadow_data_rdata_q by the
  // delay stage, while the arrays are declared as [IC_NUM_WAYS][LockstepOffset]. This is only
  // consistent when IC_NUM_WAYS == LockstepOffset (both 2 by default) - confirm if either
  // parameter is changed.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      instr_rdata_intg_q <= '0;
      data_rdata_intg_q  <= '0;
      tsmap_rdata_intg_q <= '0;
      for (int unsigned i = 0; i < LockstepOffset; i++) begin
        shadow_inputs_q[i]     <= delayed_inputs_t'('0);
        shadow_tag_rdata_q[i]  <= '{default: 0};
        shadow_data_rdata_q[i] <= '{default: 0};
      end
    end else begin
      instr_rdata_intg_q <= instr_rdata_intg_i;
      data_rdata_intg_q  <= data_rdata_intg_i;
      tsmap_rdata_intg_q <= tsmap_rdata_intg_i;
      for (int unsigned i = 0; i < LockstepOffset - 1; i++) begin
        shadow_inputs_q[i]     <= shadow_inputs_q[i+1];
        shadow_tag_rdata_q[i]  <= shadow_tag_rdata_q[i+1];
        shadow_data_rdata_q[i] <= shadow_data_rdata_q[i+1];
      end
      shadow_inputs_q[LockstepOffset-1]     <= shadow_inputs_in;
      shadow_tag_rdata_q[LockstepOffset-1]  <= ic_tag_rdata_i;
      shadow_data_rdata_q[LockstepOffset-1] <= ic_data_rdata_i;
    end
  end

  ////////////////////////////
  // Bus integrity checking //
  ////////////////////////////

  logic        bus_intg_err;
  logic [1:0]  instr_intg_err, data_intg_err, data_intg_err_tmp;
  logic [31:0] unused_wdata;
  logic [1:0]  data_we_q;
  logic [31:0] rdata_tmp;

  // Track whether the outstanding data transaction is a write, delayed to line up with
  // shadow_inputs_q[LockstepOffset-1].
  // NOTE(review): this pipeline is 2 deep and therefore only aligned when LockstepOffset == 2 -
  // confirm if LockstepOffset is changed.
  always @(posedge clk_i, negedge rst_ni) begin
    if (~rst_ni) begin
      data_we_q <= 2'b00;
    end else begin
      if (data_gnt_i) data_we_q[1] <= data_we_i;
      data_we_q[0] <= data_we_q[1];  // align with shadow_inputs_q[LockstepOffset-1]
    end
  end

  // Checks on incoming data
  prim_secded_inv_39_32_dec u_instr_intg_dec (
    .data_i     ({instr_rdata_intg_q, shadow_inputs_q[LockstepOffset-1].instr_rdata}),
    .data_o     (),
    .syndrome_o (),
    .err_o      (instr_intg_err)
  );

  // In CHERIoT mode the capability tag bit (bit 32) is folded into data bit 0 before ECC,
  // so undo the folding here before decoding.
  if (CHERIoTEn) begin : gen_data_intg_rdata_cheri
    assign rdata_tmp = shadow_inputs_q[LockstepOffset-1].data_rdata[31:0] ^
                       {31'h0, shadow_inputs_q[LockstepOffset-1].data_rdata[32]};
  end else begin : gen_data_intg_rdata
    assign rdata_tmp = shadow_inputs_q[LockstepOffset-1].data_rdata[31:0];
  end

  prim_secded_inv_39_32_dec u_data_intg_dec (
    .data_i     ({data_rdata_intg_q, rdata_tmp}),
    .data_o     (),
    .syndrome_o (),
    .err_o      (data_intg_err_tmp)
  );

  // only check read data (data_rvalid includes both reads and writes)
  assign data_intg_err = data_we_q[0] ? 2'h0 : data_intg_err_tmp;

  assign bus_intg_err = (shadow_inputs_q[LockstepOffset-1].instr_rvalid & |instr_intg_err) |
                        (shadow_inputs_q[LockstepOffset-1].data_rvalid  & |data_intg_err);

  // Generate integrity bits for outgoing write data (tag bit folded in for CHERIoT).
  if (CHERIoTEn) begin : gen_data_intg_gen_cheri
    prim_secded_inv_39_32_enc u_data_gen (
      .data_i (data_wdata_i[31:0] ^ {31'h0, data_wdata_i[32]}),
      .data_o ({data_wdata_intg_o, unused_wdata})
    );
  end else begin : gen_data_intg_gen
    prim_secded_inv_39_32_enc u_data_gen (
      .data_i (data_wdata_i[31:0]),
      .data_o ({data_wdata_intg_o, unused_wdata})
    );
  end


  ////////////////////////////////////////
  // TSMAP interface integrity checking //
  ////////////////////////////////////////

  logic       tsmap_intg_err;
  logic [1:0] tsmap_intg_err_tmp;
  logic [1:0] tsmap_cs_q;

  if (CHERIoTEn && CheriPPLBC) begin : gen_tsmap_intg_chk
    // Delay the chip-select so the validity check lines up with the delayed read data.
    // NOTE(review): like data_we_q, this is only aligned when LockstepOffset == 2.
    always @(posedge clk_i, negedge rst_ni) begin
      if (~rst_ni) begin
        tsmap_cs_q <= 2'b00;
      end else begin
        tsmap_cs_q <= {tsmap_cs_i, tsmap_cs_q[1]};  // align with shadow_inputs_q[LockstepOffset-1]
      end
    end

    // Checks on incoming data
    prim_secded_inv_39_32_dec u_tsmap_intg_dec (
      .data_i     ({tsmap_rdata_intg_q, shadow_inputs_q[LockstepOffset-1].tsmap_rdata}),
      .data_o     (),
      .syndrome_o (),
      .err_o      (tsmap_intg_err_tmp)
    );

    // BUGFIX: err_o is 2 bits (bit 0 = correctable, bit 1 = uncorrectable). The original code
    // ANDed the 1-bit chip-select with the full 2-bit vector and truncated the result, so
    // double-bit (uncorrectable) errors were silently dropped. Use a reduction OR, matching the
    // instruction/data integrity paths above.
    assign tsmap_intg_err = tsmap_cs_q[0] & (|tsmap_intg_err_tmp);

  end else begin : gen_no_tsmap_intg_chk
    assign tsmap_intg_err = 1'b0;
  end

  ///////////////////
  // Output delays //
  ///////////////////

  // All main-core outputs that are compared against the shadow core go through this structure.
  typedef struct packed {
    logic                        instr_req;
    logic [31:0]                 instr_addr;
    logic                        data_req;
    logic                        data_we;
    logic [3:0]                  data_be;
    logic [31:0]                 data_addr;
    logic [DataWidth-1:0]        data_wdata;
    logic                        data_is_cap;
    logic                        dummy_instr_id;
    logic [4:0]                  rf_raddr_a;
    logic [4:0]                  rf_raddr_b;
    logic [4:0]                  rf_waddr_wb;
    logic                        rf_we_wb;
    logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;
    logic [IC_NUM_WAYS-1:0]      ic_tag_req;
    logic                        ic_tag_write;
    logic [IC_INDEX_W-1:0]       ic_tag_addr;
    logic [TagSizeECC-1:0]       ic_tag_wdata;
    logic [IC_NUM_WAYS-1:0]      ic_data_req;
    logic                        ic_data_write;
    logic [IC_INDEX_W-1:0]       ic_data_addr;
    logic [LineSizeECC-1:0]      ic_data_wdata;
    logic                        irq_pending;
    crash_dump_t                 crash_dump;
    logic                        double_fault_seen;
    logic                        icache_inval;
    logic                        core_busy;
    reg_cap_t                    rf_wcap_wb;
    logic                        rf_trsv_en;
    logic [4:0]                  rf_trsv_addr;
    logic [6:0]                  rf_trsv_par;
    logic [4:0]                  rf_trvk_addr;
    logic                        rf_trvk_en;
    logic                        rf_trvk_clrtag;
    logic [6:0]                  rf_trvk_par;
    logic                        tsmap_cs;
    logic [15:0]                 tsmap_addr;
    logic [MMRegDoutW-1:0]       mmreg_coreout;
  } delayed_outputs_t;

  delayed_outputs_t [OutputsOffset-1:0] core_outputs_q;
  delayed_outputs_t                     core_outputs_in;
  delayed_outputs_t                     shadow_outputs_d, shadow_outputs_q;

  // Assign core outputs to the structure
  assign core_outputs_in.instr_req         = instr_req_i;
  assign core_outputs_in.instr_addr        = instr_addr_i;
  assign core_outputs_in.data_req          = data_req_i;
  assign core_outputs_in.data_we           = data_we_i;
  assign core_outputs_in.data_be           = data_be_i;
  assign core_outputs_in.data_addr         = data_addr_i;
  assign core_outputs_in.data_wdata        = data_wdata_i;
  assign core_outputs_in.data_is_cap       = data_is_cap_i;
  assign core_outputs_in.dummy_instr_id    = dummy_instr_id_i;
  assign core_outputs_in.rf_raddr_a        = rf_raddr_a_i;
  assign core_outputs_in.rf_raddr_b        = rf_raddr_b_i;
  assign core_outputs_in.rf_waddr_wb       = rf_waddr_wb_i;
  assign core_outputs_in.rf_we_wb          = rf_we_wb_i;
  assign core_outputs_in.rf_wdata_wb_ecc   = rf_wdata_wb_ecc_i;
  assign core_outputs_in.ic_tag_req        = ic_tag_req_i;
  assign core_outputs_in.ic_tag_write      = ic_tag_write_i;
  assign core_outputs_in.ic_tag_addr       = ic_tag_addr_i;
  assign core_outputs_in.ic_tag_wdata      = ic_tag_wdata_i;
  assign core_outputs_in.ic_data_req       = ic_data_req_i;
  assign core_outputs_in.ic_data_write     = ic_data_write_i;
  assign core_outputs_in.ic_data_addr      = ic_data_addr_i;
  assign core_outputs_in.ic_data_wdata     = ic_data_wdata_i;
  assign core_outputs_in.irq_pending       = irq_pending_i;
  assign core_outputs_in.crash_dump        = crash_dump_i;
  assign core_outputs_in.double_fault_seen = double_fault_seen_i;
  assign core_outputs_in.icache_inval      = icache_inval_i;
  assign core_outputs_in.core_busy         = core_busy_i;
  assign core_outputs_in.rf_wcap_wb        = rf_wcap_wb_i;
  assign core_outputs_in.rf_trsv_en        = rf_trsv_en_i;
  assign core_outputs_in.rf_trsv_addr      = rf_trsv_addr_i;
  assign core_outputs_in.rf_trsv_par       = rf_trsv_par_i;
  assign core_outputs_in.rf_trvk_addr      = rf_trvk_addr_i;
  assign core_outputs_in.rf_trvk_en        = rf_trvk_en_i;
  assign core_outputs_in.rf_trvk_clrtag    = rf_trvk_clrtag_i;
  assign core_outputs_in.rf_trvk_par       = rf_trvk_par_i;
  assign core_outputs_in.tsmap_cs          = tsmap_cs_i;
  assign core_outputs_in.tsmap_addr        = tsmap_addr_i;
  assign core_outputs_in.mmreg_coreout     = mmreg_coreout_i;

  // Delay the outputs
  always_ff @(posedge clk_i) begin
    for (int unsigned i = 0; i < OutputsOffset - 1; i++) begin
      core_outputs_q[i] <= core_outputs_q[i+1];
    end
    core_outputs_q[OutputsOffset-1] <= core_outputs_in;
  end

  ///////////////////////////////
  // Shadow core instantiation //
  ///////////////////////////////

  logic shadow_alert_minor, shadow_alert_major;

  cheriot_core #(
    .PMPEnable         ( PMPEnable         ),
    .PMPGranularity    ( PMPGranularity    ),
    .PMPNumRegions     ( PMPNumRegions     ),
    .MHPMCounterNum    ( MHPMCounterNum    ),
    .MHPMCounterWidth  ( MHPMCounterWidth  ),
    .RV32E             ( RV32E             ),
    .RV32M             ( RV32M             ),
    .RV32B             ( RV32B             ),
    .BranchTargetALU   ( BranchTargetALU   ),
    .ICache            ( ICache            ),
    .ICacheECC         ( ICacheECC         ),
    .BusSizeECC        ( BusSizeECC        ),
    .TagSizeECC        ( TagSizeECC        ),
    .LineSizeECC       ( LineSizeECC       ),
    .BranchPredictor   ( BranchPredictor   ),
    .DbgTriggerEn      ( DbgTriggerEn      ),
    .DbgHwBreakNum     ( DbgHwBreakNum     ),
    .WritebackStage    ( WritebackStage    ),
    .ResetAll          ( ResetAll          ),
    .RndCnstLfsrSeed   ( RndCnstLfsrSeed   ),
    .RndCnstLfsrPerm   ( RndCnstLfsrPerm   ),
    .SecureIbex        ( SecureIbex        ),
    .DummyInstructions ( DummyInstructions ),
    .RegFileECC        ( RegFileECC        ),
    .RegFileDataWidth  ( RegFileDataWidth  ),
    .DmHaltAddr        ( DmHaltAddr        ),
    .DmExceptionAddr   ( DmExceptionAddr   ),
    .CHERIoTEn         ( CHERIoTEn         ),
    .DataWidth         ( DataWidth         ),
    .HeapBase          ( HeapBase          ),
    .TSMapBase         ( TSMapBase         ),
    .TSMapSize         ( TSMapSize         ),
    .MemCapFmt         ( MemCapFmt         ),
    .CheriPPLBC        ( CheriPPLBC        ),
    .CheriSBND2        ( CheriSBND2        ),
    .CheriTBRE         ( CheriTBRE         )
  ) u_shadow_core (
    .clk_i               (clk_i),
    .rst_ni              (rst_shadow_n),

    .hart_id_i           (hart_id_i),
    .boot_addr_i         (boot_addr_i),

    .cheri_pmode_i       (shadow_inputs_q[0].cheri_pmode),
    .cheri_tsafe_en_i    (shadow_inputs_q[0].cheri_tsafe_en),

    .instr_req_o         (shadow_outputs_d.instr_req),
    .instr_gnt_i         (shadow_inputs_q[0].instr_gnt),
    .instr_rvalid_i      (shadow_inputs_q[0].instr_rvalid),
    .instr_addr_o        (shadow_outputs_d.instr_addr),
    .instr_rdata_i       (shadow_inputs_q[0].instr_rdata),
    .instr_err_i         (shadow_inputs_q[0].instr_err),

    .data_req_o          (shadow_outputs_d.data_req),
    .data_gnt_i          (shadow_inputs_q[0].data_gnt),
    .data_rvalid_i       (shadow_inputs_q[0].data_rvalid),
    .data_we_o           (shadow_outputs_d.data_we),
    .data_be_o           (shadow_outputs_d.data_be),
    .data_addr_o         (shadow_outputs_d.data_addr),
    .data_wdata_o        (shadow_outputs_d.data_wdata),
    .data_is_cap_o       (shadow_outputs_d.data_is_cap),
    .data_rdata_i        (shadow_inputs_q[0].data_rdata),
    .data_err_i          (shadow_inputs_q[0].data_err),

    .dummy_instr_id_o    (shadow_outputs_d.dummy_instr_id),
    .rf_raddr_a_o        (shadow_outputs_d.rf_raddr_a),
    .rf_raddr_b_o        (shadow_outputs_d.rf_raddr_b),
    .rf_waddr_wb_o       (shadow_outputs_d.rf_waddr_wb),
    .rf_we_wb_o          (shadow_outputs_d.rf_we_wb),
    .rf_wdata_wb_ecc_o   (shadow_outputs_d.rf_wdata_wb_ecc),
    .rf_rdata_a_ecc_i    (shadow_inputs_q[0].rf_rdata_a_ecc),
    .rf_rdata_b_ecc_i    (shadow_inputs_q[0].rf_rdata_b_ecc),
    .rf_wcap_wb_o        (shadow_outputs_d.rf_wcap_wb),
    .rf_rcap_a_i         (shadow_inputs_q[0].rf_rcap_a),
    .rf_rcap_b_i         (shadow_inputs_q[0].rf_rcap_b),
    .rf_reg_rdy_i        (shadow_inputs_q[0].rf_reg_rdy),
    .rf_trsv_en_o        (shadow_outputs_d.rf_trsv_en),
    .rf_trsv_addr_o      (shadow_outputs_d.rf_trsv_addr),
    .rf_trsv_par_o       (shadow_outputs_d.rf_trsv_par),
    .rf_trvk_addr_o      (shadow_outputs_d.rf_trvk_addr),
    .rf_trvk_en_o        (shadow_outputs_d.rf_trvk_en),
    .rf_trvk_clrtag_o    (shadow_outputs_d.rf_trvk_clrtag),
    .rf_trvk_par_o       (shadow_outputs_d.rf_trvk_par),
    .tsmap_cs_o          (shadow_outputs_d.tsmap_cs),
    .tsmap_addr_o        (shadow_outputs_d.tsmap_addr),
    .tsmap_rdata_i       (shadow_inputs_q[0].tsmap_rdata),
    .mmreg_corein_i      (shadow_inputs_q[0].mmreg_corein),
    .mmreg_coreout_o     (shadow_outputs_d.mmreg_coreout),

    .ic_tag_req_o        (shadow_outputs_d.ic_tag_req),
    .ic_tag_write_o      (shadow_outputs_d.ic_tag_write),
    .ic_tag_addr_o       (shadow_outputs_d.ic_tag_addr),
    .ic_tag_wdata_o      (shadow_outputs_d.ic_tag_wdata),
    .ic_tag_rdata_i      (shadow_tag_rdata_q[0]),
    .ic_data_req_o       (shadow_outputs_d.ic_data_req),
    .ic_data_write_o     (shadow_outputs_d.ic_data_write),
    .ic_data_addr_o      (shadow_outputs_d.ic_data_addr),
    .ic_data_wdata_o     (shadow_outputs_d.ic_data_wdata),
    .ic_data_rdata_i     (shadow_data_rdata_q[0]),
    .ic_scr_key_valid_i  (shadow_inputs_q[0].ic_scr_key_valid),

    .irq_software_i      (shadow_inputs_q[0].irq_software),
    .irq_timer_i         (shadow_inputs_q[0].irq_timer),
    .irq_external_i      (shadow_inputs_q[0].irq_external),
    .irq_fast_i          (shadow_inputs_q[0].irq_fast),
    .irq_nm_i            (shadow_inputs_q[0].irq_nm),
    .irq_pending_o       (shadow_outputs_d.irq_pending),

    .debug_req_i         (shadow_inputs_q[0].debug_req),
    .crash_dump_o        (shadow_outputs_d.crash_dump),
    .double_fault_seen_o (shadow_outputs_d.double_fault_seen),

`ifdef RVFI
    .rvfi_valid          (),
    .rvfi_order          (),
    .rvfi_insn           (),
    .rvfi_trap           (),
    .rvfi_halt           (),
    .rvfi_intr           (),
    .rvfi_mode           (),
    .rvfi_ixl            (),
    .rvfi_rs1_addr       (),
    .rvfi_rs2_addr       (),
    .rvfi_rs3_addr       (),
    .rvfi_rs1_rdata      (),
    .rvfi_rs2_rdata      (),
    .rvfi_rs3_rdata      (),
    .rvfi_rd_addr        (),
    .rvfi_rd_wdata       (),
    .rvfi_pc_rdata       (),
    .rvfi_pc_wdata       (),
    .rvfi_mem_addr       (),
    .rvfi_mem_rmask      (),
    .rvfi_mem_wmask      (),
    .rvfi_mem_rdata      (),
    .rvfi_mem_wdata      (),
    .rvfi_ext_mip        (),
    .rvfi_ext_nmi        (),
    .rvfi_ext_debug_req  (),
    .rvfi_ext_mcycle     (),
    .rvfi_mem_wcap       (),
    .rvfi_mem_rcap       (),
    .rvfi_mem_is_cap     (),
    .rvfi_rd_wcap        (),
    .rvfi_rs2_rcap       (),
    .rvfi_rs1_rcap       (),
`endif

    .fetch_enable_i      (shadow_inputs_q[0].fetch_enable),
    .alert_minor_o       (shadow_alert_minor),
    .alert_major_o       (shadow_alert_major),
    .icache_inval_o      (shadow_outputs_d.icache_inval),
    .core_busy_o         (shadow_outputs_d.core_busy)
  );

  // Register the shadow core outputs
  always_ff @(posedge clk_i) begin
    shadow_outputs_q <= shadow_outputs_d;
  end

  /////////////////////////
  // Compare the outputs //
  /////////////////////////

  logic outputs_mismatch;

  // Any divergence between the (delayed) main-core outputs and the shadow core raises a major
  // internal alert; the comparison is only armed once the shadow reset sequence has completed.
  assign outputs_mismatch       = enable_cmp_q & (shadow_outputs_q != core_outputs_q[0]);
  assign alert_major_internal_o = outputs_mismatch | shadow_alert_major;
  assign alert_major_bus_o      = bus_intg_err | tsmap_intg_err;
  assign alert_minor_o          = shadow_alert_minor;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv new file mode 100644 index 0000000..522bb6b --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_fast.sv
@@ -0,0 +1,531 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +`define OP_L 15:0 +`define OP_H 31:16 + +/** + * Fast Multiplier and Division + * + * 16x16 kernel multiplier and Long Division + */ + +`include "prim_assert.sv" + +module cheriot_multdiv_fast #( + parameter cheriot_pkg::rv32m_e RV32M = cheriot_pkg::RV32MFast + ) ( + input logic clk_i, + input logic rst_ni, + input logic mult_en_i, // dynamic enable signal, for FSM control + input logic div_en_i, // dynamic enable signal, for FSM control + input logic mult_sel_i, // static decoder output, for data muxes + input logic div_sel_i, // static decoder output, for data muxes + input cheriot_pkg::md_op_e operator_i, + input logic [1:0] signed_mode_i, + input logic [31:0] op_a_i, + input logic [31:0] op_b_i, + input logic [33:0] alu_adder_ext_i, + input logic [31:0] alu_adder_i, + input logic equal_to_zero_i, + input logic data_ind_timing_i, + + output logic [32:0] alu_operand_a_o, + output logic [32:0] alu_operand_b_o, + + input logic [33:0] imd_val_q_i[2], + output logic [33:0] imd_val_d_o[2], + output logic [1:0] imd_val_we_o, + + input logic multdiv_ready_id_i, + + output logic [31:0] multdiv_result_o, + output logic valid_o +); + + import cheriot_pkg::*; + + // Both multiplier variants + logic signed [34:0] mac_res_signed; + logic [34:0] mac_res_ext; + logic [33:0] accum; + logic sign_a, sign_b; + logic mult_valid; + logic signed_mult; + + // Results that become intermediate value depending on whether mul or div is being calculated + logic [33:0] mac_res_d, op_remainder_d; + // Raw output of MAC calculation + logic [33:0] mac_res; + + // Divider signals + logic div_sign_a, div_sign_b; + logic is_greater_equal; + logic div_change_sign, rem_change_sign; + logic [31:0] one_shift; + logic [31:0] op_denominator_q; + logic 
[31:0] op_numerator_q; + logic [31:0] op_quotient_q; + logic [31:0] op_denominator_d; + logic [31:0] op_numerator_d; + logic [31:0] op_quotient_d; + logic [31:0] next_remainder; + logic [32:0] next_quotient; + logic [31:0] res_adder_h; + logic div_valid; + logic [ 4:0] div_counter_q, div_counter_d; + logic multdiv_en; + logic mult_hold; + logic div_hold; + logic div_by_zero_d, div_by_zero_q; + + logic mult_en_internal; + logic div_en_internal; + + typedef enum logic [2:0] { + MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH + } md_fsm_e; + md_fsm_e md_state_q, md_state_d; + + logic unused_mult_sel_i; + assign unused_mult_sel_i = mult_sel_i; + + assign mult_en_internal = mult_en_i & ~mult_hold; + assign div_en_internal = div_en_i & ~div_hold; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + div_counter_q <= '0; + md_state_q <= MD_IDLE; + op_numerator_q <= '0; + op_quotient_q <= '0; + div_by_zero_q <= '0; + end else if (div_en_internal) begin + div_counter_q <= div_counter_d; + op_numerator_q <= op_numerator_d; + op_quotient_q <= op_quotient_d; + md_state_q <= md_state_d; + div_by_zero_q <= div_by_zero_d; + end + end + + `ASSERT_KNOWN(DivEnKnown, div_en_internal) + `ASSERT_KNOWN(MultEnKnown, mult_en_internal) + `ASSERT_KNOWN(MultDivEnKnown, multdiv_en) + + assign multdiv_en = mult_en_internal | div_en_internal; + + // Intermediate value register shared with ALU + assign imd_val_d_o[0] = div_sel_i ? op_remainder_d : mac_res_d; + assign imd_val_we_o[0] = multdiv_en; + + assign imd_val_d_o[1] = {2'b0, op_denominator_d}; + assign imd_val_we_o[1] = div_en_internal; + assign op_denominator_q = imd_val_q_i[1][31:0]; + logic [1:0] unused_imd_val; + assign unused_imd_val = imd_val_q_i[1][33:32]; + logic unused_mac_res_ext; + assign unused_mac_res_ext = mac_res_ext[34]; + + assign signed_mult = (signed_mode_i != 2'b00); + assign multdiv_result_o = div_sel_i ? 
imd_val_q_i[0][31:0] : mac_res_d[31:0]; + + // The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a + // single cycle and MULH instructions in two cycles. + if (RV32M == RV32MSingleCycle) begin : gen_mult_single_cycle + + typedef enum logic { + MULL, MULH + } mult_fsm_e; + mult_fsm_e mult_state_q, mult_state_d; + + logic signed [33:0] mult1_res, mult2_res, mult3_res; + logic [33:0] mult1_res_uns; + logic [33:32] unused_mult1_res_uns; + logic [15:0] mult1_op_a, mult1_op_b; + logic [15:0] mult2_op_a, mult2_op_b; + logic [15:0] mult3_op_a, mult3_op_b; + logic mult1_sign_a, mult1_sign_b; + logic mult2_sign_a, mult2_sign_b; + logic mult3_sign_a, mult3_sign_b; + logic [33:0] summand1, summand2, summand3; + + assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b}); + assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b}); + assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b}); + + assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3); + + assign mult1_res_uns = $unsigned(mult1_res); + assign mac_res_ext = $unsigned(mac_res_signed); + assign mac_res = mac_res_ext[33:0]; + + assign sign_a = signed_mode_i[0] & op_a_i[31]; + assign sign_b = signed_mode_i[1] & op_b_i[31]; + + // The first two multipliers are only used in state 1 (MULL). We can assign them statically. 
+ // al*bl + assign mult1_sign_a = 1'b0; + assign mult1_sign_b = 1'b0; + assign mult1_op_a = op_a_i[`OP_L]; + assign mult1_op_b = op_b_i[`OP_L]; + + // al*bh + assign mult2_sign_a = 1'b0; + assign mult2_sign_b = sign_b; + assign mult2_op_a = op_a_i[`OP_L]; + assign mult2_op_b = op_b_i[`OP_H]; + + // used in MULH + assign accum[17:0] = imd_val_q_i[0][33:16]; + assign accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}}; + + always_comb begin + // Default values == MULL + + // ah*bl + mult3_sign_a = sign_a; + mult3_sign_b = 1'b0; + mult3_op_a = op_a_i[`OP_H]; + mult3_op_b = op_b_i[`OP_L]; + + summand1 = {18'h0, mult1_res_uns[`OP_H]}; + summand2 = $unsigned(mult2_res); + summand3 = $unsigned(mult3_res); + + // mac_res = A*B[47:16], mult1_res = A*B[15:0] + mac_res_d = {2'b0, mac_res[`OP_L], mult1_res_uns[`OP_L]}; + mult_valid = mult_en_i; + mult_state_d = MULL; + + mult_hold = 1'b0; + + unique case (mult_state_q) + + MULL: begin + if (operator_i != MD_OP_MULL) begin + mac_res_d = mac_res; + mult_valid = 1'b0; + mult_state_d = MULH; + end else begin + mult_hold = ~multdiv_ready_id_i; + end + end + + MULH: begin + // ah*bh + mult3_sign_a = sign_a; + mult3_sign_b = sign_b; + mult3_op_a = op_a_i[`OP_H]; + mult3_op_b = op_b_i[`OP_H]; + mac_res_d = mac_res; + + summand1 = '0; + summand2 = accum; + summand3 = $unsigned(mult3_res); + + mult_state_d = MULL; + mult_valid = 1'b1; + + mult_hold = ~multdiv_ready_id_i; + end + + default: begin + mult_state_d = MULL; + end + + endcase // mult_state_q + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mult_state_q <= MULL; + end else begin + if (mult_en_internal) begin + mult_state_q <= mult_state_d; + end + end + end + + assign unused_mult1_res_uns = mult1_res_uns[33:32]; + + // States must be knwon/valid. + `ASSERT_KNOWN(IbexMultStateKnown, mult_state_q) + + // The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles + // and MULH instructions in 4 cycles. 
+ end else begin : gen_mult_fast + logic [15:0] mult_op_a; + logic [15:0] mult_op_b; + + typedef enum logic [1:0] { + ALBL, ALBH, AHBL, AHBH + } mult_fsm_e; + mult_fsm_e mult_state_q, mult_state_d; + + // The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since: + // 1. The 2 MSBs of the multiplicants are always equal, and + // 2. The 16 MSBs of the addend (accum[33:18]) are always equal. + // Thus, it is safe to ignore mac_res_ext[34]. + assign mac_res_signed = + $signed({sign_a, mult_op_a}) * $signed({sign_b, mult_op_b}) + $signed(accum); + assign mac_res_ext = $unsigned(mac_res_signed); + assign mac_res = mac_res_ext[33:0]; + + always_comb begin + mult_op_a = op_a_i[`OP_L]; + mult_op_b = op_b_i[`OP_L]; + sign_a = 1'b0; + sign_b = 1'b0; + accum = imd_val_q_i[0]; + mac_res_d = mac_res; + mult_state_d = mult_state_q; + mult_valid = 1'b0; + mult_hold = 1'b0; + + unique case (mult_state_q) + + ALBL: begin + // al*bl + mult_op_a = op_a_i[`OP_L]; + mult_op_b = op_b_i[`OP_L]; + sign_a = 1'b0; + sign_b = 1'b0; + accum = '0; + mac_res_d = mac_res; + mult_state_d = ALBH; + end + + ALBH: begin + // al*bh<<16 + mult_op_a = op_a_i[`OP_L]; + mult_op_b = op_b_i[`OP_H]; + sign_a = 1'b0; + sign_b = signed_mode_i[1] & op_b_i[31]; + // result of AL*BL (in imd_val_q_i[0]) always unsigned with no carry + accum = {18'b0, imd_val_q_i[0][31:16]}; + if (operator_i == MD_OP_MULL) begin + mac_res_d = {2'b0, mac_res[`OP_L], imd_val_q_i[0][`OP_L]}; + end else begin + // MD_OP_MULH + mac_res_d = mac_res; + end + mult_state_d = AHBL; + end + + AHBL: begin + // ah*bl<<16 + mult_op_a = op_a_i[`OP_H]; + mult_op_b = op_b_i[`OP_L]; + sign_a = signed_mode_i[0] & op_a_i[31]; + sign_b = 1'b0; + if (operator_i == MD_OP_MULL) begin + accum = {18'b0, imd_val_q_i[0][31:16]}; + mac_res_d = {2'b0, mac_res[15:0], imd_val_q_i[0][15:0]}; + mult_valid = 1'b1; + + // Note no state transition will occur if mult_hold is set + mult_state_d = ALBL; + mult_hold = ~multdiv_ready_id_i; + end else begin + 
accum = imd_val_q_i[0]; + mac_res_d = mac_res; + mult_state_d = AHBH; + end + end + + AHBH: begin + // only MD_OP_MULH here + // ah*bh + mult_op_a = op_a_i[`OP_H]; + mult_op_b = op_b_i[`OP_H]; + sign_a = signed_mode_i[0] & op_a_i[31]; + sign_b = signed_mode_i[1] & op_b_i[31]; + accum[17: 0] = imd_val_q_i[0][33:16]; + accum[33:18] = {16{signed_mult & imd_val_q_i[0][33]}}; + // result of AH*BL is not signed only if signed_mode_i == 2'b00 + mac_res_d = mac_res; + mult_valid = 1'b1; + + // Note no state transition will occur if mult_hold is set + mult_state_d = ALBL; + mult_hold = ~multdiv_ready_id_i; + end + default: begin + mult_state_d = ALBL; + end + endcase // mult_state_q + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mult_state_q <= ALBL; + end else begin + if (mult_en_internal) begin + mult_state_q <= mult_state_d; + end + end + end + + // States must be knwon/valid. + `ASSERT_KNOWN(IbexMultStateKnown, mult_state_q) + + end // gen_mult_fast + + // Divider + assign res_adder_h = alu_adder_ext_i[32:1]; + logic [1:0] unused_alu_adder_ext; + assign unused_alu_adder_ext = {alu_adder_ext_i[33],alu_adder_ext_i[0]}; + + assign next_remainder = is_greater_equal ? res_adder_h[31:0] : imd_val_q_i[0][31:0]; + assign next_quotient = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} : + {1'b0, op_quotient_q}; + + assign one_shift = {31'b0, 1'b1} << div_counter_q; + + // The adder in the ALU computes alu_operand_a_o + alu_operand_b_o which means + // Remainder - Divisor. 
If Remainder - Divisor >= 0, is_greater_equal is equal to 1, + // the next Remainder is Remainder - Divisor contained in res_adder_h and the + always_comb begin + if ((imd_val_q_i[0][31] ^ op_denominator_q[31]) == 1'b0) begin + is_greater_equal = (res_adder_h[31] == 1'b0); + end else begin + is_greater_equal = imd_val_q_i[0][31]; + end + end + + assign div_sign_a = op_a_i[31] & signed_mode_i[0]; + assign div_sign_b = op_b_i[31] & signed_mode_i[1]; + assign div_change_sign = (div_sign_a ^ div_sign_b) & ~div_by_zero_q; + assign rem_change_sign = div_sign_a; + + + always_comb begin + div_counter_d = div_counter_q - 5'h1; + op_remainder_d = imd_val_q_i[0]; + op_quotient_d = op_quotient_q; + md_state_d = md_state_q; + op_numerator_d = op_numerator_q; + op_denominator_d = op_denominator_q; + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_b_i, 1'b1}; + div_valid = 1'b0; + div_hold = 1'b0; + div_by_zero_d = div_by_zero_q; + + unique case (md_state_q) + MD_IDLE: begin + if (operator_i == MD_OP_DIV) begin + // Check if the Denominator is 0 + // quotient for division by 0 is specified to be -1 + // Note with data-independent time option, the full divide operation will proceed as + // normal and will naturally return -1 + op_remainder_d = '1; + md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A; + // Record that this is a div by zero to stop the sign change at the end of the + // division (in data_ind_timing mode). + div_by_zero_d = equal_to_zero_i; + end else begin + // Check if the Denominator is 0 + // remainder for division by 0 is specified to be the numerator (operand a) + // Note with data-independent time option, the full divide operation will proceed as + // normal and will naturally return operand a + op_remainder_d = {2'b0, op_a_i}; + md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? 
MD_FINISH : MD_ABS_A; + end + // 0 - B = 0 iff B == 0 + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_b_i, 1'b1}; + div_counter_d = 5'd31; + end + + MD_ABS_A: begin + // quotient + op_quotient_d = '0; + // A abs value + op_numerator_d = div_sign_a ? alu_adder_i : op_a_i; + md_state_d = MD_ABS_B; + div_counter_d = 5'd31; + // ABS(A) = 0 - A + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_a_i, 1'b1}; + end + + MD_ABS_B: begin + // remainder + op_remainder_d = { 33'h0, op_numerator_q[31]}; + // B abs value + op_denominator_d = div_sign_b ? alu_adder_i : op_b_i; + md_state_d = MD_COMP; + div_counter_d = 5'd31; + // ABS(B) = 0 - B + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_b_i, 1'b1}; + end + + MD_COMP: begin + op_remainder_d = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]}; + op_quotient_d = next_quotient[31:0]; + md_state_d = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP; + // Division + alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder + alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment + end + + MD_LAST: begin + if (operator_i == MD_OP_DIV) begin + // this time we save the quotient in op_remainder_d (i.e. imd_val_q_i[0]) since + // we do not need anymore the remainder + op_remainder_d = {1'b0, next_quotient}; + end else begin + // this time we do not save the quotient anymore since we need only the remainder + op_remainder_d = {2'b0, next_remainder[31:0]}; + end + // Division + alu_operand_a_o = {imd_val_q_i[0][31:0], 1'b1}; // it contains the remainder + alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment + + md_state_d = MD_CHANGE_SIGN; + end + + MD_CHANGE_SIGN: begin + md_state_d = MD_FINISH; + if (operator_i == MD_OP_DIV) begin + op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : imd_val_q_i[0]; + end else begin + op_remainder_d = (rem_change_sign) ? 
{2'h0, alu_adder_i} : imd_val_q_i[0]; + end + // ABS(Quotient) = 0 - Quotient (or Remainder) + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~imd_val_q_i[0][31:0], 1'b1}; + end + + MD_FINISH: begin + // Hold result until ID stage is ready to accept it + // Note no state transition will occur if div_hold is set + md_state_d = MD_IDLE; + div_hold = ~multdiv_ready_id_i; + div_valid = 1'b1; + end + + default: begin + md_state_d = MD_IDLE; + end + endcase // md_state_q + end + + assign valid_o = mult_valid | div_valid; + + // States must be knwon/valid. + `ASSERT(IbexMultDivStateValid, md_state_q inside { + MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH}) + +`ifdef FORMAL + `ifdef YOSYS + `include "formal_tb_frag.svh" + `endif +`endif + +endmodule // ibex_mult
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv new file mode 100644 index 0000000..8fbc929 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_multdiv_slow.sv
@@ -0,0 +1,374 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Slow Multiplier and Division + * + * Baugh-Wooley multiplier and Long Division + */ + +`include "prim_assert.sv" + +module cheriot_multdiv_slow +( + input logic clk_i, + input logic rst_ni, + input logic mult_en_i, // dynamic enable signal, for FSM control + input logic div_en_i, // dynamic enable signal, for FSM control + input logic mult_sel_i, // static decoder output, for data muxes + input logic div_sel_i, // static decoder output, for data muxes + input cheriot_pkg::md_op_e operator_i, + input logic [1:0] signed_mode_i, + input logic [31:0] op_a_i, + input logic [31:0] op_b_i, + input logic [33:0] alu_adder_ext_i, + input logic [31:0] alu_adder_i, + input logic equal_to_zero_i, + input logic data_ind_timing_i, + + output logic [32:0] alu_operand_a_o, + output logic [32:0] alu_operand_b_o, + + input logic [33:0] imd_val_q_i[2], + output logic [33:0] imd_val_d_o[2], + output logic [1:0] imd_val_we_o, + + input logic multdiv_ready_id_i, + + output logic [31:0] multdiv_result_o, + + output logic valid_o +); + + import cheriot_pkg::*; + + typedef enum logic [2:0] { + MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH + } md_fsm_e; + md_fsm_e md_state_q, md_state_d; + + logic [32:0] accum_window_q, accum_window_d; + logic unused_imd_val0; + logic [ 1:0] unused_imd_val1; + + logic [32:0] res_adder_l; + logic [32:0] res_adder_h; + + logic [ 4:0] multdiv_count_q, multdiv_count_d; + logic [32:0] op_b_shift_q, op_b_shift_d; + logic [32:0] op_a_shift_q, op_a_shift_d; + logic [32:0] op_a_ext, op_b_ext; + logic [32:0] one_shift; + logic [32:0] op_a_bw_pp, op_a_bw_last_pp; + logic [31:0] b_0; + logic sign_a, sign_b; + logic [32:0] next_quotient; + logic [31:0] next_remainder; + logic [31:0] 
op_numerator_q, op_numerator_d; + logic is_greater_equal; + logic div_change_sign, rem_change_sign; + logic div_by_zero_d, div_by_zero_q; + logic multdiv_hold; + logic multdiv_en; + + // (accum_window_q + op_a_shift_q) + assign res_adder_l = alu_adder_ext_i[32:0]; + // (accum_window_q + op_a_shift_q)>>1 + assign res_adder_h = alu_adder_ext_i[33:1]; + + ///////////////////// + // ALU Operand MUX // + ///////////////////// + + // Intermediate value register shared with ALU + assign imd_val_d_o[0] = {1'b0,accum_window_d}; + assign imd_val_we_o[0] = ~multdiv_hold; + assign accum_window_q = imd_val_q_i[0][32:0]; + assign unused_imd_val0 = imd_val_q_i[0][33]; + + assign imd_val_d_o[1] = {2'b00, op_numerator_d}; + assign imd_val_we_o[1] = multdiv_en; + assign op_numerator_q = imd_val_q_i[1][31:0]; + assign unused_imd_val1 = imd_val_q_i[1][33:32]; + + always_comb begin + alu_operand_a_o = accum_window_q; + + unique case (operator_i) + + MD_OP_MULL: begin + alu_operand_b_o = op_a_bw_pp; + end + + MD_OP_MULH: begin + alu_operand_b_o = (md_state_q == MD_LAST) ? 
op_a_bw_last_pp : op_a_bw_pp; + end + + MD_OP_DIV, + MD_OP_REM: begin + unique case (md_state_q) + MD_IDLE: begin + // 0 - B = 0 iff B == 0 + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_b_i, 1'b1}; + end + MD_ABS_A: begin + // ABS(A) = 0 - A + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_a_i, 1'b1}; + end + MD_ABS_B: begin + // ABS(B) = 0 - B + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~op_b_i, 1'b1}; + end + MD_CHANGE_SIGN: begin + // ABS(Quotient) = 0 - Quotient (or Reminder) + alu_operand_a_o = {32'h0 , 1'b1}; + alu_operand_b_o = {~accum_window_q[31:0], 1'b1}; + end + default: begin + // Division + alu_operand_a_o = {accum_window_q[31:0], 1'b1}; // it contains the remainder + alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1}; // -denominator two's compliment + end + endcase + end + default: begin + alu_operand_a_o = accum_window_q; + alu_operand_b_o = {~op_b_shift_q[31:0], 1'b1}; + end + endcase + end + + // Multiplier partial product calculation + assign b_0 = {32{op_b_shift_q[0]}}; + assign op_a_bw_pp = { ~(op_a_shift_q[32] & op_b_shift_q[0]), (op_a_shift_q[31:0] & b_0) }; + assign op_a_bw_last_pp = { (op_a_shift_q[32] & op_b_shift_q[0]), ~(op_a_shift_q[31:0] & b_0) }; + + // Sign extend the input operands + assign sign_a = op_a_i[31] & signed_mode_i[0]; + assign sign_b = op_b_i[31] & signed_mode_i[1]; + + assign op_a_ext = {sign_a, op_a_i}; + assign op_b_ext = {sign_b, op_b_i}; + + // Divider calculations + + // The adder in the ALU computes Remainder - Divisor. If Remainder - Divisor >= 0, + // is_greater_equal is true, the next Remainder is the subtraction result and the Quotient + // multdiv_count_q-th bit is set to 1. + assign is_greater_equal = (accum_window_q[31] == op_b_shift_q[31]) ? + ~res_adder_h[31] : accum_window_q[31]; + + assign one_shift = {32'b0, 1'b1} << multdiv_count_q; + + assign next_remainder = is_greater_equal ? res_adder_h[31:0] : accum_window_q[31:0]; + assign next_quotient = is_greater_equal ? 
op_a_shift_q | one_shift : op_a_shift_q; + + assign div_change_sign = (sign_a ^ sign_b) & ~div_by_zero_q; + assign rem_change_sign = sign_a; + + always_comb begin + multdiv_count_d = multdiv_count_q; + accum_window_d = accum_window_q; + op_b_shift_d = op_b_shift_q; + op_a_shift_d = op_a_shift_q; + op_numerator_d = op_numerator_q; + md_state_d = md_state_q; + multdiv_hold = 1'b0; + div_by_zero_d = div_by_zero_q; + if (mult_sel_i || div_sel_i) begin + unique case (md_state_q) + MD_IDLE: begin + unique case (operator_i) + MD_OP_MULL: begin + op_a_shift_d = op_a_ext << 1; + accum_window_d = { ~(op_a_ext[32] & op_b_i[0]), + op_a_ext[31:0] & {32{op_b_i[0]}} }; + op_b_shift_d = op_b_ext >> 1; + // Proceed with multiplication by 0/1 in data-independent time mode + md_state_d = (!data_ind_timing_i && ((op_b_ext >> 1) == 0)) ? MD_LAST : MD_COMP; + end + MD_OP_MULH: begin + op_a_shift_d = op_a_ext; + accum_window_d = { 1'b1, ~(op_a_ext[32] & op_b_i[0]), + op_a_ext[31:1] & {31{op_b_i[0]}} }; + op_b_shift_d = op_b_ext >> 1; + md_state_d = MD_COMP; + end + MD_OP_DIV: begin + // Check if the denominator is 0 + // quotient for division by 0 is specified to be -1 + // Note with data-independent time option, the full divide operation will proceed as + // normal and will naturally return -1 + accum_window_d = {33{1'b1}}; + md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? MD_FINISH : MD_ABS_A; + // Record that this is a div by zero to stop the sign change at the end of the + // division (in data_ind_timing mode). + div_by_zero_d = equal_to_zero_i; + end + MD_OP_REM: begin + // Check if the denominator is 0 + // remainder for division by 0 is specified to be the numerator (operand a) + // Note with data-independent time option, the full divide operation will proceed as + // normal and will naturally return operand a + accum_window_d = op_a_ext; + md_state_d = (!data_ind_timing_i && equal_to_zero_i) ? 
MD_FINISH : MD_ABS_A; + end + default:; + endcase + multdiv_count_d = 5'd31; + end + + MD_ABS_A: begin + // quotient + op_a_shift_d = '0; + // A abs value + op_numerator_d = sign_a ? alu_adder_i : op_a_i; + md_state_d = MD_ABS_B; + end + + MD_ABS_B: begin + // remainder + accum_window_d = {32'h0, op_numerator_q[31]}; + // B abs value + op_b_shift_d = sign_b ? {1'b0, alu_adder_i} : {1'b0, op_b_i}; + md_state_d = MD_COMP; + end + + MD_COMP: begin + multdiv_count_d = multdiv_count_q - 5'h1; + unique case (operator_i) + MD_OP_MULL: begin + accum_window_d = res_adder_l; + op_a_shift_d = op_a_shift_q << 1; + op_b_shift_d = op_b_shift_q >> 1; + // Multiplication is complete once op_b is zero, unless in data_ind_timing mode where + // the maximum possible shift-add operations will be completed regardless of op_b + md_state_d = ((!data_ind_timing_i && (op_b_shift_d == 0)) || + (multdiv_count_q == 5'd1)) ? MD_LAST : MD_COMP; + end + MD_OP_MULH: begin + accum_window_d = res_adder_h; + op_a_shift_d = op_a_shift_q; + op_b_shift_d = op_b_shift_q >> 1; + md_state_d = (multdiv_count_q == 5'd1) ? MD_LAST : MD_COMP; + end + MD_OP_DIV, + MD_OP_REM: begin + accum_window_d = {next_remainder[31:0], op_numerator_q[multdiv_count_d]}; + op_a_shift_d = next_quotient; + md_state_d = (multdiv_count_q == 5'd1) ? 
MD_LAST : MD_COMP; + end + default: ; + endcase + end + + MD_LAST: begin + unique case (operator_i) + MD_OP_MULL: begin + accum_window_d = res_adder_l; + + // Note no state transition will occur if multdiv_hold is set + md_state_d = MD_IDLE; + multdiv_hold = ~multdiv_ready_id_i; + end + MD_OP_MULH: begin + accum_window_d = res_adder_l; + md_state_d = MD_IDLE; + + // Note no state transition will occur if multdiv_hold is set + md_state_d = MD_IDLE; + multdiv_hold = ~multdiv_ready_id_i; + end + MD_OP_DIV: begin + // this time we save the quotient in accum_window_q since we do not need anymore the + // remainder + accum_window_d = next_quotient; + md_state_d = MD_CHANGE_SIGN; + end + MD_OP_REM: begin + // this time we do not save the quotient anymore since we need only the remainder + accum_window_d = {1'b0, next_remainder[31:0]}; + md_state_d = MD_CHANGE_SIGN; + end + default: ; + endcase + end + + MD_CHANGE_SIGN: begin + md_state_d = MD_FINISH; + unique case (operator_i) + MD_OP_DIV: + accum_window_d = div_change_sign ? {1'b0,alu_adder_i} : accum_window_q; + MD_OP_REM: + accum_window_d = rem_change_sign ? 
{1'b0,alu_adder_i} : accum_window_q; + default: ; + endcase + end + + MD_FINISH: begin + // Note no state transition will occur if multdiv_hold is set + md_state_d = MD_IDLE; + multdiv_hold = ~multdiv_ready_id_i; + end + + default: begin + md_state_d = MD_IDLE; + end + endcase // md_state_q + end // (mult_sel_i || div_sel_i) + end + + ////////////////////////////////////////// + // Mutliplier / Divider state registers // + ////////////////////////////////////////// + + assign multdiv_en = (mult_en_i | div_en_i) & ~multdiv_hold; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + multdiv_count_q <= 5'h0; + op_b_shift_q <= 33'h0; + op_a_shift_q <= 33'h0; + md_state_q <= MD_IDLE; + div_by_zero_q <= 1'b0; + end else if (multdiv_en) begin + multdiv_count_q <= multdiv_count_d; + op_b_shift_q <= op_b_shift_d; + op_a_shift_q <= op_a_shift_d; + md_state_q <= md_state_d; + div_by_zero_q <= div_by_zero_d; + end + end + + ///////////// + // Outputs // + ///////////// + + assign valid_o = (md_state_q == MD_FINISH) | + (md_state_q == MD_LAST & + (operator_i == MD_OP_MULL | + operator_i == MD_OP_MULH)); + + assign multdiv_result_o = div_en_i ? accum_window_q[31:0] : res_adder_l[31:0]; + + //////////////// + // Assertions // + //////////////// + + // State must be valid. + `ASSERT(IbexMultDivStateValid, md_state_q inside { + MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH + }, clk_i, !rst_ni) + +`ifdef FORMAL + `ifdef YOSYS + `include "formal_tb_frag.svh" + `endif +`endif + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv new file mode 100644 index 0000000..d40fd94 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_pkg.sv
@@ -0,0 +1,676 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2017 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Package with constants used by Ibex + */ +package cheriot_pkg; + + //////////////// + // IO Structs // + //////////////// + + typedef struct packed { + logic [31:0] current_pc; + logic [31:0] next_pc; + logic [31:0] last_data_addr; + logic [31:0] exception_addr; + } crash_dump_t; + + typedef struct packed { + logic dummy_instr_id; + logic [4:0] raddr_a; + logic [4:0] waddr_a; + logic we_a; + logic [4:0] raddr_b; + } core2rf_t; + + ///////////////////// + // Parameter Enums // + ///////////////////// + + typedef enum integer { + RegFileFF = 0, + RegFileFPGA = 1, + RegFileLatch = 2 + } regfile_e; + + typedef enum integer { + RV32MNone = 0, + RV32MSlow = 1, + RV32MFast = 2, + RV32MSingleCycle = 3 + } rv32m_e; + + typedef enum integer { + RV32BNone = 0, + RV32BBalanced = 1, + RV32BOTEarlGrey = 2, + RV32BFull = 3 + } rv32b_e; + + ///////////// + // Opcodes // + ///////////// + + typedef enum logic [6:0] { + OPCODE_LOAD = 7'h03, + OPCODE_MISC_MEM = 7'h0f, + OPCODE_OP_IMM = 7'h13, + OPCODE_AUIPC = 7'h17, + OPCODE_STORE = 7'h23, + OPCODE_OP = 7'h33, + OPCODE_LUI = 7'h37, + OPCODE_BRANCH = 7'h63, + OPCODE_JALR = 7'h67, + OPCODE_JAL = 7'h6f, + OPCODE_SYSTEM = 7'h73, + OPCODE_CHERI = 7'h5b, + OPCODE_AUICGP = 7'h7b + } opcode_e; + + + //////////////////// + // ALU operations // + //////////////////// + + typedef enum logic [6:0] { + // Arithmetics + ALU_ADD, + ALU_SUB, + + // Logics + ALU_XOR, + ALU_OR, + ALU_AND, + // RV32B + ALU_XNOR, + ALU_ORN, + ALU_ANDN, + + // Shifts + ALU_SRA, + ALU_SRL, + ALU_SLL, + // RV32B + ALU_SRO, + ALU_SLO, + ALU_ROR, + ALU_ROL, + 
ALU_GREV, + ALU_GORC, + ALU_SHFL, + ALU_UNSHFL, + ALU_XPERM_N, + ALU_XPERM_B, + ALU_XPERM_H, + + // Address Calculations + // RV32B + ALU_SH1ADD, + ALU_SH2ADD, + ALU_SH3ADD, + + // Comparisons + ALU_LT, + ALU_LTU, + ALU_GE, + ALU_GEU, + ALU_EQ, + ALU_NE, + // RV32B + ALU_MIN, + ALU_MINU, + ALU_MAX, + ALU_MAXU, + + // Pack + // RV32B + ALU_PACK, + ALU_PACKU, + ALU_PACKH, + + // Sign-Extend + // RV32B + ALU_SEXTB, + ALU_SEXTH, + + // Bitcounting + // RV32B + ALU_CLZ, + ALU_CTZ, + ALU_CPOP, + + // Set lower than + ALU_SLT, + ALU_SLTU, + + // Ternary Bitmanip Operations + // RV32B + ALU_CMOV, + ALU_CMIX, + ALU_FSL, + ALU_FSR, + + // Single-Bit Operations + // RV32B + ALU_BSET, + ALU_BCLR, + ALU_BINV, + ALU_BEXT, + + // Bit Compress / Decompress + // RV32B + ALU_BCOMPRESS, + ALU_BDECOMPRESS, + + // Bit Field Place + // RV32B + ALU_BFP, + + // Carry-less Multiply + // RV32B + ALU_CLMUL, + ALU_CLMULR, + ALU_CLMULH, + + // Cyclic Redundancy Check + ALU_CRC32_B, + ALU_CRC32C_B, + ALU_CRC32_H, + ALU_CRC32C_H, + ALU_CRC32_W, + ALU_CRC32C_W + } alu_op_e; + + typedef enum logic [1:0] { + // Multiplier/divider + MD_OP_MULL, + MD_OP_MULH, + MD_OP_DIV, + MD_OP_REM + } md_op_e; + + + ////////////////////////////////// + // Control and status registers // + ////////////////////////////////// + + // CSR operations + typedef enum logic [1:0] { + CSR_OP_READ, + CSR_OP_WRITE, + CSR_OP_SET, + CSR_OP_CLEAR + } csr_op_e; + + // Privileged mode + typedef enum logic[1:0] { + PRIV_LVL_M = 2'b11, + PRIV_LVL_H = 2'b10, + PRIV_LVL_S = 2'b01, + PRIV_LVL_U = 2'b00 + } priv_lvl_e; + + // Constants for the dcsr.xdebugver fields + typedef enum logic[3:0] { + XDEBUGVER_NO = 4'd0, // no external debug support + XDEBUGVER_STD = 4'd4, // external debug according to RISC-V debug spec + XDEBUGVER_NONSTD = 4'd15 // debug not conforming to RISC-V debug spec + } x_debug_ver_e; + + ////////////// + // WB stage // + ////////////// + + // Type of instruction present in writeback stage + typedef enum logic[1:0] { 
+ WB_INSTR_LOAD, // Instruction is awaiting load data + WB_INSTR_STORE, // Instruction is awaiting store response + WB_INSTR_OTHER // Instruction doesn't fit into above categories + } wb_instr_type_e; + + ////////////// + // ID stage // + ////////////// + + // Operand a selection + typedef enum logic[1:0] { + OP_A_REG_A, + OP_A_FWD, + OP_A_CURRPC, + OP_A_IMM + } op_a_sel_e; + + // Immediate a selection + typedef enum logic { + IMM_A_Z, + IMM_A_ZERO + } imm_a_sel_e; + + // Operand b selection + typedef enum logic { + OP_B_REG_B, + OP_B_IMM + } op_b_sel_e; + + // Immediate b selection + typedef enum logic [2:0] { + IMM_B_I, + IMM_B_S, + IMM_B_B, + IMM_B_U, + IMM_B_J, + IMM_B_INCR_PC, + IMM_B_INCR_ADDR + } imm_b_sel_e; + + // Regfile write data selection + typedef enum logic { + RF_WD_EX, + RF_WD_CSR + } rf_wd_sel_e; + + + ////////////// + // IF stage // + ////////////// + + // PC mux selection + typedef enum logic [2:0] { + PC_BOOT, + PC_JUMP, + PC_EXC, + PC_ERET, + PC_DRET, + PC_BP + } pc_sel_e; + + // Exception PC mux selection + typedef enum logic [1:0] { + EXC_PC_EXC, + EXC_PC_IRQ, + EXC_PC_DBD, + EXC_PC_DBG_EXC // Exception while in debug mode + } exc_pc_sel_e; + + // Interrupt requests + typedef struct packed { + logic irq_software; + logic irq_timer; + logic irq_external; + logic [14:0] irq_fast; // 15 fast interrupts, + // one interrupt is reserved for NMI (not visible through mip/mie) + } irqs_t; + + // Exception cause + typedef enum logic [5:0] { + EXC_CAUSE_IRQ_SOFTWARE_M = {1'b1, 5'd03}, + EXC_CAUSE_IRQ_TIMER_M = {1'b1, 5'd07}, + EXC_CAUSE_IRQ_EXTERNAL_M = {1'b1, 5'd11}, + // EXC_CAUSE_IRQ_FAST_0 = {1'b1, 5'd16}, + // EXC_CAUSE_IRQ_FAST_14 = {1'b1, 5'd30}, + EXC_CAUSE_IRQ_NM = {1'b1, 5'd31}, // == EXC_CAUSE_IRQ_FAST_15 + EXC_CAUSE_INSN_ADDR_MISA = {1'b0, 5'd00}, + EXC_CAUSE_INSTR_ACCESS_FAULT = {1'b0, 5'd01}, + EXC_CAUSE_ILLEGAL_INSN = {1'b0, 5'd02}, + EXC_CAUSE_BREAKPOINT = {1'b0, 5'd03}, + EXC_CAUSE_LOAD_ADDR_MISALIGN = {1'b0, 5'd04}, + 
EXC_CAUSE_LOAD_ACCESS_FAULT = {1'b0, 5'd05}, + EXC_CAUSE_STORE_ADDR_MISALIGN = {1'b0, 5'd06}, + EXC_CAUSE_STORE_ACCESS_FAULT = {1'b0, 5'd07}, + EXC_CAUSE_ECALL_UMODE = {1'b0, 5'd08}, + EXC_CAUSE_ECALL_MMODE = {1'b0, 5'd11}, + EXC_CAUSE_CHERI_FAULT = {1'b0, 5'd28} + } exc_cause_e; + + // Debug cause + typedef enum logic [2:0] { + DBG_CAUSE_NONE = 3'h0, + DBG_CAUSE_EBREAK = 3'h1, + DBG_CAUSE_TRIGGER = 3'h2, + DBG_CAUSE_HALTREQ = 3'h3, + DBG_CAUSE_STEP = 3'h4 + } dbg_cause_e; + + // ICache constants + parameter int unsigned ADDR_W = 32; + parameter int unsigned BUS_SIZE = 32; + parameter int unsigned BUS_BYTES = BUS_SIZE/8; + parameter int unsigned BUS_W = $clog2(BUS_BYTES); + parameter int unsigned IC_SIZE_BYTES = 4096; + parameter int unsigned IC_NUM_WAYS = 2; + parameter int unsigned IC_LINE_SIZE = 64; + parameter int unsigned IC_LINE_BYTES = IC_LINE_SIZE/8; + parameter int unsigned IC_LINE_W = $clog2(IC_LINE_BYTES); + parameter int unsigned IC_NUM_LINES = IC_SIZE_BYTES / IC_NUM_WAYS / IC_LINE_BYTES; + parameter int unsigned IC_LINE_BEATS = IC_LINE_BYTES / BUS_BYTES; + parameter int unsigned IC_LINE_BEATS_W = $clog2(IC_LINE_BEATS); + parameter int unsigned IC_INDEX_W = $clog2(IC_NUM_LINES); + parameter int unsigned IC_INDEX_HI = IC_INDEX_W + IC_LINE_W - 1; + parameter int unsigned IC_TAG_SIZE = ADDR_W - IC_INDEX_W - IC_LINE_W + 1; // 1 valid bit + parameter int unsigned IC_OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords + // ICache Scrambling Parameters + parameter int unsigned SCRAMBLE_KEY_W = 128; + parameter int unsigned SCRAMBLE_NONCE_W = 64; + + // PMP constants + parameter int unsigned PMP_MAX_REGIONS = 16; + parameter int unsigned PMP_CFG_W = 8; + + // PMP acces type + parameter int unsigned PMP_I = 0; + parameter int unsigned PMP_I2 = 1; + parameter int unsigned PMP_D = 2; + + typedef enum logic [1:0] { + PMP_ACC_EXEC = 2'b00, + PMP_ACC_WRITE = 2'b01, + PMP_ACC_READ = 2'b10 + } pmp_req_e; + + // PMP cfg structures + typedef enum logic [1:0] { + 
PMP_MODE_OFF = 2'b00, + PMP_MODE_TOR = 2'b01, + PMP_MODE_NA4 = 2'b10, + PMP_MODE_NAPOT = 2'b11 + } pmp_cfg_mode_e; + + typedef struct packed { + logic lock; + pmp_cfg_mode_e mode; + logic exec; + logic write; + logic read; + } pmp_cfg_t; + + // Machine Security Configuration (ePMP) + typedef struct packed { + logic rlb; // Rule Locking Bypass + logic mmwp; // Machine Mode Whitelist Policy + logic mml; // Machine Mode Lockdown + } pmp_mseccfg_t; + + // CSRs + typedef enum logic[11:0] { + // Machine information + CSR_MVENDORID = 12'hF11, + CSR_MARCHID = 12'hF12, + CSR_MIMPID = 12'hF13, + CSR_MHARTID = 12'hF14, + + // Machine trap setup + CSR_MSTATUS = 12'h300, + CSR_MISA = 12'h301, + CSR_MIE = 12'h304, + CSR_MTVEC = 12'h305, + CSR_MCOUNTEREN= 12'h306, + + // Machine trap handling + CSR_MSCRATCH = 12'h340, + CSR_MEPC = 12'h341, + CSR_MCAUSE = 12'h342, + CSR_MTVAL = 12'h343, + CSR_MIP = 12'h344, + + // Physical memory protection + CSR_PMPCFG0 = 12'h3A0, + CSR_PMPCFG1 = 12'h3A1, + CSR_PMPCFG2 = 12'h3A2, + CSR_PMPCFG3 = 12'h3A3, + CSR_PMPADDR0 = 12'h3B0, + CSR_PMPADDR1 = 12'h3B1, + CSR_PMPADDR2 = 12'h3B2, + CSR_PMPADDR3 = 12'h3B3, + CSR_PMPADDR4 = 12'h3B4, + CSR_PMPADDR5 = 12'h3B5, + CSR_PMPADDR6 = 12'h3B6, + CSR_PMPADDR7 = 12'h3B7, + CSR_PMPADDR8 = 12'h3B8, + CSR_PMPADDR9 = 12'h3B9, + CSR_PMPADDR10 = 12'h3BA, + CSR_PMPADDR11 = 12'h3BB, + CSR_PMPADDR12 = 12'h3BC, + CSR_PMPADDR13 = 12'h3BD, + CSR_PMPADDR14 = 12'h3BE, + CSR_PMPADDR15 = 12'h3BF, + + // ePMP control + CSR_MSECCFG = 12'h747, + CSR_MSECCFGH = 12'h757, + + // Debug trigger + CSR_TSELECT = 12'h7A0, + CSR_TDATA1 = 12'h7A1, + CSR_TDATA2 = 12'h7A2, + CSR_TDATA3 = 12'h7A3, + CSR_MCONTEXT = 12'h7A8, + CSR_SCONTEXT = 12'h7AA, + + // Debug/trace + CSR_DCSR = 12'h7b0, + CSR_DPC = 12'h7b1, + + // Debug + CSR_DSCRATCH0 = 12'h7b2, // optional + CSR_DSCRATCH1 = 12'h7b3, // optional + + // Machine Counter/Timers + CSR_MCOUNTINHIBIT = 12'h320, + CSR_MHPMEVENT3 = 12'h323, + CSR_MHPMEVENT4 = 12'h324, + CSR_MHPMEVENT5 = 12'h325, 
+ CSR_MHPMEVENT6 = 12'h326, + CSR_MHPMEVENT7 = 12'h327, + CSR_MHPMEVENT8 = 12'h328, + CSR_MHPMEVENT9 = 12'h329, + CSR_MHPMEVENT10 = 12'h32A, + CSR_MHPMEVENT11 = 12'h32B, + CSR_MHPMEVENT12 = 12'h32C, + CSR_MHPMEVENT13 = 12'h32D, + CSR_MHPMEVENT14 = 12'h32E, + CSR_MHPMEVENT15 = 12'h32F, + CSR_MHPMEVENT16 = 12'h330, + CSR_MHPMEVENT17 = 12'h331, + CSR_MHPMEVENT18 = 12'h332, + CSR_MHPMEVENT19 = 12'h333, + CSR_MHPMEVENT20 = 12'h334, + CSR_MHPMEVENT21 = 12'h335, + CSR_MHPMEVENT22 = 12'h336, + CSR_MHPMEVENT23 = 12'h337, + CSR_MHPMEVENT24 = 12'h338, + CSR_MHPMEVENT25 = 12'h339, + CSR_MHPMEVENT26 = 12'h33A, + CSR_MHPMEVENT27 = 12'h33B, + CSR_MHPMEVENT28 = 12'h33C, + CSR_MHPMEVENT29 = 12'h33D, + CSR_MHPMEVENT30 = 12'h33E, + CSR_MHPMEVENT31 = 12'h33F, + CSR_MCYCLE = 12'hB00, + CSR_MINSTRET = 12'hB02, + CSR_MHPMCOUNTER3 = 12'hB03, + CSR_MHPMCOUNTER4 = 12'hB04, + CSR_MHPMCOUNTER5 = 12'hB05, + CSR_MHPMCOUNTER6 = 12'hB06, + CSR_MHPMCOUNTER7 = 12'hB07, + CSR_MHPMCOUNTER8 = 12'hB08, + CSR_MHPMCOUNTER9 = 12'hB09, + CSR_MHPMCOUNTER10 = 12'hB0A, + CSR_MHPMCOUNTER11 = 12'hB0B, + CSR_MHPMCOUNTER12 = 12'hB0C, + CSR_MHPMCOUNTER13 = 12'hB0D, + CSR_MHPMCOUNTER14 = 12'hB0E, + CSR_MHPMCOUNTER15 = 12'hB0F, + CSR_MHPMCOUNTER16 = 12'hB10, + CSR_MHPMCOUNTER17 = 12'hB11, + CSR_MHPMCOUNTER18 = 12'hB12, + CSR_MHPMCOUNTER19 = 12'hB13, + CSR_MHPMCOUNTER20 = 12'hB14, + CSR_MHPMCOUNTER21 = 12'hB15, + CSR_MHPMCOUNTER22 = 12'hB16, + CSR_MHPMCOUNTER23 = 12'hB17, + CSR_MHPMCOUNTER24 = 12'hB18, + CSR_MHPMCOUNTER25 = 12'hB19, + CSR_MHPMCOUNTER26 = 12'hB1A, + CSR_MHPMCOUNTER27 = 12'hB1B, + CSR_MHPMCOUNTER28 = 12'hB1C, + CSR_MHPMCOUNTER29 = 12'hB1D, + CSR_MHPMCOUNTER30 = 12'hB1E, + CSR_MHPMCOUNTER31 = 12'hB1F, + CSR_MCYCLEH = 12'hB80, + CSR_MINSTRETH = 12'hB82, + CSR_MHPMCOUNTER3H = 12'hB83, + CSR_MHPMCOUNTER4H = 12'hB84, + CSR_MHPMCOUNTER5H = 12'hB85, + CSR_MHPMCOUNTER6H = 12'hB86, + CSR_MHPMCOUNTER7H = 12'hB87, + CSR_MHPMCOUNTER8H = 12'hB88, + CSR_MHPMCOUNTER9H = 12'hB89, + CSR_MHPMCOUNTER10H = 12'hB8A, + 
CSR_MHPMCOUNTER11H = 12'hB8B, + CSR_MHPMCOUNTER12H = 12'hB8C, + CSR_MHPMCOUNTER13H = 12'hB8D, + CSR_MHPMCOUNTER14H = 12'hB8E, + CSR_MHPMCOUNTER15H = 12'hB8F, + CSR_MHPMCOUNTER16H = 12'hB90, + CSR_MHPMCOUNTER17H = 12'hB91, + CSR_MHPMCOUNTER18H = 12'hB92, + CSR_MHPMCOUNTER19H = 12'hB93, + CSR_MHPMCOUNTER20H = 12'hB94, + CSR_MHPMCOUNTER21H = 12'hB95, + CSR_MHPMCOUNTER22H = 12'hB96, + CSR_MHPMCOUNTER23H = 12'hB97, + CSR_MHPMCOUNTER24H = 12'hB98, + CSR_MHPMCOUNTER25H = 12'hB99, + CSR_MHPMCOUNTER26H = 12'hB9A, + CSR_MHPMCOUNTER27H = 12'hB9B, + CSR_MHPMCOUNTER28H = 12'hB9C, + CSR_MHPMCOUNTER29H = 12'hB9D, + CSR_MHPMCOUNTER30H = 12'hB9E, + CSR_MHPMCOUNTER31H = 12'hB9F, + CSR_MSHWM = 12'hBC1, + CSR_MSHWMB = 12'hBC2, + CSR_CDBG_CTRL = 12'hBC4, + CSR_CPUCTRL = 12'h7C0, + CSR_SECURESEED = 12'h7C1 + } csr_num_e; + + // CSR pmp-related offsets + parameter logic [11:0] CSR_OFF_PMP_CFG = 12'h3A0; // pmp_cfg @ 12'h3a0 - 12'h3a3 + parameter logic [11:0] CSR_OFF_PMP_ADDR = 12'h3B0; // pmp_addr @ 12'h3b0 - 12'h3bf + + // CSR status bits + parameter int unsigned CSR_MSTATUS_MIE_BIT = 3; + parameter int unsigned CSR_MSTATUS_MPIE_BIT = 7; + parameter int unsigned CSR_MSTATUS_MPP_BIT_LOW = 11; + parameter int unsigned CSR_MSTATUS_MPP_BIT_HIGH = 12; + parameter int unsigned CSR_MSTATUS_MPRV_BIT = 17; + parameter int unsigned CSR_MSTATUS_TW_BIT = 21; + + // CSR machine ISA + parameter logic [1:0] CSR_MISA_MXL = 2'd1; // M-XLEN: XLEN in M-Mode for RV32 + + // CSR interrupt pending/enable bits + parameter int unsigned CSR_MSIX_BIT = 3; + parameter int unsigned CSR_MTIX_BIT = 7; + parameter int unsigned CSR_MEIX_BIT = 11; + parameter int unsigned CSR_MFIX_BIT_LOW = 16; + parameter int unsigned CSR_MFIX_BIT_HIGH = 30; + + // CSR Machine Security Configuration bits + parameter int unsigned CSR_MSECCFG_MML_BIT = 0; + parameter int unsigned CSR_MSECCFG_MMWP_BIT = 1; + parameter int unsigned CSR_MSECCFG_RLB_BIT = 2; + + // Vendor ID + // No JEDEC ID has been allocated to lowRISC so the value is 0 
to indicate the field is not + // implemented + localparam logic [31:0] CSR_MVENDORID_VALUE = 32'b0; + localparam logic [31:0] CSR_MVENDORID_CHERI_VALUE = 32'h255; + + // Architecture ID + // Top bit is unset to indicate an open source project. The lower bits are an ID allocated by the + // RISC-V Foundation. Note this is allocated specifically to Ibex, should significant changes be + // made a different architecture ID should be supplied. + localparam logic [31:0] CSR_MARCHID_VALUE = {1'b0, 31'd22}; + localparam logic [31:0] CSR_MARCHID_CHERI_VALUE = 32'hce1; + + + // Implementation ID + // 0 indicates this field is not implemeted. Ibex implementors may wish to indicate an RTL/netlist + // version here using their own unique encoding (e.g. 32 bits of the git hash of the implemented + // commit). + localparam logic [31:0] CSR_MIMPID_VALUE = 32'b0; + + // These LFSR parameters have been generated with + // $ opentitan/util/design/gen-lfsr-seed.py --width 32 --seed 2480124384 --prefix "" + parameter int LfsrWidth = 32; + typedef logic [LfsrWidth-1:0] lfsr_seed_t; + typedef logic [LfsrWidth-1:0][$clog2(LfsrWidth)-1:0] lfsr_perm_t; + parameter lfsr_seed_t RndCnstLfsrSeedDefault = 32'hac533bf4; + parameter lfsr_perm_t RndCnstLfsrPermDefault = { + 160'h1e35ecba467fd1b12e958152c04fa43878a8daed + }; + parameter logic [SCRAMBLE_KEY_W-1:0] RndCnstIbexKeyDefault = + 128'h14e8cecae3040d5e12286bb3cc113298; + parameter logic [SCRAMBLE_NONCE_W-1:0] RndCnstIbexNonceDefault = + 64'hf79780bc735f3843; + + // Fetch enable. Mult-bit signal used for security hardening. For non-secure implementation all + // bits other than the bottom bit are ignored. + typedef logic [3:0] fetch_enable_t; + + // Note that if adjusting these parameters it is assumed the bottom bit is set for On and unset + // for Off. This allows the use of FetchEnableOn/FetchEnableOff to work for both secure and + // non-secure Ibex. 
If this assumption is broken the RTL that uses the fetch_enable signal within + // `cheriot_core` may need adjusting. + parameter fetch_enable_t FetchEnableOn = 4'b1001; + parameter fetch_enable_t FetchEnableOff = 4'b0110; + + typedef logic [3:0] ibex_mubi_t; + + // Note that if adjusting these parameters it is assumed the bottom bit is set for On and unset + // for Off. This allows the use of IbexMuBiOn/IbexMuBiOff to work for both secure and non-secure + // Ibex. If this assumption is broken the RTL that uses ibex_mubi_t types such as the fetch_enable + // and core_busy signals within `cheriot_core` may need adjusting. + parameter ibex_mubi_t IbexMuBiOn = 4'b0101; + parameter ibex_mubi_t IbexMuBiOff = 4'b1010; + + ////////////// + // ID stage // + ////////////// + + typedef enum logic [3:0] { + RESET, + BOOT_SET, + WAIT_SLEEP, + SLEEP, + FIRST_FETCH, + DECODE, + FLUSH, + IRQ_TAKEN, + DBG_TAKEN_IF, + DBG_TAKEN_ID + } ctrl_fsm_e; + + ////////////// + // LSU // + ////////////// + + typedef enum logic [3:0] { + IDLE, WAIT_GNT_MIS, WAIT_RVALID_MIS, WAIT_GNT, + WAIT_RVALID_MIS_GNTS_DONE, + CTX_WAIT_GNT1, CTX_WAIT_GNT2, CTX_WAIT_RESP + } ls_fsm_e; + + typedef enum logic [2:0] {CRX_IDLE, CRX_WAIT_RESP1, CRX_WAIT_RESP2} cap_rx_fsm_t; + + +endpackage
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv b/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv new file mode 100644 index 0000000..6363e70 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_pmp.sv
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Physical Memory Protection (PMP) checker.
//
// Purely combinational: for each access channel the request address is
// compared against every configured region and a per-channel access fault is
// reported. Region matching supports OFF/NA4/NAPOT/TOR modes; permission
// checking supports both the original PMP behaviour and the Smepmp
// machine-mode-lockdown (MSECCFG.MML) rules.
module cheriot_pmp #(
  // Granularity of NAPOT access,
  // 0 = No restriction, 1 = 8 byte, 2 = 16 byte, 3 = 32 byte, etc.
  parameter int unsigned PMPGranularity = 0,
  // Number of access channels (e.g. i-side + d-side)
  parameter int unsigned PMPNumChan = 2,
  // Number of implemented regions
  parameter int unsigned PMPNumRegions = 4
) (
  // Clock and Reset (present for interface consistency; the checks below are
  // purely combinational)
  input logic clk_i,
  input logic rst_ni,

  // Interface to CSRs
  input cheriot_pkg::pmp_cfg_t     csr_pmp_cfg_i     [PMPNumRegions],
  input logic [33:0]               csr_pmp_addr_i    [PMPNumRegions],
  input cheriot_pkg::pmp_mseccfg_t csr_pmp_mseccfg_i,

  input cheriot_pkg::priv_lvl_e    priv_mode_i    [PMPNumChan],
  // Access checking channels
  input logic [33:0]               pmp_req_addr_i [PMPNumChan],
  input cheriot_pkg::pmp_req_e     pmp_req_type_i [PMPNumChan],
  output logic                     pmp_req_err_o  [PMPNumChan]

);

  import cheriot_pkg::*;

  // Access Checking Signals
  logic [33:0]                              region_start_addr [PMPNumRegions];
  logic [33:PMPGranularity+2]               region_addr_mask  [PMPNumRegions];
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_gt;
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_lt;
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_eq;
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_match_all;
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_basic_perm_check;
  logic [PMPNumChan-1:0][PMPNumRegions-1:0] region_mml_perm_check;
  logic [PMPNumChan-1:0]                    access_fault;


  // ---------------
  // Access checking
  // ---------------

  for (genvar r = 0; r < PMPNumRegions; r++) begin : g_addr_exp
    // Start address for TOR matching. For TOR, region r spans
    // [pmpaddr[r-1], pmpaddr[r]); region 0's lower bound is address zero.
    if (r == 0) begin : g_entry0
      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? 34'h000000000 :
                                                                              csr_pmp_addr_i[r];
    end else begin : g_oth
      assign region_start_addr[r] = (csr_pmp_cfg_i[r].mode == PMP_MODE_TOR) ? csr_pmp_addr_i[r-1] :
                                                                              csr_pmp_addr_i[r];
    end
    // Address mask for NA matching
    for (genvar b = PMPGranularity + 2; b < 34; b++) begin : g_bitmask
      if (b == 2) begin : g_bit0
        // Always mask bit 2 for NAPOT
        assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT);
      end else begin : g_others
        // We will mask this bit if it is within the programmed granule
        // i.e. addr = yyyy 0111
        //                    ^
        //                    | This bit pos is the top of the mask, all lower bits set
        // thus mask = 1111 0000
        if (PMPGranularity == 0) begin : g_region_addr_mask_zero_granularity
          assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
                                          ~&csr_pmp_addr_i[r][b-1:2];
        end else begin : g_region_addr_mask_other_granularity
          assign region_addr_mask[r][b] = (csr_pmp_cfg_i[r].mode != PMP_MODE_NAPOT) |
                                          ~&csr_pmp_addr_i[r][b-1:PMPGranularity+1];
        end
      end
    end
  end

  for (genvar c = 0; c < PMPNumChan; c++) begin : g_access_check
    for (genvar r = 0; r < PMPNumRegions; r++) begin : g_regions
      // Comparators are sized according to granularity.
      // eq/gt compare against the (mode-dependent) region start address;
      // lt compares against pmpaddr[r] itself, which is the exclusive upper
      // bound in TOR mode.
      assign region_match_eq[c][r] = (pmp_req_addr_i[c][33:PMPGranularity+2] &
                                      region_addr_mask[r]) ==
                                     (region_start_addr[r][33:PMPGranularity+2] &
                                      region_addr_mask[r]);
      assign region_match_gt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] >
                                     region_start_addr[r][33:PMPGranularity+2];
      assign region_match_lt[c][r] = pmp_req_addr_i[c][33:PMPGranularity+2] <
                                     csr_pmp_addr_i[r][33:PMPGranularity+2];

      // Combine the comparator results into a per-mode region match.
      always_comb begin
        region_match_all[c][r] = 1'b0;
        unique case (csr_pmp_cfg_i[r].mode)
          PMP_MODE_OFF:   region_match_all[c][r] = 1'b0;
          PMP_MODE_NA4:   region_match_all[c][r] = region_match_eq[c][r];
          PMP_MODE_NAPOT: region_match_all[c][r] = region_match_eq[c][r];
          PMP_MODE_TOR: begin
            region_match_all[c][r] = (region_match_eq[c][r] | region_match_gt[c][r]) &
                                     region_match_lt[c][r];
          end
          default:        region_match_all[c][r] = 1'b0;
        endcase
      end

      // Check specific required permissions (R/W/X bit matching the access type)
      assign region_basic_perm_check[c][r] =
          ((pmp_req_type_i[c] == PMP_ACC_EXEC)  & csr_pmp_cfg_i[r].exec)  |
          ((pmp_req_type_i[c] == PMP_ACC_WRITE) & csr_pmp_cfg_i[r].write) |
          ((pmp_req_type_i[c] == PMP_ACC_READ)  & csr_pmp_cfg_i[r].read);


      // Compute permission checks that apply when MSECCFG.MML is set.
      always_comb begin
        region_mml_perm_check[c][r] = 1'b0;

        if (!csr_pmp_cfg_i[r].read && csr_pmp_cfg_i[r].write) begin
          // Special-case shared regions where R = 0, W = 1; the {L, X} bits
          // then select one of four shared-region encodings.
          unique case ({csr_pmp_cfg_i[r].lock, csr_pmp_cfg_i[r].exec})
            // Read/write in M, read only in S/U
            2'b00: region_mml_perm_check[c][r] =
                (pmp_req_type_i[c] == PMP_ACC_READ) |
                ((pmp_req_type_i[c] == PMP_ACC_WRITE) & (priv_mode_i[c] == PRIV_LVL_M));
            // Read/write in M/S/U
            2'b01: region_mml_perm_check[c][r] =
                (pmp_req_type_i[c] == PMP_ACC_READ) | (pmp_req_type_i[c] == PMP_ACC_WRITE);
            // Execute only on M/S/U
            2'b10: region_mml_perm_check[c][r] = (pmp_req_type_i[c] == PMP_ACC_EXEC);
            // Read/execute in M, execute only on S/U
            2'b11: region_mml_perm_check[c][r] =
                (pmp_req_type_i[c] == PMP_ACC_EXEC) |
                ((pmp_req_type_i[c] == PMP_ACC_READ) & (priv_mode_i[c] == PRIV_LVL_M));
            default: ;
          endcase
        end else begin
          if (csr_pmp_cfg_i[r].read & csr_pmp_cfg_i[r].write & csr_pmp_cfg_i[r].exec
              & csr_pmp_cfg_i[r].lock) begin
            // Special-case shared read only region when R = 1, W = 1, X = 1, L = 1
            region_mml_perm_check[c][r] = pmp_req_type_i[c] == PMP_ACC_READ;
          end else begin
            // Otherwise use basic permission check. Permission is always denied if in S/U mode and
            // L is set or if in M mode and L is unset.
            region_mml_perm_check[c][r] =
                priv_mode_i[c] == PRIV_LVL_M ?
                    csr_pmp_cfg_i[r].lock & region_basic_perm_check[c][r] :
                    ~csr_pmp_cfg_i[r].lock & region_basic_perm_check[c][r];
          end
        end
      end
    end

    // Access fault determination / prioritization
    always_comb begin
      // When MSECCFG.MMWP is set default deny always, otherwise allow for M-mode, deny for other
      // modes
      access_fault[c] = csr_pmp_mseccfg_i.mmwp | (priv_mode_i[c] != PRIV_LVL_M);

      // PMP entries are statically prioritized, from 0 to N-1
      // The lowest-numbered PMP entry which matches an address determines accessibility.
      // Iterating from N-1 down to 0 means the lowest-numbered matching entry is
      // evaluated last, so its decision wins in this last-assignment-wins block.
      for (int r = PMPNumRegions - 1; r >= 0; r--) begin
        if (region_match_all[c][r]) begin
          if (csr_pmp_mseccfg_i.mml) begin
            // When MSECCFG.MML is set use MML specific permission check
            access_fault[c] = ~region_mml_perm_check[c][r];
          end else begin
            // Otherwise use original PMP behaviour
            access_fault[c] = (priv_mode_i[c] == PRIV_LVL_M) ?
                // For M-mode, any region which matches with the L-bit clear, or with sufficient
                // access permissions will be allowed
                (csr_pmp_cfg_i[r].lock & ~region_basic_perm_check[c][r]) :
                // For other modes, the lock bit doesn't matter
                ~region_basic_perm_check[c][r];
          end
        end
      end
    end

    assign pmp_req_err_o[c] = access_fault[c];
  end

  // RLB, rule locking bypass, is only relevant to cheriot_cs_registers which controls writes to the
  // PMP CSRs. Tie to unused signal here to prevent lint warnings.
  logic unused_csr_pmp_mseccfg_rlb;
  assign unused_csr_pmp_mseccfg_rlb = csr_pmp_mseccfg_i.rlb;
endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh b/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh new file mode 100644 index 0000000..cda701b --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_pmp_reset_default.svh
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Default reset values for PMP CSRs. Where the number of regions
// (PMPNumRegions) is less than 16 the reset values for the higher numbered
// regions are ignored.
//
// See the Ibex Reference Guide (Custom Reset Values under Physical Memory
// Protection) for more information.

// Every region resets disabled (mode OFF) with all permissions cleared and
// the lock bit unset; the replication pattern expands to 16 identical entries.
localparam pmp_cfg_t pmp_cfg_rst[16] = '{16{
    pmp_cfg_t'{lock: 1'b0, mode: PMP_MODE_OFF, exec: 1'b0, write: 1'b0, read: 1'b0}
}};

// Addresses are given in byte granularity for readability. A minimum of two
// bits will be stripped off the bottom (PMPGranularity == 0) with more stripped
// off at coarser granularities. All 16 region addresses reset to zero.
localparam [33:0] pmp_addr_rst[16] = '{16{34'h0}};

// MSECCFG resets with RLB (rule locking bypass), MMWP (machine-mode whitelist
// policy) and MML (machine-mode lockdown) all disabled.
localparam pmp_mseccfg_t pmp_mseccfg_rst = '{rlb: 1'b0, mmwp: 1'b0, mml: 1'b0};
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv b/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv new file mode 100644 index 0000000..00de519 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_prefetch_buffer.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Prefetcher Buffer for 32 bit memory interface
 *
 * Prefetch Buffer that caches instructions. This cuts overly long critical
 * paths to the instruction cache.
 *
 * Up to NUM_REQS bus requests may be outstanding at once; responses are pushed
 * into a fetch FIFO which presents (possibly unaligned/compressed)
 * instructions to the IF stage.
 */
module cheriot_prefetch_buffer #(
  parameter bit ResetAll = 1'b0
) (
  input  logic        clk_i,
  input  logic        rst_ni,

  input  logic        req_i,                // IF stage requests instructions

  input  logic        branch_i,             // redirect fetch to addr_i
  input  logic        branch_mispredict_i,  // redirect fetch to mispredict_addr_i
  input  logic [31:0] mispredict_addr_i,
  input  logic [31:0] addr_i,


  input  logic        ready_i,              // IF stage accepts valid_o/rdata_o
  output logic        valid_o,
  output logic [31:0] rdata_o,
  output logic [31:0] addr_o,
  output logic        err_o,
  output logic        err_plus2_o,

  input  logic        cheri_force_uc_i,     // CHERI: force uncompressed decode in fetch FIFO

  // goes to instruction memory / instruction cache
  output logic        instr_req_o,
  input  logic        instr_gnt_i,
  output logic [31:0] instr_addr_o,
  input  logic [31:0] instr_rdata_i,
  input  logic        instr_err_i,
  input  logic        instr_rvalid_i,

  // Prefetch Buffer Status
  output logic        busy_o
);

  // Maximum number of outstanding bus requests.
  localparam int unsigned NUM_REQS  = 2;

  logic                valid_new_req, valid_req;
  logic                valid_req_d, valid_req_q;
  logic                discard_req_d, discard_req_q;
  logic [NUM_REQS-1:0] rdata_outstanding_n, rdata_outstanding_s, rdata_outstanding_q;
  logic [NUM_REQS-1:0] branch_discard_n, branch_discard_s, branch_discard_q;
  logic [NUM_REQS-1:0] rdata_outstanding_rev;

  logic [31:0]         stored_addr_d, stored_addr_q;
  logic                stored_addr_en;
  logic [31:0]         fetch_addr_d, fetch_addr_q;
  logic                fetch_addr_en;
  logic [31:0]         instr_addr, instr_addr_w_aligned;

  logic                fifo_valid;
  logic [31:0]         fifo_addr;
  logic                fifo_ready;
  logic                fifo_clear;
  logic [NUM_REQS-1:0] fifo_busy;

  logic                valid_raw;

  logic                branch_or_mispredict;

  ////////////////////////////
  // Prefetch buffer status //
  ////////////////////////////

  // Busy whenever a response is still outstanding or a new request is pending.
  assign busy_o = (|rdata_outstanding_q) | instr_req_o;

  assign branch_or_mispredict = branch_i | branch_mispredict_i;

  //////////////////////////////////////////////
  // Fetch fifo - consumes addresses and data //
  //////////////////////////////////////////////

  // A branch will invalidate any previously fetched instructions.
  // Note that the FENCE.I instruction relies on this flushing behaviour on branch. If it is
  // altered the FENCE.I implementation may require changes.
  assign fifo_clear = branch_or_mispredict;

  // Reversed version of rdata_outstanding_q which can be overlaid with fifo fill state
  for (genvar i = 0; i < NUM_REQS; i++) begin : gen_rd_rev
    assign rdata_outstanding_rev[i] = rdata_outstanding_q[NUM_REQS-1-i];
  end

  // The fifo is ready to accept a new request if it is not full - including space reserved for
  // requests already outstanding.
  // Overlay the fifo fill state with the outstanding requests to see if there is space.
  assign fifo_ready = ~&(fifo_busy | rdata_outstanding_rev);

  cheriot_fetch_fifo #(
    .NUM_REQS (NUM_REQS),
    .ResetAll (ResetAll)
  ) fifo_i (
      .clk_i                 ( clk_i             ),
      .rst_ni                ( rst_ni            ),

      .clear_i               ( fifo_clear        ),
      .busy_o                ( fifo_busy         ),

      .in_valid_i            ( fifo_valid        ),
      .in_addr_i             ( fifo_addr         ),
      .in_rdata_i            ( instr_rdata_i     ),
      .in_err_i              ( instr_err_i       ),
      .cheri_force_uc_i      ( cheri_force_uc_i  ),

      .out_valid_o           ( valid_raw         ),
      .out_ready_i           ( ready_i           ),
      .out_rdata_o           ( rdata_o           ),
      .out_addr_o            ( addr_o            ),
      .out_err_o             ( err_o             ),
      .out_err_plus2_o       ( err_plus2_o       )
  );

  //////////////
  // Requests //
  //////////////

  // Make a new request any time there is space in the FIFO, and space in the request queue.
  // A branch/mispredict overrides the FIFO-full check since the FIFO is about to be cleared.
  assign valid_new_req = req_i & (fifo_ready | branch_or_mispredict) &
                         ~rdata_outstanding_q[NUM_REQS-1];

  assign valid_req = valid_req_q | valid_new_req;

  // Hold the request stable for requests that didn't get granted
  assign valid_req_d = valid_req & ~instr_gnt_i;

  // Record whether an outstanding bus request is cancelled by a branch
  assign discard_req_d = valid_req_q & (branch_or_mispredict | discard_req_q);

  ////////////////
  // Fetch addr //
  ////////////////

  // Two addresses are tracked in the prefetch buffer:
  // 1. stored_addr_q - This is the address issued on the bus. It stays stable until
  //                    the request is granted.
  // 2. fetch_addr_q  - This is our next address to fetch from. It is updated on branches to
  //                    capture the new address, and then for each new request issued.
  // A third address is tracked in the fetch FIFO itself:
  // 3. instr_addr_q  - This is the address at the head of the FIFO, effectively our oldest fetched
  //                    address. This address is updated on branches, and does its own increment
  //                    each time the FIFO is popped.

  // 1. stored_addr_q

  // Only update stored_addr_q for new ungranted requests
  assign stored_addr_en = valid_new_req & ~valid_req_q & ~instr_gnt_i;

  // Store whatever address was issued on the bus
  assign stored_addr_d = instr_addr;

  // CPU resets with a branch, so no need to reset these addresses
  if (ResetAll) begin : g_stored_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        stored_addr_q <= '0;
      end else if (stored_addr_en) begin
        stored_addr_q <= stored_addr_d;
      end
    end
  end else begin : g_stored_addr_nr
    // Intentionally unreset flop (see comment above).
    always_ff @(posedge clk_i) begin
      if (stored_addr_en) begin
        stored_addr_q <= stored_addr_d;
      end
    end
  end

  // 2. fetch_addr_q

  // Update on a branch or as soon as a request is issued
  assign fetch_addr_en = branch_or_mispredict | (valid_new_req & ~valid_req_q);

  // Select the redirect target on branch/mispredict, otherwise increment the
  // word-aligned fetch address by 4 when a new request is issued.
  assign fetch_addr_d = (branch_i            ? addr_i :
                         branch_mispredict_i ? {mispredict_addr_i[31:2], 2'b00} :
                                               {fetch_addr_q[31:2], 2'b00}) +
                        // Current address + 4
                        {{29{1'b0}},(valid_new_req & ~valid_req_q),2'b00};

  if (ResetAll) begin : g_fetch_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fetch_addr_q <= '0;
      end else if (fetch_addr_en) begin
        fetch_addr_q <= fetch_addr_d;
      end
    end
  end else begin : g_fetch_addr_nr
    // Intentionally unreset flop (see comment above).
    always_ff @(posedge clk_i) begin
      if (fetch_addr_en) begin
        fetch_addr_q <= fetch_addr_d;
      end
    end
  end

  // Address mux: replay an ungranted request first, otherwise take the
  // redirect target, otherwise continue sequential prefetching.
  assign instr_addr = valid_req_q         ? stored_addr_q :
                      branch_i            ? addr_i :
                      branch_mispredict_i ? mispredict_addr_i :
                                            fetch_addr_q;

  assign instr_addr_w_aligned = {instr_addr[31:2], 2'b00};

  ///////////////////////////////
  // Request outstanding queue //
  ///////////////////////////////

  for (genvar i = 0; i < NUM_REQS; i++) begin : g_outstanding_reqs
    // Request 0 (always the oldest outstanding request)
    if (i == 0) begin : g_req0
      // A request becomes outstanding once granted, and is cleared once the rvalid is received.
      // Outstanding requests shift down the queue towards entry 0.
      assign rdata_outstanding_n[i] = (valid_req & instr_gnt_i) |
                                      rdata_outstanding_q[i];
      // If a branch is received at any point while a request is outstanding, it must be tracked
      // to ensure we discard the data once received
      assign branch_discard_n[i]    = (valid_req & instr_gnt_i & discard_req_d) |
                                      (branch_or_mispredict & rdata_outstanding_q[i]) |
                                      branch_discard_q[i];

    end else begin : g_reqtop
      // Entries > 0 consider the FIFO fill state to calculate their next state (by checking
      // whether the previous entry is valid)

      assign rdata_outstanding_n[i] = (valid_req & instr_gnt_i &
                                       rdata_outstanding_q[i-1]) |
                                      rdata_outstanding_q[i];
      assign branch_discard_n[i]    = (valid_req & instr_gnt_i & discard_req_d &
                                       rdata_outstanding_q[i-1]) |
                                      (branch_or_mispredict & rdata_outstanding_q[i]) |
                                      branch_discard_q[i];
    end
  end

  // Shift the entries down on each instr_rvalid_i
  assign rdata_outstanding_s = instr_rvalid_i ? {1'b0,rdata_outstanding_n[NUM_REQS-1:1]} :
                                                rdata_outstanding_n;
  assign branch_discard_s    = instr_rvalid_i ? {1'b0,branch_discard_n[NUM_REQS-1:1]} :
                                                branch_discard_n;

  // Push a new entry to the FIFO once complete (and not cancelled by a branch)
  assign fifo_valid = instr_rvalid_i & ~branch_discard_q[0];

  // Address supplied to the FIFO alongside response data; only consumed by the
  // FIFO on a redirect, so selecting between the two redirect sources suffices.
  assign fifo_addr = branch_i ? addr_i : mispredict_addr_i;

  ///////////////
  // Registers //
  ///////////////

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      valid_req_q          <= 1'b0;
      discard_req_q        <= 1'b0;
      rdata_outstanding_q  <= 'b0;
      branch_discard_q     <= 'b0;
    end else begin
      valid_req_q          <= valid_req_d;
      discard_req_q        <= discard_req_d;
      rdata_outstanding_q  <= rdata_outstanding_s;
      branch_discard_q     <= branch_discard_s;
    end
  end

  /////////////
  // Outputs //
  /////////////

  assign instr_req_o  = valid_req;
  assign instr_addr_o = instr_addr_w_aligned;

  // Suppress output validity during a mispredict redirect (FIFO clear in flight).
  assign valid_o = valid_raw & ~branch_mispredict_i;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv new file mode 100644 index 0000000..1da818e --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_ff.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * RISC-V register file
 *
 * Register file with 31 or 15x 32 bit wide registers. Register 0 is fixed to 0.
 * This register file is based on flip flops. Use this register file when
 * targeting FPGA synthesis or Verilator simulation.
 */
module cheriot_register_file_ff #(
  parameter bit                   RV32E             = 0,
  parameter int unsigned          DataWidth         = 32,
  parameter bit                   DummyInstructions = 0,
  parameter logic [DataWidth-1:0] WordZeroVal       = '0
) (
  // Clock and Reset
  input  logic                 clk_i,
  input  logic                 rst_ni,

  input  logic                 test_en_i,
  input  logic                 dummy_instr_id_i,

  //Read port R1
  input  logic [4:0]           raddr_a_i,
  output logic [DataWidth-1:0] rdata_a_o,

  //Read port R2
  input  logic [4:0]           raddr_b_i,
  output logic [DataWidth-1:0] rdata_b_o,


  // Write port W1
  input  logic [4:0]           waddr_a_i,
  input  logic [DataWidth-1:0] wdata_a_i,
  input  logic                 we_a_i

);

  // RV32E implements only 16 architectural registers.
  localparam int unsigned AddrWidth = RV32E ? 4 : 5;
  localparam int unsigned NumWords  = 2 ** AddrWidth;

  // Full read view including x0, flop storage for x1..x(NumWords-1), and the
  // per-register one-hot write enable (x0 has no flops).
  logic [NumWords-1:0][DataWidth-1:0] rf_rdata;
  logic [NumWords-1:1][DataWidth-1:0] rf_flops_q;
  logic [NumWords-1:1]                wr_en_onehot;

  // Decode the write address into a one-hot enable, gated by the port enable.
  always_comb begin : we_a_decoder
    for (int unsigned idx = 1; idx < NumWords; idx++) begin
      wr_en_onehot[idx] = we_a_i & (waddr_a_i == 5'(idx));
    end
  end

  // No flops for R0 as it's hard-wired to 0
  for (genvar idx = 1; idx < NumWords; idx++) begin : g_rf_flops
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        rf_flops_q[idx] <= WordZeroVal;
      end else if (wr_en_onehot[idx]) begin
        rf_flops_q[idx] <= wdata_a_i;
      end
    end
  end

  // With dummy instructions enabled, R0 behaves as a real register but will always return 0 for
  // real instructions.
  if (DummyInstructions) begin : g_dummy_r0
    logic                 we_r0_dummy;
    logic [DataWidth-1:0] rf_r0_q;

    // Write enable for dummy R0 register (waddr_a_i will always be 0 for dummy instructions)
    assign we_r0_dummy = we_a_i & dummy_instr_id_i;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        rf_r0_q <= WordZeroVal;
      end else if (we_r0_dummy) begin
        rf_r0_q <= wdata_a_i;
      end
    end

    // Output the dummy data for dummy instructions, otherwise R0 reads as zero
    assign rf_rdata[0] = dummy_instr_id_i ? rf_r0_q : WordZeroVal;

  end else begin : g_normal_r0
    logic unused_dummy_instr_id;
    assign unused_dummy_instr_id = dummy_instr_id_i;

    // R0 is nil
    assign rf_rdata[0] = WordZeroVal;
  end

  assign rf_rdata[NumWords-1:1] = rf_flops_q[NumWords-1:1];

  // Asynchronous reads.
  assign rdata_a_o = rf_rdata[raddr_a_i];
  assign rdata_b_o = rf_rdata[raddr_b_i];

  // Signal not used in FF register file
  logic unused_test_en;
  assign unused_test_en = test_en_i;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv new file mode 100644 index 0000000..2c00bc6 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_fpga.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * RISC-V register file
 *
 * Register file with 31 or 15x 32 bit wide registers. Register 0 is fixed to 0.
 *
 * This register file is designed to make FPGA synthesis tools infer RAM primitives. For Xilinx
 * FPGA architectures, it will produce RAM32M primitives. Other vendors have not yet been tested.
 */
module cheriot_register_file_fpga #(
  parameter bit                   RV32E             = 0,
  parameter int unsigned          DataWidth         = 32,
  parameter bit                   DummyInstructions = 0,
  parameter logic [DataWidth-1:0] WordZeroVal       = '0
) (
  // Clock and Reset
  input  logic                 clk_i,
  input  logic                 rst_ni,

  input  logic                 test_en_i,
  input  logic                 dummy_instr_id_i,

  //Read port R1
  input  logic [ 4:0]          raddr_a_i,
  output logic [DataWidth-1:0] rdata_a_o,
  //Read port R2
  input  logic [ 4:0]          raddr_b_i,
  output logic [DataWidth-1:0] rdata_b_o,
  // Write port W1
  input  logic [ 4:0]          waddr_a_i,
  input  logic [DataWidth-1:0] wdata_a_i,
  input  logic                 we_a_i
);

  // RV32E implements only 16 architectural registers.
  localparam int AddrWidth = RV32E ? 4 : 5;
  localparam int NumWords  = 2 ** AddrWidth;

  // RAM-inferred storage; entry 0 exists but is never written or read back.
  logic [DataWidth-1:0] mem[NumWords];
  logic                 we;

  // Asynchronous reads; any access to x0 returns zero without consulting the RAM.
  assign rdata_a_o = (|raddr_a_i) ? mem[raddr_a_i] : '0;
  assign rdata_b_o = (|raddr_b_i) ? mem[raddr_b_i] : '0;

  // Suppress writes that target x0.
  assign we = we_a_i & (|waddr_a_i);

  // Note that the SystemVerilog LRM requires variables on the LHS of assignments within
  // "always_ff" to not be written to by any other process. However, to enable the initialization
  // of the inferred RAM32M primitives with non-zero values, below "initial" procedure is needed.
  // Therefore, we use "always" instead of the generally preferred "always_ff" for the synchronous
  // write procedure.
  always @(posedge clk_i) begin : sync_write
    if (we) begin
      mem[waddr_a_i] <= wdata_a_i;
    end
  end : sync_write

  // Make sure we initialize the BRAM with the correct register reset value.
  initial begin
    for (int widx = 0; widx < NumWords; widx++) begin
      mem[widx] = WordZeroVal;
    end
  end

  // Reset not used in this register file version
  logic unused_rst_ni;
  assign unused_rst_ni = rst_ni;

  // Dummy instruction changes not relevant for FPGA implementation
  logic unused_dummy_instr;
  assign unused_dummy_instr = dummy_instr_id_i;
  // Test enable signal not used in FPGA implementation
  logic unused_test_en;
  assign unused_test_en = test_en_i;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv new file mode 100644 index 0000000..d953b79 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_register_file_latch.sv
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * RISC-V register file
 *
 * Register file with 31 or 15x 32 bit wide registers. Register 0 is fixed to 0.
 * This register file is based on latches and is thus smaller than the flip-flop
 * based RF. It requires a target technology-specific clock gating cell. Use this
 * register file when targeting ASIC synthesis or event-based simulators.
 */
module cheriot_register_file_latch #(
  parameter bit                   RV32E             = 0,
  parameter int unsigned          DataWidth         = 32,
  parameter bit                   DummyInstructions = 0,
  parameter logic [DataWidth-1:0] WordZeroVal       = '0
) (
  // Clock and Reset
  input  logic                 clk_i,
  input  logic                 rst_ni,

  input  logic                 test_en_i,
  input  logic                 dummy_instr_id_i,

  //Read port R1
  input  logic [4:0]           raddr_a_i,
  output logic [DataWidth-1:0] rdata_a_o,

  //Read port R2
  input  logic [4:0]           raddr_b_i,
  output logic [DataWidth-1:0] rdata_b_o,

  // Write port W1
  input  logic [4:0]           waddr_a_i,
  input  logic [DataWidth-1:0] wdata_a_i,
  input  logic                 we_a_i

);

  // RV32E implements only 16 architectural registers.
  localparam int unsigned ADDR_WIDTH = RV32E ? 4 : 5;
  localparam int unsigned NUM_WORDS  = 2**ADDR_WIDTH;

  // Latch-based storage array.
  logic [DataWidth-1:0] mem[NUM_WORDS];

  // One-hot write select; register 0 has no storage latches.
  logic [NUM_WORDS-1:1] waddr_onehot_a;

  // Per-word gated clocks driving the storage latches.
  logic [NUM_WORDS-1:1] mem_clocks;
  // Write data sampled on the (gated) clock edge; the latches open on the
  // opposite phase, making the write race-free.
  logic [DataWidth-1:0] wdata_a_q;

  // internal addresses
  logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;

  assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
  assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
  assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];

  logic clk_int;

  //////////
  // READ //
  //////////
  // Asynchronous reads directly from the latch array.
  assign rdata_a_o = mem[raddr_a_int];
  assign rdata_b_o = mem[raddr_b_int];

  ///////////
  // WRITE //
  ///////////
  // Global clock gating: no clocking activity at all unless a write occurs.
  prim_clock_gating cg_we_global (
      .clk_i     ( clk_i     ),
      .en_i      ( we_a_i    ),
      .test_en_i ( test_en_i ),
      .clk_o     ( clk_int   )
  );

  // Sample input data
  // Use clk_int here, since otherwise we don't want to write anything anyway.
  always_ff @(posedge clk_int or negedge rst_ni) begin : sample_wdata
    if (!rst_ni) begin
      wdata_a_q <= WordZeroVal;
    end else begin
      if (we_a_i) begin
        wdata_a_q <= wdata_a_i;
      end
    end
  end

  // Write address decoding
  always_comb begin : wad
    for (int i = 1; i < NUM_WORDS; i++) begin : wad_word_iter
      if (we_a_i && (waddr_a_int == 5'(i))) begin
        waddr_onehot_a[i] = 1'b1;
      end else begin
        waddr_onehot_a[i] = 1'b0;
      end
    end
  end

  // Individual clock gating (if integrated clock-gating cells are available)
  for (genvar x = 1; x < NUM_WORDS; x++) begin : gen_cg_word_iter
    prim_clock_gating cg_i (
        .clk_i     ( clk_int           ),
        .en_i      ( waddr_onehot_a[x] ),
        .test_en_i ( test_en_i         ),
        .clk_o     ( mem_clocks[x]     )
    );
  end

  // Actual write operation:
  // Generate the sequential process for the NUM_WORDS words of the memory.
  // The process is synchronized with the clocks mem_clocks[i], i = 1, ..., NUM_WORDS-1.
  for (genvar i = 1; i < NUM_WORDS; i++) begin : g_rf_latches
    always_latch begin
      if (mem_clocks[i]) begin
        mem[i] = wdata_a_q;
      end
    end
  end

  // With dummy instructions enabled, R0 behaves as a real register but will always return 0 for
  // real instructions.
  if (DummyInstructions) begin : g_dummy_r0
    logic                 we_r0_dummy;
    logic                 r0_clock;
    logic [DataWidth-1:0] mem_r0;

    // Write enable for dummy R0 register (waddr_a_i will always be 0 for dummy instructions)
    assign we_r0_dummy = we_a_i & dummy_instr_id_i;

    // R0 clock gate
    prim_clock_gating cg_i (
        .clk_i     ( clk_int     ),
        .en_i      ( we_r0_dummy ),
        .test_en_i ( test_en_i   ),
        .clk_o     ( r0_clock    )
    );

    always_latch begin : latch_wdata
      if (r0_clock) begin
        mem_r0 = wdata_a_q;
      end
    end

    // Output the dummy data for dummy instructions, otherwise R0 reads as zero
    assign mem[0] = dummy_instr_id_i ? mem_r0 : WordZeroVal;

  end else begin : g_normal_r0
    logic unused_dummy_instr_id;
    assign unused_dummy_instr_id = dummy_instr_id_i;

    // R0 is hard-wired to zero.
    assign mem[0] = WordZeroVal;
  end

`ifdef VERILATOR
  // Verilator cannot model the latch/clock-gate structure; fail fast.
  initial begin
    $display("Latch-based register file not supported for Verilator simulation");
    $fatal;
  end
`endif

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_top.sv b/hw/ip/cheriot-ibex/rtl/cheriot_top.sv new file mode 100644 index 0000000..7dd2663 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_top.sv
@@ -0,0 +1,1191 @@
// Copyright lowRISC contributors.
// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

`ifdef RISCV_FORMAL
  `define RVFI
`endif

`include "prim_assert.sv"

/**
 * Top level module of the ibex RISC-V core
 *
 * Instantiates the CHERIoT Ibex core together with its register file, the
 * optional scrambled instruction-cache RAMs and, when SecureIbex is set, a
 * redundant lockstep shadow core whose outputs are compared against the main
 * core to raise the major/minor alert outputs.
 */
module cheriot_top import cheriot_pkg::*; import cheri_pkg::*; #(
  parameter bit          PMPEnable        = 1'b0,
  parameter int unsigned PMPGranularity   = 0,
  parameter int unsigned PMPNumRegions    = 4,
  parameter int unsigned MHPMCounterNum   = 0,
  parameter int unsigned MHPMCounterWidth = 40,
  parameter bit          RV32E            = 1'b0,
  parameter rv32m_e      RV32M            = RV32MFast,
  parameter rv32b_e      RV32B            = RV32BNone,
  parameter regfile_e    RegFile          = RegFileFF,
  parameter bit          BranchTargetALU  = 1'b0,
  parameter bit          WritebackStage   = 1'b0,
  parameter bit          ICache           = 1'b0,
  parameter bit          ICacheECC        = 1'b0,
  parameter bit          BranchPredictor  = 1'b0,
  parameter bit          DbgTriggerEn     = 1'b0,
  parameter int unsigned DbgHwBreakNum    = 1,
  parameter bit          SecureIbex       = 1'b0,
  parameter bit          ICacheScramble   = 1'b0,
  parameter lfsr_seed_t  RndCnstLfsrSeed  = RndCnstLfsrSeedDefault,
  parameter lfsr_perm_t  RndCnstLfsrPerm  = RndCnstLfsrPermDefault,
  parameter int unsigned DmHaltAddr       = 32'h1A110800,
  parameter int unsigned DmExceptionAddr  = 32'h1A110808,
  // Default seed and nonce for scrambling
  parameter logic [SCRAMBLE_KEY_W-1:0]   RndCnstIbexKey   = RndCnstIbexKeyDefault,
  parameter logic [SCRAMBLE_NONCE_W-1:0] RndCnstIbexNonce = RndCnstIbexNonceDefault,
  // CHERIoT parameters
  parameter bit          CHERIoTEn        = 1'b1,
  parameter int unsigned DataWidth        = 33,
  parameter int unsigned HeapBase         = 32'h2001_0000,
  parameter int unsigned TSMapBase        = 32'h2002_f000, // 4kB default
  parameter int unsigned TSMapSize        = 1024,          // 32-bit words
  parameter bit          MemCapFmt        = 1'b0,
  parameter bit          CheriPPLBC       = 1'b1,
  parameter bit          CheriSBND2       = 1'b0,
  parameter bit          CheriTBRE        = 1'b1,
  parameter bit          CheriStkZ        = 1'b1,
  parameter int unsigned MMRegDinW        = 128,
  parameter int unsigned MMRegDoutW       = 64
) (
  // Clock and Reset
  input  logic                         clk_i,
  input  logic                         rst_ni,

  input  logic                         test_en_i,     // enable all clock gates for testing
  input  prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,

  input  logic                         cheri_pmode_i,
  input  logic                         cheri_tsafe_en_i,

  input  logic [31:0]                  hart_id_i,
  input  logic [31:0]                  boot_addr_i,

  // Instruction memory interface
  output logic                         instr_req_o,
  input  logic                         instr_gnt_i,
  input  logic                         instr_rvalid_i,
  output logic [31:0]                  instr_addr_o,
  input  logic [31:0]                  instr_rdata_i,
  input  logic [6:0]                   instr_rdata_intg_i,
  input  logic                         instr_err_i,

  // Data memory interface
  output logic                         data_req_o,
  output logic                         data_is_cap_o,
  input  logic                         data_gnt_i,
  input  logic                         data_rvalid_i,
  output logic                         data_we_o,
  output logic [3:0]                   data_be_o,
  output logic [31:0]                  data_addr_o,
  output logic [DataWidth-1:0]         data_wdata_o,
  output logic [6:0]                   data_wdata_intg_o,
  input  logic [DataWidth-1:0]         data_rdata_i,
  input  logic [6:0]                   data_rdata_intg_i,
  input  logic                         data_err_i,

  // TS map memory interface
  output logic                         tsmap_cs_o,
  output logic [15:0]                  tsmap_addr_o,
  input  logic [31:0]                  tsmap_rdata_i,
  input  logic [6:0]                   tsmap_rdata_intg_i,
  input  logic [MMRegDinW-1:0]         mmreg_corein_i,
  output logic [MMRegDoutW-1:0]        mmreg_coreout_o,

  // Interrupt inputs
  input  logic                         irq_software_i,
  input  logic                         irq_timer_i,
  input  logic                         irq_external_i,
  input  logic [14:0]                  irq_fast_i,
  input  logic                         irq_nm_i,       // non-maskable interrupt

  // Scrambling Interface
  input  logic                         scramble_key_valid_i,
  input  logic [SCRAMBLE_KEY_W-1:0]    scramble_key_i,
  input  logic [SCRAMBLE_NONCE_W-1:0]  scramble_nonce_i,
  output logic                         scramble_req_o,

  // Debug Interface
  input  logic                         debug_req_i,
  output crash_dump_t                  crash_dump_o,
  output logic                         double_fault_seen_o,

  // RISC-V Formal Interface
  // Does not comply with the coding standards of _i/_o suffixes, but follows
  // the convention of RISC-V Formal Interface Specification.
`ifdef RVFI
  output logic                         rvfi_valid,
  output logic [63:0]                  rvfi_order,
  output logic [31:0]                  rvfi_insn,
  output logic                         rvfi_trap,
  output logic                         rvfi_halt,
  output logic                         rvfi_intr,
  output logic [ 1:0]                  rvfi_mode,
  output logic [ 1:0]                  rvfi_ixl,
  output logic [ 4:0]                  rvfi_rs1_addr,
  output logic [ 4:0]                  rvfi_rs2_addr,
  output logic [ 4:0]                  rvfi_rs3_addr,
  output logic [31:0]                  rvfi_rs1_rdata,
  output logic [31:0]                  rvfi_rs2_rdata,
  output logic [31:0]                  rvfi_rs3_rdata,
  output reg_cap_t                     rvfi_rs1_rcap,
  output reg_cap_t                     rvfi_rs2_rcap,
  output reg_cap_t                     rvfi_rd_wcap,
  output logic [ 4:0]                  rvfi_rd_addr,
  output logic [31:0]                  rvfi_rd_wdata,
  output logic [31:0]                  rvfi_pc_rdata,
  output logic [31:0]                  rvfi_pc_wdata,
  output logic [31:0]                  rvfi_mem_addr,
  output logic [ 3:0]                  rvfi_mem_rmask,
  output logic [ 3:0]                  rvfi_mem_wmask,
  output logic [DataWidth-1:0]         rvfi_mem_rdata,
  output logic [DataWidth-1:0]         rvfi_mem_wdata,
  output logic                         rvfi_mem_is_cap,
  output reg_cap_t                     rvfi_mem_rcap,
  output reg_cap_t                     rvfi_mem_wcap,

  output logic [31:0]                  rvfi_ext_mip,
  output logic                         rvfi_ext_nmi,
  output logic                         rvfi_ext_debug_req,
  output logic [63:0]                  rvfi_ext_mcycle,
`endif

  // CPU Control Signals
  input  fetch_enable_t                fetch_enable_i,
  output logic                         alert_minor_o,
  output logic                         alert_major_internal_o,
  output logic                         alert_major_bus_o,
  output logic                         core_sleep_o,

  // DFT bypass controls
  input  logic                         scan_rst_ni
);

  // Security features (lockstep core, dummy instructions, register file ECC)
  // are all enabled together by SecureIbex.
  localparam bit          Lockstep          = SecureIbex;
  localparam bit          ResetAll          = Lockstep;
  localparam bit          DummyInstructions = SecureIbex;
  localparam bit          RegFileECC        = SecureIbex;
  localparam int unsigned RegFileDataWidth  = RegFileECC ? 32 + 7 : 32;
  // Icache parameters
  localparam int unsigned BusSizeECC        = ICacheECC ? (BUS_SIZE + 7) : BUS_SIZE;
  localparam int unsigned LineSizeECC       = BusSizeECC * IC_LINE_BEATS;
  localparam int unsigned TagSizeECC        = ICacheECC ? (IC_TAG_SIZE + 6) : IC_TAG_SIZE;
  // Scrambling Parameter
  localparam int unsigned NumAddrScrRounds  = ICacheScramble ? 2 : 0;
  localparam int unsigned NumDiffRounds     = NumAddrScrRounds;

  // Clock signals
  logic                        clk;
  logic                        core_busy_d, core_busy_q;
  logic                        clock_en;
  logic                        irq_pending;
  // Core <-> Register file signals
  logic                        dummy_instr_id;
  logic [4:0]                  rf_raddr_a;
  logic [4:0]                  rf_raddr_b;
  logic [4:0]                  rf_waddr_wb;
  logic                        rf_we_wb;
  logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc;
  logic [RegFileDataWidth-1:0] rf_rdata_a_ecc, rf_rdata_a_ecc_buf;
  logic [RegFileDataWidth-1:0] rf_rdata_b_ecc, rf_rdata_b_ecc_buf;
  reg_cap_t                    rf_rcap_a, rf_rcap_b;
  reg_cap_t                    rf_wcap;

  // Core <-> RAMs signals
  logic [IC_NUM_WAYS-1:0]      ic_tag_req;
  logic                        ic_tag_write;
  logic [IC_INDEX_W-1:0]       ic_tag_addr;
  logic [TagSizeECC-1:0]       ic_tag_wdata;
  logic [TagSizeECC-1:0]       ic_tag_rdata [IC_NUM_WAYS];
  logic [IC_NUM_WAYS-1:0]      ic_data_req;
  logic                        ic_data_write;
  logic [IC_INDEX_W-1:0]       ic_data_addr;
  logic [LineSizeECC-1:0]      ic_data_wdata;
  logic [LineSizeECC-1:0]      ic_data_rdata [IC_NUM_WAYS];
  // Alert signals
  logic                        core_alert_major, core_alert_minor;
  logic                        lockstep_alert_major_internal, lockstep_alert_major_bus;
  logic                        lockstep_alert_minor;
  // Scramble signals
  logic                        icache_inval;
  logic [SCRAMBLE_KEY_W-1:0]   scramble_key_q;
  logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_q;
  logic                        scramble_key_valid_d, scramble_key_valid_q;
  logic                        scramble_req_d, scramble_req_q;

  fetch_enable_t               fetch_enable_buf;

  // CHERIoT register-file extension signals: per-register ready bits plus the
  // tag-reservation (trsv) / tag-revocation (trvk) interface used by PPLBC.
  logic [31:0]                 rf_reg_rdy;
  logic [4:0]                  rf_trvk_addr;
  logic                        rf_trvk_en;
  logic                        rf_trvk_clrtag;
  logic [6:0]                  rf_trvk_par;
  logic [4:0]                  rf_trsv_addr;
  logic                        rf_trsv_en;
  logic [6:0]                  rf_trsv_par;
  logic                        rf_alert;

  /////////////////////
  // Main clock gate //
  /////////////////////

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      core_busy_q <= 1'b0;
    end else begin
      core_busy_q <= core_busy_d;
    end
  end

  // Keep the core clock running while it is busy or any wake-up source
  // (debug request, pending IRQ, NMI) is asserted; sleep otherwise.
  assign clock_en     = core_busy_q | debug_req_i | irq_pending | irq_nm_i;
  assign core_sleep_o = ~clock_en;

  prim_clock_gating core_clock_gate_i (
    .clk_i    (clk_i),
    .en_i     (clock_en),
    .test_en_i(test_en_i),
    .clk_o    (clk)
  );

  ////////////////////////
  // Core instantiation //
  ////////////////////////

  // Buffer security critical signals to prevent synthesis optimisation removing them
  prim_buf #(.Width($bits(fetch_enable_t))) u_fetch_enable_buf (
    .in_i (fetch_enable_i),
    .out_o(fetch_enable_buf)
  );

  prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_a_ecc_buf (
    .in_i (rf_rdata_a_ecc),
    .out_o(rf_rdata_a_ecc_buf)
  );

  prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_b_ecc_buf (
    .in_i (rf_rdata_b_ecc),
    .out_o(rf_rdata_b_ecc_buf)
  );

  cheriot_core #(
    .PMPEnable        (PMPEnable),
    .PMPGranularity   (PMPGranularity),
    .PMPNumRegions    (PMPNumRegions),
    .MHPMCounterNum   (MHPMCounterNum),
    .MHPMCounterWidth (MHPMCounterWidth),
    .RV32E            (RV32E),
    .RV32M            (RV32M),
    .RV32B            (RV32B),
    .BranchTargetALU  (BranchTargetALU),
    .ICache           (ICache),
    .ICacheECC        (ICacheECC),
    .BusSizeECC       (BusSizeECC),
    .TagSizeECC       (TagSizeECC),
    .LineSizeECC      (LineSizeECC),
    .BranchPredictor  (BranchPredictor),
    .DbgTriggerEn     (DbgTriggerEn),
    .DbgHwBreakNum    (DbgHwBreakNum),
    .WritebackStage   (WritebackStage),
    .ResetAll         (ResetAll),
    .RndCnstLfsrSeed  (RndCnstLfsrSeed),
    .RndCnstLfsrPerm  (RndCnstLfsrPerm),
    .SecureIbex       (SecureIbex),
    .DummyInstructions(DummyInstructions),
    .RegFileECC       (RegFileECC),
    .RegFileDataWidth (RegFileDataWidth),
    .DmHaltAddr       (DmHaltAddr),
    .DmExceptionAddr  (DmExceptionAddr),
    .CHERIoTEn        (CHERIoTEn),
    .DataWidth        (DataWidth),
    .HeapBase         (HeapBase),
    .TSMapBase        (TSMapBase),
    .TSMapSize        (TSMapSize),
    .MemCapFmt        (MemCapFmt),
    .CheriPPLBC       (CheriPPLBC),
    .CheriSBND2       (CheriSBND2),
    .CheriTBRE        (CheriTBRE),
    .CheriStkZ        (CheriStkZ)
  ) u_cheriot_core (
    .clk_i(clk),
    .rst_ni,

    .hart_id_i,
    .boot_addr_i,
    .cheri_pmode_i,
    .cheri_tsafe_en_i,

    .instr_req_o,
    .instr_gnt_i,
    .instr_rvalid_i,
    .instr_addr_o,
    .instr_rdata_i,
    .instr_err_i,

    .data_req_o,
    .data_is_cap_o,
    .data_gnt_i,
    .data_rvalid_i,
    .data_we_o,
    .data_be_o,
    .data_addr_o,
    .data_wdata_o,
    .data_rdata_i,
    .data_err_i,

    .dummy_instr_id_o (dummy_instr_id),
    .rf_raddr_a_o     (rf_raddr_a),
    .rf_raddr_b_o     (rf_raddr_b),
    .rf_waddr_wb_o    (rf_waddr_wb),
    .rf_we_wb_o       (rf_we_wb),
    .rf_wdata_wb_ecc_o(rf_wdata_wb_ecc),
    .rf_rdata_a_ecc_i (rf_rdata_a_ecc_buf),
    .rf_rdata_b_ecc_i (rf_rdata_b_ecc_buf),
    .rf_wcap_wb_o     (rf_wcap),
    .rf_rcap_a_i      (rf_rcap_a),
    .rf_rcap_b_i      (rf_rcap_b),
    .rf_reg_rdy_i     (rf_reg_rdy),
    .rf_trsv_en_o     (rf_trsv_en),
    .rf_trsv_addr_o   (rf_trsv_addr),
    .rf_trsv_par_o    (rf_trsv_par),
    .rf_trvk_addr_o   (rf_trvk_addr),
    .rf_trvk_en_o     (rf_trvk_en),
    .rf_trvk_clrtag_o (rf_trvk_clrtag),
    .rf_trvk_par_o    (rf_trvk_par),
    .tsmap_cs_o,
    .tsmap_addr_o,
    .tsmap_rdata_i,
    .mmreg_corein_i,
    .mmreg_coreout_o,

    .ic_tag_req_o      (ic_tag_req),
    .ic_tag_write_o    (ic_tag_write),
    .ic_tag_addr_o     (ic_tag_addr),
    .ic_tag_wdata_o    (ic_tag_wdata),
    .ic_tag_rdata_i    (ic_tag_rdata),
    .ic_data_req_o     (ic_data_req),
    .ic_data_write_o   (ic_data_write),
    .ic_data_addr_o    (ic_data_addr),
    .ic_data_wdata_o   (ic_data_wdata),
    .ic_data_rdata_i   (ic_data_rdata),
    .ic_scr_key_valid_i(scramble_key_valid_q),

    .irq_software_i,
    .irq_timer_i,
    .irq_external_i,
    .irq_fast_i,
    .irq_nm_i,
    .irq_pending_o(irq_pending),

    .debug_req_i,
    .crash_dump_o,
    .double_fault_seen_o,

`ifdef RVFI
    .rvfi_valid,
    .rvfi_order,
    .rvfi_insn,
    .rvfi_trap,
    .rvfi_halt,
    .rvfi_intr,
    .rvfi_mode,
    .rvfi_ixl,
    .rvfi_rs1_addr,
    .rvfi_rs2_addr,
    .rvfi_rs3_addr,
    .rvfi_rs1_rdata,
    .rvfi_rs1_rcap,
    .rvfi_rs2_rdata,
    .rvfi_rs2_rcap,
    .rvfi_rs3_rdata,
    .rvfi_rd_addr,
    .rvfi_rd_wdata,
    .rvfi_rd_wcap,
    .rvfi_pc_rdata,
    .rvfi_pc_wdata,
    .rvfi_mem_addr,
    .rvfi_mem_rmask,
    .rvfi_mem_wmask,
    .rvfi_mem_rdata,
    .rvfi_mem_wdata,
    .rvfi_mem_rcap,
    .rvfi_mem_wcap,
    .rvfi_mem_is_cap,
    .rvfi_ext_mip,
    .rvfi_ext_nmi,
    .rvfi_ext_debug_req,
    .rvfi_ext_mcycle,
`endif

    .fetch_enable_i(fetch_enable_buf),
    .alert_minor_o (core_alert_minor),
    .alert_major_o (core_alert_major),
    .icache_inval_o(icache_inval),
    .core_busy_o   (core_busy_d)
  );

  /////////////////////////////////
  // Register file Instantiation //
  /////////////////////////////////
  if (!CHERIoTEn) begin
    assign rf_alert = 1'b0;   // rf_alert only available in cheri_regfile
  end

  if (CHERIoTEn) begin : gen_regfile_cheriot

    localparam int unsigned NRegs = RV32E ? 16 : 32;
    localparam int unsigned NCaps = 16;

    cheri_regfile #(
      .NREGS     (NRegs),
      .NCAPS     (NCaps),
      .RegFileECC(RegFileECC),
      .DataWidth (RegFileDataWidth),
      .CheriPPLBC(CheriPPLBC)
    ) register_file_i (
      .clk_i        (clk),
      .rst_ni       (rst_ni),
      .par_rst_ni   (rst_ni),
      .raddr_a_i    (rf_raddr_a),
      .rdata_a_o    (rf_rdata_a_ecc),
      .rcap_a_o     (rf_rcap_a),
      .raddr_b_i    (rf_raddr_b),
      .rdata_b_o    (rf_rdata_b_ecc),
      .rcap_b_o     (rf_rcap_b),
      .waddr_a_i    (rf_waddr_wb),
      .wdata_a_i    (rf_wdata_wb_ecc),
      .wcap_a_i     (rf_wcap),
      .we_a_i       (rf_we_wb),
      .reg_rdy_o    (rf_reg_rdy),
      .trvk_addr_i  (rf_trvk_addr),
      .trvk_en_i    (rf_trvk_en),
      .trvk_clrtag_i(rf_trvk_clrtag),
      .trvk_par_i   (rf_trvk_par),
      .trsv_addr_i  (rf_trsv_addr),
      .trsv_en_i    (rf_trsv_en),
      .trsv_par_i   (rf_trsv_par),
      .alert_o      (rf_alert)
    );

  end else if (RegFile == RegFileFF) begin : gen_regfile_ff
    cheriot_register_file_ff #(
      .RV32E            (RV32E),
      .DataWidth        (RegFileDataWidth),
      .DummyInstructions(DummyInstructions),
      .WordZeroVal      (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
    ) register_file_i (
      .clk_i (clk),
      .rst_ni(rst_ni),

      .test_en_i       (test_en_i),
      .dummy_instr_id_i(dummy_instr_id),

      .raddr_a_i(rf_raddr_a),
      .rdata_a_o(rf_rdata_a_ecc),
      .raddr_b_i(rf_raddr_b),
      .rdata_b_o(rf_rdata_b_ecc),
      .waddr_a_i(rf_waddr_wb),
      .wdata_a_i(rf_wdata_wb_ecc),
      .we_a_i   (rf_we_wb)
    );

    // Non-CHERI register files carry no capability metadata and are always ready.
    assign rf_rcap_a  = NULL_REG_CAP;
    assign rf_rcap_b  = NULL_REG_CAP;
    assign rf_reg_rdy = {32{1'b1}};

  end else if (RegFile == RegFileFPGA) begin : gen_regfile_fpga
    cheriot_register_file_fpga #(
      .RV32E            (RV32E),
      .DataWidth        (RegFileDataWidth),
      .DummyInstructions(DummyInstructions),
      .WordZeroVal      (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
    ) register_file_i (
      .clk_i (clk),
      .rst_ni(rst_ni),

      .test_en_i       (test_en_i),
      .dummy_instr_id_i(dummy_instr_id),

      .raddr_a_i(rf_raddr_a),
      .rdata_a_o(rf_rdata_a_ecc),
      .raddr_b_i(rf_raddr_b),
      .rdata_b_o(rf_rdata_b_ecc),
      .waddr_a_i(rf_waddr_wb),
      .wdata_a_i(rf_wdata_wb_ecc),
      .we_a_i   (rf_we_wb)
    );

    assign rf_rcap_a  = NULL_REG_CAP;
    assign rf_rcap_b  = NULL_REG_CAP;
    assign rf_reg_rdy = {32{1'b1}};

  end else if (RegFile == RegFileLatch) begin : gen_regfile_latch
    cheriot_register_file_latch #(
      .RV32E            (RV32E),
      .DataWidth        (RegFileDataWidth),
      .DummyInstructions(DummyInstructions),
      .WordZeroVal      (RegFileDataWidth'(prim_secded_pkg::SecdedInv3932ZeroWord))
    ) register_file_i (
      .clk_i (clk),
      .rst_ni(rst_ni),

      .test_en_i       (test_en_i),
      .dummy_instr_id_i(dummy_instr_id),

      .raddr_a_i(rf_raddr_a),
      .rdata_a_o(rf_rdata_a_ecc),
      .raddr_b_i(rf_raddr_b),
      .rdata_b_o(rf_rdata_b_ecc),
      .waddr_a_i(rf_waddr_wb),
      .wdata_a_i(rf_wdata_wb_ecc),
      .we_a_i   (rf_we_wb)
    );

    assign rf_rcap_a  = NULL_REG_CAP;
    assign rf_rcap_b  = NULL_REG_CAP;
    assign rf_reg_rdy = {32{1'b1}};

  end

  ///////////////////////////////
  // Scrambling Infrastructure //
  ///////////////////////////////

  if (ICacheScramble) begin : gen_scramble

    // Scramble key valid starts with OTP returning new valid key and stays high
    // until we request a new valid key.
    assign scramble_key_valid_d = scramble_req_q ? scramble_key_valid_i :
                                  icache_inval   ? 1'b0                 :
                                                   scramble_key_valid_q;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        scramble_key_q   <= RndCnstIbexKey;
        scramble_nonce_q <= RndCnstIbexNonce;
      end else if (scramble_key_valid_i) begin
        scramble_key_q   <= scramble_key_i;
        scramble_nonce_q <= scramble_nonce_i;
      end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        scramble_key_valid_q <= 1'b1;
        scramble_req_q       <= '0;
      end else begin
        scramble_key_valid_q <= scramble_key_valid_d;
        scramble_req_q       <= scramble_req_d;
      end
    end

    // Scramble key request starts with invalidate signal from ICache and stays high
    // until we got a valid key.
    assign scramble_req_d = scramble_req_q ? ~scramble_key_valid_i : icache_inval;
    assign scramble_req_o = scramble_req_q;

  end else begin : gen_noscramble

    // Tie off the scramble state and waive the now-unused inputs.
    logic unused_scramble_inputs = scramble_key_valid_i & (|scramble_key_i) & (|RndCnstIbexKey) &
                                   (|scramble_nonce_i) & (|RndCnstIbexNonce) & scramble_req_q &
                                   icache_inval & scramble_key_valid_d & scramble_req_d;

    assign scramble_req_d       = 1'b0;
    assign scramble_req_q       = 1'b0;
    assign scramble_req_o       = 1'b0;
    assign scramble_key_q       = '0;
    assign scramble_nonce_q     = '0;
    assign scramble_key_valid_q = 1'b1;
    assign scramble_key_valid_d = 1'b1;
  end

  ////////////////////////
  // Rams Instantiation //
  ////////////////////////

  if (ICache) begin : gen_rams

    for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_rams_inner

      // Tag RAM instantiation
      prim_ram_1p_scr #(
        .Width            (TagSizeECC),
        .Depth            (IC_NUM_LINES),
        .DataBitsPerMask  (TagSizeECC),
        .EnableParity     (0),
        .DiffWidth        (TagSizeECC),
        .NumAddrScrRounds (NumAddrScrRounds),
        .NumDiffRounds    (NumDiffRounds)
      ) tag_bank (
        .clk_i,
        .rst_ni,

        .key_valid_i (scramble_key_valid_q),
        .key_i       (scramble_key_q),
        .nonce_i     (scramble_nonce_q),

        .req_i       (ic_tag_req[way]),

        .gnt_o       (),
        .write_i     (ic_tag_write),
        .addr_i      (ic_tag_addr),
        .wdata_i     (ic_tag_wdata),
        .wmask_i     ({TagSizeECC{1'b1}}),
        .intg_error_i(1'b0),

        .rdata_o     (ic_tag_rdata[way]),
        .rvalid_o    (),
        .raddr_o     (),
        .rerror_o    (),
        .cfg_i       (ram_cfg_i)
      );

      // Data RAM instantiation
      prim_ram_1p_scr #(
        .Width              (LineSizeECC),
        .Depth              (IC_NUM_LINES),
        .DataBitsPerMask    (LineSizeECC),
        .ReplicateKeyStream (1),
        .EnableParity       (0),
        .DiffWidth          (LineSizeECC),
        .NumAddrScrRounds   (NumAddrScrRounds),
        .NumDiffRounds      (NumDiffRounds)
      ) data_bank (
        .clk_i,
        .rst_ni,

        .key_valid_i (scramble_key_valid_q),
        .key_i       (scramble_key_q),
        .nonce_i     (scramble_nonce_q),

        .req_i       (ic_data_req[way]),

        .gnt_o       (),
        .write_i     (ic_data_write),
        .addr_i      (ic_data_addr),
        .wdata_i     (ic_data_wdata),
        .wmask_i     ({LineSizeECC{1'b1}}),
        .intg_error_i(1'b0),

        .rdata_o     (ic_data_rdata[way]),
        .rvalid_o    (),
        .raddr_o     (),
        .rerror_o    (),
        .cfg_i       (ram_cfg_i)
      );
    end

  end else begin : gen_norams

    prim_ram_1p_pkg::ram_1p_cfg_t unused_ram_cfg;
    logic unused_ram_inputs;

    assign unused_ram_cfg    = ram_cfg_i;
    assign unused_ram_inputs = (|ic_tag_req) & ic_tag_write & (|ic_tag_addr) & (|ic_tag_wdata) &
                               (|ic_data_req) & ic_data_write & (|ic_data_addr) & (|ic_data_wdata) &
                               (|scramble_key_q) & (|scramble_nonce_q) & scramble_key_valid_q &
                               scramble_key_valid_d & (|scramble_nonce_q) &
                               (|NumAddrScrRounds);

    assign ic_tag_rdata  = '{default:'b0};
    assign ic_data_rdata = '{default:'b0};

  end

  // Redundant lockstep core implementation
  if (Lockstep) begin : gen_lockstep
    // Note: certain synthesis tools like DC are very smart at optimizing away redundant logic.
    // Hence, we have to insert an optimization barrier at the IOs of the lockstep Ibex.
    // This is achieved by manually buffering each bit using prim_buf.
    // Our Xilinx and DC synthesis flows make sure that these buffers cannot be optimized away
    // using keep attributes (Vivado) and size_only constraints (DC).

    // Capability structs are flattened to plain vectors so they can travel
    // through the prim_buf optimisation barrier.
    logic [37:0] rf_wcap_vec, rf_rcap_a_vec, rf_rcap_b_vec;

    localparam int NumBufferBits = $bits({
      hart_id_i,
      boot_addr_i,
      instr_req_o,
      instr_gnt_i,
      instr_rvalid_i,
      instr_addr_o,
      instr_rdata_i,
      instr_rdata_intg_i,
      instr_err_i,
      data_req_o,
      data_gnt_i,
      data_rvalid_i,
      data_we_o,
      data_be_o,
      data_addr_o,
      data_wdata_o,
      data_is_cap_o,
      data_rdata_i,
      data_rdata_intg_i,
      data_err_i,
      dummy_instr_id,
      rf_raddr_a,
      rf_raddr_b,
      rf_waddr_wb,
      rf_we_wb,
      rf_wdata_wb_ecc,
      rf_rdata_a_ecc,
      rf_rdata_b_ecc,
      ic_tag_req,
      ic_tag_write,
      ic_tag_addr,
      ic_tag_wdata,
      ic_data_req,
      ic_data_write,
      ic_data_addr,
      ic_data_wdata,
      scramble_key_valid_i,
      irq_software_i,
      irq_timer_i,
      irq_external_i,
      irq_fast_i,
      irq_nm_i,
      irq_pending,
      debug_req_i,
      crash_dump_o,
      double_fault_seen_o,
      fetch_enable_i,
      icache_inval,
      core_busy_d,
      cheri_pmode_i,
      cheri_tsafe_en_i,
      rf_wcap_vec,
      rf_rcap_a_vec,
      rf_rcap_b_vec,
      rf_reg_rdy,
      rf_trsv_en,
      rf_trsv_addr,
      rf_trsv_par,
      rf_trvk_addr,
      rf_trvk_en,
      rf_trvk_clrtag,
      rf_trvk_par,
      tsmap_cs_o,
      tsmap_addr_o,
      tsmap_rdata_i,
      tsmap_rdata_intg_i,
      mmreg_corein_i,
      mmreg_coreout_o
    });

    logic [NumBufferBits-1:0] buf_in, buf_out;

    logic [31:0]                 hart_id_local;
    logic [31:0]                 boot_addr_local;

    logic                        instr_req_local;
    logic                        instr_gnt_local;
    logic                        instr_rvalid_local;
    logic [31:0]                 instr_addr_local;
    logic [31:0]                 instr_rdata_local;
    logic [6:0]                  instr_rdata_intg_local;
    logic                        instr_err_local;

    logic                        data_req_local;
    logic                        data_gnt_local;
    logic                        data_rvalid_local;
    logic                        data_we_local;
    logic [3:0]                  data_be_local;
    logic [31:0]                 data_addr_local;
    logic [DataWidth-1:0]        data_wdata_local;
    logic                        data_is_cap_local;
    logic [6:0]                  data_wdata_intg_local;
    logic [DataWidth-1:0]        data_rdata_local;
    logic [6:0]                  data_rdata_intg_local;
    logic                        data_err_local;

    logic                        dummy_instr_id_local;
    logic [4:0]                  rf_raddr_a_local;
    logic [4:0]                  rf_raddr_b_local;
    logic [4:0]                  rf_waddr_wb_local;
    logic                        rf_we_wb_local;
    logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc_local;
    logic [RegFileDataWidth-1:0] rf_rdata_a_ecc_local;
    logic [RegFileDataWidth-1:0] rf_rdata_b_ecc_local;

    logic                        cheri_pmode_local;
    logic                        cheri_tsafe_en_local;
    logic [37:0]                 rf_wcap_vec_local;
    logic [37:0]                 rf_rcap_a_vec_local;
    logic [37:0]                 rf_rcap_b_vec_local;
    logic [31:0]                 rf_reg_rdy_local;
    logic                        rf_trsv_en_local;
    logic [4:0]                  rf_trsv_addr_local;
    logic [6:0]                  rf_trsv_par_local;
    logic [4:0]                  rf_trvk_addr_local;
    logic                        rf_trvk_en_local;
    logic                        rf_trvk_clrtag_local;
    logic [6:0]                  rf_trvk_par_local;
    logic                        tsmap_cs_local;
    logic [15:0]                 tsmap_addr_local;
    logic [31:0]                 tsmap_rdata_local;
    logic [6:0]                  tsmap_rdata_intg_local;
    logic [MMRegDinW-1:0]        mmreg_corein_local;
    logic [MMRegDoutW-1:0]       mmreg_coreout_local;
    reg_cap_t                    rf_wcap_local, rf_rcap_a_local, rf_rcap_b_local;

    logic [IC_NUM_WAYS-1:0]      ic_tag_req_local;
    logic                        ic_tag_write_local;
    logic [IC_INDEX_W-1:0]       ic_tag_addr_local;
    logic [TagSizeECC-1:0]       ic_tag_wdata_local;
    logic [IC_NUM_WAYS-1:0]      ic_data_req_local;
    logic                        ic_data_write_local;
    logic [IC_INDEX_W-1:0]       ic_data_addr_local;
    logic [LineSizeECC-1:0]      ic_data_wdata_local;
    logic                        scramble_key_valid_local;

    logic                        irq_software_local;
    logic                        irq_timer_local;
    logic                        irq_external_local;
    logic [14:0]                 irq_fast_local;
    logic                        irq_nm_local;
    logic                        irq_pending_local;

    logic                        debug_req_local;
    crash_dump_t                 crash_dump_local;
    logic                        double_fault_seen_local;
    fetch_enable_t               fetch_enable_local;

    logic                        icache_inval_local;
    logic                        core_busy_local;

    // Pack order must match the unpack order of buf_out below, field for field.
    assign buf_in = {
      hart_id_i,
      boot_addr_i,
      instr_req_o,
      instr_gnt_i,
      instr_rvalid_i,
      instr_addr_o,
      instr_rdata_i,
      instr_rdata_intg_i,
      instr_err_i,
      data_req_o,
      data_gnt_i,
      data_rvalid_i,
      data_we_o,
      data_be_o,
      data_addr_o,
      data_wdata_o,
      data_is_cap_o,
      data_rdata_i,
      data_rdata_intg_i,
      data_err_i,
      dummy_instr_id,
      rf_raddr_a,
      rf_raddr_b,
      rf_waddr_wb,
      rf_we_wb,
      rf_wdata_wb_ecc,
      rf_rdata_a_ecc,
      rf_rdata_b_ecc,
      ic_tag_req,
      ic_tag_write,
      ic_tag_addr,
      ic_tag_wdata,
      ic_data_req,
      ic_data_write,
      ic_data_addr,
      ic_data_wdata,
      scramble_key_valid_q,
      irq_software_i,
      irq_timer_i,
      irq_external_i,
      irq_fast_i,
      irq_nm_i,
      irq_pending,
      debug_req_i,
      crash_dump_o,
      double_fault_seen_o,
      fetch_enable_i,
      icache_inval,
      core_busy_d,
      cheri_pmode_i,
      cheri_tsafe_en_i,
      rf_wcap_vec,
      rf_rcap_a_vec,
      rf_rcap_b_vec,
      rf_reg_rdy,
      rf_trsv_en,
      rf_trsv_addr,
      rf_trsv_par,
      rf_trvk_addr,
      rf_trvk_en,
      rf_trvk_clrtag,
      rf_trvk_par,
      tsmap_cs_o,
      tsmap_addr_o,
      tsmap_rdata_i,
      tsmap_rdata_intg_i,
      mmreg_corein_i,
      mmreg_coreout_o
    };

    assign {
      hart_id_local,
      boot_addr_local,
      instr_req_local,
      instr_gnt_local,
      instr_rvalid_local,
      instr_addr_local,
      instr_rdata_local,
      instr_rdata_intg_local,
      instr_err_local,
      data_req_local,
      data_gnt_local,
      data_rvalid_local,
      data_we_local,
      data_be_local,
      data_addr_local,
      data_wdata_local,
      data_is_cap_local,
      data_rdata_local,
      data_rdata_intg_local,
      data_err_local,
      dummy_instr_id_local,
      rf_raddr_a_local,
      rf_raddr_b_local,
      rf_waddr_wb_local,
      rf_we_wb_local,
      rf_wdata_wb_ecc_local,
      rf_rdata_a_ecc_local,
      rf_rdata_b_ecc_local,
      ic_tag_req_local,
      ic_tag_write_local,
      ic_tag_addr_local,
      ic_tag_wdata_local,
      ic_data_req_local,
      ic_data_write_local,
      ic_data_addr_local,
      ic_data_wdata_local,
      scramble_key_valid_local,
      irq_software_local,
      irq_timer_local,
      irq_external_local,
      irq_fast_local,
      irq_nm_local,
      irq_pending_local,
      debug_req_local,
      crash_dump_local,
      double_fault_seen_local,
      fetch_enable_local,
      icache_inval_local,
      core_busy_local,
      cheri_pmode_local,
      cheri_tsafe_en_local,
      rf_wcap_vec_local,
      rf_rcap_a_vec_local,
      rf_rcap_b_vec_local,
      rf_reg_rdy_local,
      rf_trsv_en_local,
      rf_trsv_addr_local,
      rf_trsv_par_local,
      rf_trvk_addr_local,
      rf_trvk_en_local,
      rf_trvk_clrtag_local,
      rf_trvk_par_local,
      tsmap_cs_local,
      tsmap_addr_local,
      tsmap_rdata_local,
      tsmap_rdata_intg_local,
      mmreg_corein_local,
      mmreg_coreout_local
    } = buf_out;

    assign rf_wcap_vec     = reg2vec(rf_wcap);
    assign rf_rcap_a_vec   = reg2vec(rf_rcap_a);
    assign rf_rcap_b_vec   = reg2vec(rf_rcap_b);
    assign rf_wcap_local   = vec2reg(rf_wcap_vec_local);
    assign rf_rcap_a_local = vec2reg(rf_rcap_a_vec_local);
    assign rf_rcap_b_local = vec2reg(rf_rcap_b_vec_local);

    // Manually buffer all input signals.
    prim_buf #(.Width(NumBufferBits)) u_signals_prim_buf (
      .in_i(buf_in),
      .out_o(buf_out)
    );

    logic [TagSizeECC-1:0]  ic_tag_rdata_local [IC_NUM_WAYS];
    logic [LineSizeECC-1:0] ic_data_rdata_local [IC_NUM_WAYS];
    for (genvar k = 0; k < IC_NUM_WAYS; k++) begin : gen_ways
      prim_buf #(.Width(TagSizeECC)) u_tag_prim_buf (
        .in_i(ic_tag_rdata[k]),
        .out_o(ic_tag_rdata_local[k])
      );
      prim_buf #(.Width(LineSizeECC)) u_data_prim_buf (
        .in_i(ic_data_rdata[k]),
        .out_o(ic_data_rdata_local[k])
      );
    end

    logic lockstep_alert_minor_local, lockstep_alert_major_internal_local;
    logic lockstep_alert_major_bus_local;

    // The lockstep parameterisation must match u_cheriot_core exactly: any
    // configuration mismatch makes the shadow core diverge and raise spurious
    // major alerts. CheriStkZ was previously omitted here (the lockstep core
    // silently used its default instead of the top-level override).
    // NOTE(review): confirm cheriot_lockstep declares a CheriStkZ parameter.
    cheriot_lockstep #(
      .PMPEnable        (PMPEnable),
      .PMPGranularity   (PMPGranularity),
      .PMPNumRegions    (PMPNumRegions),
      .MHPMCounterNum   (MHPMCounterNum),
      .MHPMCounterWidth (MHPMCounterWidth),
      .RV32E            (RV32E),
      .RV32M            (RV32M),
      .RV32B            (RV32B),
      .BranchTargetALU  (BranchTargetALU),
      .ICache           (ICache),
      .ICacheECC        (ICacheECC),
      .BusSizeECC       (BusSizeECC),
      .TagSizeECC       (TagSizeECC),
      .LineSizeECC      (LineSizeECC),
      .BranchPredictor  (BranchPredictor),
      .DbgTriggerEn     (DbgTriggerEn),
      .DbgHwBreakNum    (DbgHwBreakNum),
      .WritebackStage   (WritebackStage),
      .ResetAll         (ResetAll),
      .RndCnstLfsrSeed  (RndCnstLfsrSeed),
      .RndCnstLfsrPerm  (RndCnstLfsrPerm),
      .SecureIbex       (SecureIbex),
      .DummyInstructions(DummyInstructions),
      .RegFileECC       (RegFileECC),
      .RegFileDataWidth (RegFileDataWidth),
      .DmHaltAddr       (DmHaltAddr),
      .DmExceptionAddr  (DmExceptionAddr),
      .CHERIoTEn        (CHERIoTEn),
      .DataWidth        (DataWidth),
      .HeapBase         (HeapBase),
      .TSMapBase        (TSMapBase),
      .TSMapSize        (TSMapSize),
      .MemCapFmt        (MemCapFmt),
      .CheriPPLBC       (CheriPPLBC),
      .CheriSBND2       (CheriSBND2),
      .CheriTBRE        (CheriTBRE),
      .CheriStkZ        (CheriStkZ)
    ) u_cheriot_lockstep (
      .clk_i             (clk),
      .rst_ni            (rst_ni),   // should use a different reset tree

      .hart_id_i         (hart_id_local),
      .boot_addr_i       (boot_addr_local),
      .cheri_pmode_i     (cheri_pmode_local),
      .cheri_tsafe_en_i  (cheri_tsafe_en_local),

      .instr_req_i       (instr_req_local),
      .instr_gnt_i       (instr_gnt_local),
      .instr_rvalid_i    (instr_rvalid_local),
      .instr_addr_i      (instr_addr_local),
      .instr_rdata_i     (instr_rdata_local),
      .instr_rdata_intg_i(instr_rdata_intg_local),
      .instr_err_i       (instr_err_local),

      .data_req_i        (data_req_local),
      .data_gnt_i        (data_gnt_local),
      .data_rvalid_i     (data_rvalid_local),
      .data_we_i         (data_we_local),
      .data_be_i         (data_be_local),
      .data_addr_i       (data_addr_local),
      .data_wdata_i      (data_wdata_local),
      .data_is_cap_i     (data_is_cap_local),
      .data_wdata_intg_o (data_wdata_intg_local),
      .data_rdata_i      (data_rdata_local),
      .data_rdata_intg_i (data_rdata_intg_local),
      .data_err_i        (data_err_local),

      .dummy_instr_id_i  (dummy_instr_id_local),
      .rf_raddr_a_i      (rf_raddr_a_local),
      .rf_raddr_b_i      (rf_raddr_b_local),
      .rf_waddr_wb_i     (rf_waddr_wb_local),
      .rf_we_wb_i        (rf_we_wb_local),
      .rf_wdata_wb_ecc_i (rf_wdata_wb_ecc_local),
      .rf_rdata_a_ecc_i  (rf_rdata_a_ecc_local),
      .rf_rdata_b_ecc_i  (rf_rdata_b_ecc_local),
      .rf_wcap_wb_i      (rf_wcap_local),
      .rf_rcap_a_i       (rf_rcap_a_local),
      .rf_rcap_b_i       (rf_rcap_b_local),
      .rf_reg_rdy_i      (rf_reg_rdy_local),
      .rf_trsv_en_i      (rf_trsv_en_local),
      .rf_trsv_addr_i    (rf_trsv_addr_local),
      .rf_trsv_par_i     (rf_trsv_par_local),
      .rf_trvk_addr_i    (rf_trvk_addr_local),
      .rf_trvk_en_i      (rf_trvk_en_local),
      .rf_trvk_clrtag_i  (rf_trvk_clrtag_local),
      .rf_trvk_par_i     (rf_trvk_par_local),
      .tsmap_cs_i        (tsmap_cs_local),
      .tsmap_addr_i      (tsmap_addr_local),
      .tsmap_rdata_i     (tsmap_rdata_local),
      .tsmap_rdata_intg_i(tsmap_rdata_intg_local),
      .mmreg_corein_i    (mmreg_corein_local),
      .mmreg_coreout_i   (mmreg_coreout_local),

      .ic_tag_req_i      (ic_tag_req_local),
      .ic_tag_write_i    (ic_tag_write_local),
      .ic_tag_addr_i     (ic_tag_addr_local),
      .ic_tag_wdata_i    (ic_tag_wdata_local),
      .ic_tag_rdata_i    (ic_tag_rdata_local),
      .ic_data_req_i     (ic_data_req_local),
      .ic_data_write_i   (ic_data_write_local),
      .ic_data_addr_i    (ic_data_addr_local),
      .ic_data_wdata_i   (ic_data_wdata_local),
      .ic_data_rdata_i   (ic_data_rdata_local),
      .ic_scr_key_valid_i(scramble_key_valid_local),

      .irq_software_i    (irq_software_local),
      .irq_timer_i       (irq_timer_local),
      .irq_external_i    (irq_external_local),
      .irq_fast_i        (irq_fast_local),
      .irq_nm_i          (irq_nm_local),
      .irq_pending_i     (irq_pending_local),

      .debug_req_i       (debug_req_local),
      .crash_dump_i      (crash_dump_local),
      .double_fault_seen_i(double_fault_seen_local),

      .fetch_enable_i        (fetch_enable_local),
      .alert_minor_o         (lockstep_alert_minor_local),
      .alert_major_internal_o(lockstep_alert_major_internal_local),
      .alert_major_bus_o     (lockstep_alert_major_bus_local),
      .icache_inval_i        (icache_inval_local),
      .core_busy_i           (core_busy_local),
      .test_en_i             (test_en_i),
      .scan_rst_ni           (scan_rst_ni)
    );

    // Manually buffer the output signals.
    prim_buf #(.Width (7)) u_prim_buf_wdata_intg (
      .in_i(data_wdata_intg_local),
      .out_o(data_wdata_intg_o)
    );

    prim_buf u_prim_buf_alert_minor (
      .in_i (lockstep_alert_minor_local),
      .out_o(lockstep_alert_minor)
    );

    prim_buf u_prim_buf_alert_major_internal (
      .in_i (lockstep_alert_major_internal_local),
      .out_o(lockstep_alert_major_internal)
    );

    prim_buf u_prim_buf_alert_major_bus (
      .in_i (lockstep_alert_major_bus_local),
      .out_o(lockstep_alert_major_bus)
    );

  end else begin : gen_no_lockstep
    assign lockstep_alert_major_internal = 1'b0;
    assign lockstep_alert_major_bus      = 1'b0;
    assign lockstep_alert_minor          = 1'b0;
    assign data_wdata_intg_o             = 'b0;
    logic unused_scan, unused_intg;
    assign unused_scan = scan_rst_ni;
    assign unused_intg = |{instr_rdata_intg_i, data_rdata_intg_i};
  end

  // Combine alert sources: register-file alerts count as major internal faults.
  assign alert_major_internal_o = core_alert_major | lockstep_alert_major_internal | rf_alert;
  assign alert_major_bus_o      = lockstep_alert_major_bus;
  assign alert_minor_o          = core_alert_minor | lockstep_alert_minor;

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv b/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv new file mode 100644 index 0000000..aa74060 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_top_tracing.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Top level module of the ibex RISC-V core with tracing enabled
 *
 * Thin wrapper around cheriot_top: every core parameter and pin is forwarded
 * unchanged, and the core's RVFI outputs are attached to a cheriot_tracer
 * instance that logs executed instructions to a text trace file. Requires the
 * RVFI macro to be defined globally; elaboration is aborted otherwise.
 */

module cheriot_top_tracing import cheriot_pkg::*; import cheri_pkg::*; #(
  parameter bit           PMPEnable        = 1'b0,
  parameter int unsigned  PMPGranularity   = 0,
  parameter int unsigned  PMPNumRegions    = 4,
  parameter int unsigned  MHPMCounterNum   = 0,
  parameter int unsigned  MHPMCounterWidth = 40,
  parameter bit           RV32E            = 1'b0,
  parameter rv32m_e       RV32M            = RV32MFast,
  parameter rv32b_e       RV32B            = RV32BNone,
  parameter regfile_e     RegFile          = RegFileFF,
  parameter bit           BranchTargetALU  = 1'b1,
  parameter bit           WritebackStage   = 1'b1,
  parameter bit           ICache           = 1'b0,
  parameter bit           ICacheECC        = 1'b0,
  parameter bit           BranchPredictor  = 1'b0,
  parameter bit           DbgTriggerEn     = 1'b0,
  parameter int unsigned  DbgHwBreakNum    = 1,
  parameter bit           SecureIbex       = 1'b0,
  parameter bit           ICacheScramble   = 1'b0,
  parameter lfsr_seed_t   RndCnstLfsrSeed  = RndCnstLfsrSeedDefault,
  parameter lfsr_perm_t   RndCnstLfsrPerm  = RndCnstLfsrPermDefault,
  // NOTE(review): HWTraceEn is not forwarded to cheriot_top or the tracer in
  // this wrapper — presumably kept for interface compatibility; confirm.
  parameter bit           HWTraceEn        = 1'b0,
  parameter int unsigned  DmHaltAddr       = 32'h1A110800,
  parameter int unsigned  DmExceptionAddr  = 32'h1A110808,
  parameter bit           CHERIoTEn        = 1'b1,
  parameter int unsigned  DataWidth        = 33,
  parameter int unsigned  HeapBase         = 32'h2001_0000,
  parameter int unsigned  TSMapBase        = 32'h2004_0000,   // 4kB default
  parameter int unsigned  TSMapSize        = 1024,
  parameter bit           MemCapFmt        = 1'b0,
  parameter bit           CheriPPLBC       = 1'b1,
  parameter bit           CheriSBND2       = 1'b0,
  parameter bit           CheriTBRE        = 1'b1,
  parameter bit           CheriStkZ        = 1'b1,
  parameter int unsigned  MMRegDinW        = 128,
  parameter int unsigned  MMRegDoutW       = 64
) (
  // Clock and Reset
  input  logic                         clk_i,
  input  logic                         rst_ni,

  input  logic                         test_en_i,     // enable all clock gates for testing
  input  logic                         scan_rst_ni,
  input  prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,

  input  logic                         cheri_pmode_i,
  input  logic                         cheri_tsafe_en_i,
  input  logic [31:0]                  hart_id_i,
  input  logic [31:0]                  boot_addr_i,

  // Instruction memory interface
  output logic                         instr_req_o,
  input  logic                         instr_gnt_i,
  input  logic                         instr_rvalid_i,
  output logic [31:0]                  instr_addr_o,
  input  logic [31:0]                  instr_rdata_i,
  input  logic [6:0]                   instr_rdata_intg_i,
  input  logic                         instr_err_i,

  // Data memory interface
  output logic                         data_req_o,
  output logic                         data_is_cap_o,
  input  logic                         data_gnt_i,
  input  logic                         data_rvalid_i,
  output logic                         data_we_o,
  output logic [3:0]                   data_be_o,
  output logic [31:0]                  data_addr_o,
  output logic [DataWidth-1:0]         data_wdata_o,
  output logic [6:0]                   data_wdata_intg_o,
  input  logic [DataWidth-1:0]         data_rdata_i,
  input  logic [6:0]                   data_rdata_intg_i,
  input  logic                         data_err_i,

  // TS map memory interface
  output logic                         tsmap_cs_o,
  output logic [15:0]                  tsmap_addr_o,
  input  logic [31:0]                  tsmap_rdata_i,
  input  logic [6:0]                   tsmap_rdata_intg_i,
  input  logic [MMRegDinW-1:0]         mmreg_corein_i,
  output logic [MMRegDoutW-1:0]        mmreg_coreout_o,

  // Interrupt inputs
  input  logic                         irq_software_i,
  input  logic                         irq_timer_i,
  input  logic                         irq_external_i,
  input  logic [14:0]                  irq_fast_i,
  input  logic                         irq_nm_i,       // non-maskeable interrupt

  // Scrambling Interface
  input  logic                         scramble_key_valid_i,
  input  logic [SCRAMBLE_KEY_W-1:0]    scramble_key_i,
  input  logic [SCRAMBLE_NONCE_W-1:0]  scramble_nonce_i,
  output logic                         scramble_req_o,

  // Debug Interface
  input  logic                         debug_req_i,
  output crash_dump_t                  crash_dump_o,
  output logic                         double_fault_seen_o,

  // CPU Control Signals
  input  fetch_enable_t                fetch_enable_i,
  output logic                         core_sleep_o
);

  // cheriot_tracer relies on the signals from the RISC-V Formal Interface.
  // Elaboration-time guard. Note: per IEEE 1800-2017, when $fatal is given any
  // arguments the first one must be a finish_number (0, 1 or 2); passing the
  // message string first is non-conformant, so a finish_number of 1 is
  // supplied explicitly here.
`ifndef RVFI
  $fatal(1, "Fatal error: RVFI needs to be defined globally.");
`endif

  // RVFI signals driven by the core and consumed by the tracer.
  logic                 rvfi_valid;
  logic [63:0]          rvfi_order;
  logic [31:0]          rvfi_insn;
  logic                 rvfi_trap;
  logic                 rvfi_halt;
  logic                 rvfi_intr;
  logic [ 1:0]          rvfi_mode;
  logic [ 1:0]          rvfi_ixl;
  logic [ 4:0]          rvfi_rs1_addr;
  logic [ 4:0]          rvfi_rs2_addr;
  logic [ 4:0]          rvfi_rs3_addr;
  logic [31:0]          rvfi_rs1_rdata;
  reg_cap_t             rvfi_rs1_rcap;
  reg_cap_t             rvfi_rs2_rcap;
  logic [31:0]          rvfi_rs2_rdata;
  logic [31:0]          rvfi_rs3_rdata;
  logic [ 4:0]          rvfi_rd_addr;
  logic [31:0]          rvfi_rd_wdata;
  reg_cap_t             rvfi_rd_wcap;
  logic [31:0]          rvfi_pc_rdata;
  logic [31:0]          rvfi_pc_wdata;
  logic [31:0]          rvfi_mem_addr;
  logic [ 3:0]          rvfi_mem_rmask;
  logic [ 3:0]          rvfi_mem_wmask;
  logic [DataWidth-1:0] rvfi_mem_rdata;
  logic [DataWidth-1:0] rvfi_mem_wdata;
  logic                 rvfi_mem_is_cap;
  reg_cap_t             rvfi_mem_rcap;
  reg_cap_t             rvfi_mem_wcap;
  logic [31:0]          rvfi_mem2_addr;
  logic                 rvfi_mem2_we;
  logic [65:0]          rvfi_mem2_rdata;
  logic [65:0]          rvfi_mem2_wdata;
  logic [31:0]          rvfi_ext_mip;
  logic                 rvfi_ext_nmi;
  logic                 rvfi_ext_debug_req;
  logic [63:0]          rvfi_ext_mcycle;

  logic [31:0]          unused_rvfi_ext_mip;
  logic                 unused_rvfi_ext_nmi;
  logic                 unused_rvfi_ext_debug_req;
  logic [63:0]          unused_rvfi_ext_mcycle;

  // Tracer doesn't use these signals, though other modules may probe down into
  // tracer to observe them.
  assign unused_rvfi_ext_mip       = rvfi_ext_mip;
  assign unused_rvfi_ext_nmi       = rvfi_ext_nmi;
  assign unused_rvfi_ext_debug_req = rvfi_ext_debug_req;
  assign unused_rvfi_ext_mcycle    = rvfi_ext_mcycle;

  // The mem2 channel is declared above but neither driven by cheriot_top's
  // port map nor consumed by the tracer in this wrapper; fold it into an
  // explicit unused net so lint stays clean without removing the declarations.
  logic unused_rvfi_mem2;
  assign unused_rvfi_mem2 = ^{rvfi_mem2_addr, rvfi_mem2_we, rvfi_mem2_rdata, rvfi_mem2_wdata};

  // Core instance: all parameters and pins of this wrapper are forwarded 1:1.
  cheriot_top #(
    .PMPEnable        ( PMPEnable        ),
    .PMPGranularity   ( PMPGranularity   ),
    .PMPNumRegions    ( PMPNumRegions    ),
    .MHPMCounterNum   ( MHPMCounterNum   ),
    .MHPMCounterWidth ( MHPMCounterWidth ),
    .RV32E            ( RV32E            ),
    .RV32M            ( RV32M            ),
    .RV32B            ( RV32B            ),
    .RegFile          ( RegFile          ),
    .BranchTargetALU  ( BranchTargetALU  ),
    .ICache           ( ICache           ),
    .ICacheECC        ( ICacheECC        ),
    .BranchPredictor  ( BranchPredictor  ),
    .DbgTriggerEn     ( DbgTriggerEn     ),
    .DbgHwBreakNum    ( DbgHwBreakNum    ),
    .WritebackStage   ( WritebackStage   ),
    .SecureIbex       ( SecureIbex       ),
    .ICacheScramble   ( ICacheScramble   ),
    .RndCnstLfsrSeed  ( RndCnstLfsrSeed  ),
    .RndCnstLfsrPerm  ( RndCnstLfsrPerm  ),
    .DmHaltAddr       ( DmHaltAddr       ),
    .DmExceptionAddr  ( DmExceptionAddr  ),
    .CHERIoTEn        ( CHERIoTEn        ),
    .DataWidth        ( DataWidth        ),
    .HeapBase         ( HeapBase         ),
    .TSMapBase        ( TSMapBase        ),
    .TSMapSize        ( TSMapSize        ),
    .MemCapFmt        ( MemCapFmt        ),
    .CheriPPLBC       ( CheriPPLBC       ),
    .CheriSBND2       ( CheriSBND2       ),
    .CheriTBRE        ( CheriTBRE        ),
    .CheriStkZ        ( CheriStkZ        )
  ) u_cheriot_top (
    .clk_i,
    .rst_ni,

    .test_en_i,
    .scan_rst_ni,
    .ram_cfg_i,

    .cheri_pmode_i,
    .cheri_tsafe_en_i,
    .hart_id_i,
    .boot_addr_i,

    .instr_req_o,
    .instr_gnt_i,
    .instr_rvalid_i,
    .instr_addr_o,
    .instr_rdata_i,
    .instr_rdata_intg_i,
    .instr_err_i,

    .data_req_o,
    .data_is_cap_o,
    .data_gnt_i,
    .data_rvalid_i,
    .data_we_o,
    .data_be_o,
    .data_addr_o,
    .data_wdata_o,
    .data_wdata_intg_o,
    .data_rdata_i,
    .data_rdata_intg_i,
    .data_err_i,

    .tsmap_cs_o,
    .tsmap_addr_o,
    .tsmap_rdata_i,
    .tsmap_rdata_intg_i,
    .mmreg_corein_i,
    .mmreg_coreout_o,

    .irq_software_i,
    .irq_timer_i,
    .irq_external_i,
    .irq_fast_i,
    .irq_nm_i,

    .scramble_key_valid_i,
    .scramble_key_i,
    .scramble_nonce_i,
    .scramble_req_o,

    .debug_req_i,
    .crash_dump_o,
    .double_fault_seen_o,

`ifdef RVFI
    .rvfi_valid,
    .rvfi_order,
    .rvfi_insn,
    .rvfi_trap,
    .rvfi_halt,
    .rvfi_intr,
    .rvfi_mode,
    .rvfi_ixl,
    .rvfi_rs1_addr,
    .rvfi_rs2_addr,
    .rvfi_rs3_addr,
    .rvfi_rs1_rdata,
    .rvfi_rs1_rcap,
    .rvfi_rs2_rdata,
    .rvfi_rs2_rcap,
    .rvfi_rs3_rdata,
    .rvfi_rd_addr,
    .rvfi_rd_wdata,
    .rvfi_rd_wcap,
    .rvfi_pc_rdata,
    .rvfi_pc_wdata,
    .rvfi_mem_addr,
    .rvfi_mem_rmask,
    .rvfi_mem_wmask,
    .rvfi_mem_rdata,
    .rvfi_mem_wdata,
    .rvfi_mem_rcap,
    .rvfi_mem_wcap,
    .rvfi_mem_is_cap,
    .rvfi_ext_mip,
    .rvfi_ext_nmi,
    .rvfi_ext_debug_req,
    .rvfi_ext_mcycle,
`endif
    .fetch_enable_i,
    .core_sleep_o,
    // Alerts are intentionally left open in the tracing wrapper.
    .alert_major_bus_o      (),
    .alert_major_internal_o (),
    .alert_minor_o          ()
  );

`ifdef RVFI
  // Instruction tracer, fed directly from the core's RVFI outputs.
  cheriot_tracer #(
    .DataWidth (DataWidth)
  ) u_cheriot_tracer (
    .clk_i,
    .rst_ni,

    .cheri_pmode_i,
    .cheri_tsafe_en_i,
    .hart_id_i,

    .rvfi_valid,
    .rvfi_order,
    .rvfi_insn,
    .rvfi_trap,
    .rvfi_halt,
    .rvfi_intr,
    .rvfi_mode,
    .rvfi_ixl,
    .rvfi_rs1_addr,
    .rvfi_rs2_addr,
    .rvfi_rs3_addr,
    .rvfi_rs1_rdata,
    .rvfi_rs2_rdata,
    .rvfi_rs3_rdata,
    .rvfi_rs1_rcap,
    .rvfi_rs2_rcap,
    .rvfi_rd_wcap,
    .rvfi_rd_addr,
    .rvfi_rd_wdata,
    .rvfi_pc_rdata,
    .rvfi_pc_wdata,
    .rvfi_mem_addr,
    .rvfi_mem_rmask,
    .rvfi_mem_wmask,
    .rvfi_mem_rdata,
    .rvfi_mem_wdata,
    .rvfi_mem_rcap,
    .rvfi_mem_wcap,
    .rvfi_mem_is_cap
  );
`endif

endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv b/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv new file mode 100644 index 0000000..2f08ba9 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_tracer.sv
@@ -0,0 +1,1410 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Trace executed instructions in simulation + * + * This tracer takes execution information from the RISC-V Verification Interface (RVFI) and + * produces a text file with a human-readable trace. + * + * All traced instructions are written to a log file. By default, the log file is named + * trace_core_<HARTID>.log, with <HARTID> being the 8 digit hart ID of the core being traced. + * + * The file name base, defaulting to "trace_core" can be set using the "cheriot_tracer_file_base" + * plusarg passed to the simulation, e.g. "+cheriot_tracer_file_base=ibex_my_trace". The exact syntax + * of passing plusargs to a simulation depends on the simulator. + * + * The creation of the instruction trace is enabled by default but can be disabled for a simulation. + * This behaviour is controlled by the plusarg "cheriot_tracer_enable". Use "cheriot_tracer_enable=0" to + * disable the tracer. + * + * The trace contains six columns, separated by tabs: + * - The simulation time + * - The clock cycle count since reset + * - The program counter (PC) + * - The instruction + * - The decoded instruction in the same format as objdump, together with the accessed registers and + * read/written memory values. Jumps and branches show the target address. + * This column may be omitted if the instruction does not decode into a long form. + * - Accessed registers and memory locations. + * + * Significant effort is spent to make the decoding produced by this tracer as similar as possible + * to the one produced by objdump. 
This simplifies the correlation between the static program + * information from the objdump-generated disassembly, and the runtime information from this tracer. + */ + +module cheriot_tracer import cheri_pkg::*; # ( + parameter int unsigned DataWidth = 32, + parameter bit CheriCapIT8 = 1'b0 +) ( + input logic clk_i, + input logic rst_ni, + + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + input logic [31:0] hart_id_i, + + // RVFI as described at https://github.com/SymbioticEDA/riscv-formal/blob/master/docs/rvfi.md + // The standard interface does not have _i/_o suffixes. For consistency with the standard the + // signals in this module don't have the suffixes either. + input logic rvfi_valid, + input logic [63:0] rvfi_order, + input logic [31:0] rvfi_insn, + input logic rvfi_trap, + input logic rvfi_halt, + input logic rvfi_intr, + input logic [ 1:0] rvfi_mode, + input logic [ 1:0] rvfi_ixl, + input logic [ 4:0] rvfi_rs1_addr, + input logic [ 4:0] rvfi_rs2_addr, + input logic [ 4:0] rvfi_rs3_addr, + input logic [31:0] rvfi_rs1_rdata, + input reg_cap_t rvfi_rs1_rcap, + input logic [31:0] rvfi_rs2_rdata, + input reg_cap_t rvfi_rs2_rcap, + input logic [31:0] rvfi_rs3_rdata, + input logic [ 4:0] rvfi_rd_addr, + input logic [31:0] rvfi_rd_wdata, + input reg_cap_t rvfi_rd_wcap, + input logic [31:0] rvfi_pc_rdata, + input logic [31:0] rvfi_pc_wdata, + input logic [31:0] rvfi_mem_addr, + input logic [ 3:0] rvfi_mem_rmask, + input logic [ 3:0] rvfi_mem_wmask, + input logic [DataWidth-1:0] rvfi_mem_rdata, + input logic [DataWidth-1:0] rvfi_mem_wdata, + input logic rvfi_mem_is_cap, + input reg_cap_t rvfi_mem_rcap, + input reg_cap_t rvfi_mem_wcap +); + +// synthesis translate_off + + // These signals are part of RVFI, but not used in this module currently. + // Keep them as part of the interface to change the tracer more easily in the future. 
// Assigning these signals to unused_* signals marks them explicitly as unused,
  // an annotation picked up by linters, including Verilator lint.
  logic [63:0] unused_rvfi_order = rvfi_order;
  logic        unused_rvfi_trap  = rvfi_trap;
  logic        unused_rvfi_halt  = rvfi_halt;
  logic        unused_rvfi_intr  = rvfi_intr;
  logic [ 1:0] unused_rvfi_mode  = rvfi_mode;
  logic [ 1:0] unused_rvfi_ixl   = rvfi_ixl;

  import cheriot_tracer_pkg::*;

  // Trace-file state: handle/name are created lazily on the first dumped line.
  int    file_handle;
  string file_name;

  // Cycle count since reset (second trace column) and the decoded-instruction
  // text built up by the decode_* helpers before a line is written out.
  int unsigned cycle;
  string       decoded_str;
  logic        insn_is_compressed;
  // Bit 32 of the store data (capability tag bit) — printed separately for
  // sub-word stores where the low bits are masked.
  logic        rvfi_mem_wdata_bit32;

  // Data items accessed during this instruction. One-hot flags OR-ed into
  // data_accessed by the decode_* helpers; printbuffer_dumpline() uses them to
  // decide which register/memory operands to append to the trace line.
  // RS*/RD are plain integer registers, CS*/CD the capability views of the
  // same registers, MEM a plain memory access, MEMC a capability-sized one.
  localparam logic [9:0] RS1  = (1 << 0);
  localparam logic [9:0] RS2  = (1 << 1);
  localparam logic [9:0] RS3  = (1 << 2);
  localparam logic [9:0] RD   = (1 << 3);
  localparam logic [9:0] MEM  = (1 << 4);
  localparam logic [9:0] CS1  = (1 << 5);
  localparam logic [9:0] CS2  = (1 << 6);
  localparam logic [9:0] CD   = (1 << 7);
  localparam logic [9:0] MEMC = (1 << 8);
  localparam logic [9:0] MEM2 = (1 << 9);
  logic [9:0] data_accessed;

  // Tracing is on by default; "+cheriot_tracer_enable=0" disables it.
  logic trace_log_enable;
  initial begin
    if ($value$plusargs("cheriot_tracer_enable=%b", trace_log_enable)) begin
      if (trace_log_enable == 1'b0) begin
        $display("%m: Instruction trace disabled.");
      end
    end else begin
      trace_log_enable = 1'b1;
    end
  end

  // Write one completed trace line (time, cycle, PC, raw instruction, decoded
  // text, then the operands selected by data_accessed) to the log file.
  // Opens the file on first use; the base name comes from the
  // "+cheriot_tracer_file_base" plusarg, suffixed with the hart ID.
  function automatic void printbuffer_dumpline();
    string rvfi_insn_str;
    string disp_str;
    logic [32:0] tmp33;   // 33-bit packed (memory-format) view of a capability

    if (file_handle == 32'h0) begin
      string file_name_base = "trace_core";
      void'($value$plusargs("cheriot_tracer_file_base=%s", file_name_base));
      $sformat(file_name, "%s_%h.log", file_name_base, hart_id_i);

      $display("%m: Writing execution trace to %s", file_name);
      file_handle = $fopen(file_name, "w");
      $fwrite(file_handle,
              "Time\tCycle\tPC\tInsn\tDecoded instruction\tRegister and memory contents\n");
    end

    // Write compressed instructions as four hex digits (16 bit word), and
    // uncompressed ones as 8 hex digits (32 bit words).
    if (insn_is_compressed) begin
      rvfi_insn_str = $sformatf("    %4h", rvfi_insn[15:0]);
    end else begin
      rvfi_insn_str = $sformatf("%8h", rvfi_insn);
    end

    // Prefix trapped ("-->") and interrupt-entry ("==>") instructions so they
    // stand out in the log.
    if (rvfi_trap)      disp_str = $sformatf("-->%s", decoded_str);
    else if (rvfi_intr) disp_str = $sformatf("==>%s", decoded_str);
    else                disp_str = decoded_str;

    $fwrite(file_handle, "%15t\t%d\t%h\t%s\t%s\t",
            $time, cycle, rvfi_pc_rdata, rvfi_insn_str, disp_str);

    // Integer register reads print as "xN:0xVALUE"; capability register reads
    // additionally append "+0xMETA", the 33-bit memory-format capability
    // metadata (IT8 or default format depending on CheriCapIT8).
    if ((data_accessed & RS1) != 0) begin
      $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs1_addr), rvfi_rs1_rdata);
    end
    if ((data_accessed & CS1) != 0) begin
      tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rs1_rcap) : reg2memcap_fmt0(rvfi_rs1_rcap);
      $fwrite(file_handle, " %s:0x%08x+0x%09x", reg_addr_to_str(rvfi_rs1_addr), rvfi_rs1_rdata, tmp33);
    end
    if ((data_accessed & RS2) != 0) begin
      $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs2_addr), rvfi_rs2_rdata);
    end
    if ((data_accessed & CS2) != 0) begin
      tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rs2_rcap) : reg2memcap_fmt0(rvfi_rs2_rcap);
      $fwrite(file_handle, " %s:0x%08x+0x%09x", reg_addr_to_str(rvfi_rs2_addr), rvfi_rs2_rdata, tmp33);
    end
    if ((data_accessed & RS3) != 0) begin
      $fwrite(file_handle, " %s:0x%08x", reg_addr_to_str(rvfi_rs3_addr), rvfi_rs3_rdata);
    end
    // Register writes use "=" instead of ":".
    if ((data_accessed & RD) != 0) begin
      $fwrite(file_handle, " %s=0x%08x", reg_addr_to_str(rvfi_rd_addr), rvfi_rd_wdata);
    end

    if ((data_accessed & CD) != 0) begin
      tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_rd_wcap) : reg2memcap_fmt0(rvfi_rd_wcap);
      $fwrite(file_handle, " %s=0x%08x+0x%09x", reg_addr_to_str(rvfi_rd_addr), rvfi_rd_wdata, tmp33);
    end

    // Plain memory access: print the physical address, then the store data
    // (masked byte/halfword lanes shown as '?', tag bit printed up front) and
    // any load data.
    if ((data_accessed & MEM) != 0) begin
      $fwrite(file_handle, " PA:0x%08x", rvfi_mem_addr);

      if (rvfi_mem_wmask == 4'b0001)
        $fwrite(file_handle, " store:0x%1b??????%02x", rvfi_mem_wdata_bit32, rvfi_mem_wdata[7:0]);
      else if (rvfi_mem_wmask == 4'b0011)
        $fwrite(file_handle, " store:0x%1b????%04x", rvfi_mem_wdata_bit32, rvfi_mem_wdata[15:0]);
      else if (rvfi_mem_wmask != 4'b0000)
        $fwrite(file_handle, " store:0x%09x", rvfi_mem_wdata);

      if (rvfi_mem_rmask != 4'b0000)
        $fwrite(file_handle, " load:0x%08x", rvfi_mem_rdata);
    end

    // Capability-sized memory access: data word plus packed capability
    // metadata; wmask selects between the store and load views.
    if ((data_accessed & MEMC) != 0) begin
      $fwrite(file_handle, " PA:0x%08x", rvfi_mem_addr);

      if (rvfi_mem_wmask != 0) begin
        tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_mem_wcap) : reg2memcap_fmt0(rvfi_mem_wcap);
        $fwrite(file_handle, " store:0x%09x+0x%09x", rvfi_mem_wdata, tmp33);
      end else begin
        tmp33 = CheriCapIT8 ? reg2memcap_it8_fmt0(rvfi_mem_rcap) : reg2memcap_fmt0(rvfi_mem_rcap);
        $fwrite(file_handle, " load:0x%09x+0x%09x", rvfi_mem_rdata, tmp33);
      end
    end

    $fwrite(file_handle, "\n");
  endfunction


  // Format register address with "x" prefix, right-padded into a fixed field
  // of 3 characters (single-digit registers get a leading space: " x5", "x15").
  function automatic string reg_addr_to_str(input logic [4:0] addr);
    if (addr < 10) begin
      return $sformatf(" x%0d", addr);
    end else begin
      return $sformatf("x%0d", addr);
    end
  endfunction

  // Get a SCR name for a CHERI SCR (special capability register) address;
  // unknown addresses fall back to a generic "scr<N>" form.
  function automatic string get_scr_name(input logic [4:0] scr_addr);
    unique case (scr_addr)
      5'd27: return "ztopc";
      5'd28: return "mtcc";
      5'd29: return "mtdc";
      5'd30: return "mscratchc";
      5'd31: return "mepcc";
      default: return $sformatf("scr%d", scr_addr);
    endcase
  endfunction

  // Get a CSR name for a CSR address.
+ function automatic string get_csr_name(input logic [11:0] csr_addr); + unique case (csr_addr) + 12'd0: return "ustatus"; + 12'd4: return "uie"; + 12'd5: return "utvec"; + 12'd64: return "uscratch"; + 12'd65: return "uepc"; + 12'd66: return "ucause"; + 12'd67: return "utval"; + 12'd68: return "uip"; + 12'd1: return "fflags"; + 12'd2: return "frm"; + 12'd3: return "fcsr"; + 12'd3072: return "cycle"; + 12'd3073: return "time"; + 12'd3074: return "instret"; + 12'd3075: return "hpmcounter3"; + 12'd3076: return "hpmcounter4"; + 12'd3077: return "hpmcounter5"; + 12'd3078: return "hpmcounter6"; + 12'd3079: return "hpmcounter7"; + 12'd3080: return "hpmcounter8"; + 12'd3081: return "hpmcounter9"; + 12'd3082: return "hpmcounter10"; + 12'd3083: return "hpmcounter11"; + 12'd3084: return "hpmcounter12"; + 12'd3085: return "hpmcounter13"; + 12'd3086: return "hpmcounter14"; + 12'd3087: return "hpmcounter15"; + 12'd3088: return "hpmcounter16"; + 12'd3089: return "hpmcounter17"; + 12'd3090: return "hpmcounter18"; + 12'd3091: return "hpmcounter19"; + 12'd3092: return "hpmcounter20"; + 12'd3093: return "hpmcounter21"; + 12'd3094: return "hpmcounter22"; + 12'd3095: return "hpmcounter23"; + 12'd3096: return "hpmcounter24"; + 12'd3097: return "hpmcounter25"; + 12'd3098: return "hpmcounter26"; + 12'd3099: return "hpmcounter27"; + 12'd3100: return "hpmcounter28"; + 12'd3101: return "hpmcounter29"; + 12'd3102: return "hpmcounter30"; + 12'd3103: return "hpmcounter31"; + 12'd3200: return "cycleh"; + 12'd3201: return "timeh"; + 12'd3202: return "instreth"; + 12'd3203: return "hpmcounter3h"; + 12'd3204: return "hpmcounter4h"; + 12'd3205: return "hpmcounter5h"; + 12'd3206: return "hpmcounter6h"; + 12'd3207: return "hpmcounter7h"; + 12'd3208: return "hpmcounter8h"; + 12'd3209: return "hpmcounter9h"; + 12'd3210: return "hpmcounter10h"; + 12'd3211: return "hpmcounter11h"; + 12'd3212: return "hpmcounter12h"; + 12'd3213: return "hpmcounter13h"; + 12'd3214: return "hpmcounter14h"; + 12'd3215: return 
"hpmcounter15h"; + 12'd3216: return "hpmcounter16h"; + 12'd3217: return "hpmcounter17h"; + 12'd3218: return "hpmcounter18h"; + 12'd3219: return "hpmcounter19h"; + 12'd3220: return "hpmcounter20h"; + 12'd3221: return "hpmcounter21h"; + 12'd3222: return "hpmcounter22h"; + 12'd3223: return "hpmcounter23h"; + 12'd3224: return "hpmcounter24h"; + 12'd3225: return "hpmcounter25h"; + 12'd3226: return "hpmcounter26h"; + 12'd3227: return "hpmcounter27h"; + 12'd3228: return "hpmcounter28h"; + 12'd3229: return "hpmcounter29h"; + 12'd3230: return "hpmcounter30h"; + 12'd3231: return "hpmcounter31h"; + 12'd256: return "sstatus"; + 12'd258: return "sedeleg"; + 12'd259: return "sideleg"; + 12'd260: return "sie"; + 12'd261: return "stvec"; + 12'd262: return "scounteren"; + 12'd320: return "sscratch"; + 12'd321: return "sepc"; + 12'd322: return "scause"; + 12'd323: return "stval"; + 12'd324: return "sip"; + 12'd384: return "satp"; + 12'd3857: return "mvendorid"; + 12'd3858: return "marchid"; + 12'd3859: return "mimpid"; + 12'd3860: return "mhartid"; + 12'd768: return "mstatus"; + 12'd769: return "misa"; + 12'd770: return "medeleg"; + 12'd771: return "mideleg"; + 12'd772: return "mie"; + 12'd773: return "mtvec"; + 12'd774: return "mcounteren"; + 12'd832: return "mscratch"; + 12'd833: return "mepc"; + 12'd834: return "mcause"; + 12'd835: return "mtval"; + 12'd836: return "mip"; + 12'd928: return "pmpcfg0"; + 12'd929: return "pmpcfg1"; + 12'd930: return "pmpcfg2"; + 12'd931: return "pmpcfg3"; + 12'd944: return "pmpaddr0"; + 12'd945: return "pmpaddr1"; + 12'd946: return "pmpaddr2"; + 12'd947: return "pmpaddr3"; + 12'd948: return "pmpaddr4"; + 12'd949: return "pmpaddr5"; + 12'd950: return "pmpaddr6"; + 12'd951: return "pmpaddr7"; + 12'd952: return "pmpaddr8"; + 12'd953: return "pmpaddr9"; + 12'd954: return "pmpaddr10"; + 12'd955: return "pmpaddr11"; + 12'd956: return "pmpaddr12"; + 12'd957: return "pmpaddr13"; + 12'd958: return "pmpaddr14"; + 12'd959: return "pmpaddr15"; + 12'd2816: 
return "mcycle"; + 12'd2818: return "minstret"; + 12'd2819: return "mhpmcounter3"; + 12'd2820: return "mhpmcounter4"; + 12'd2821: return "mhpmcounter5"; + 12'd2822: return "mhpmcounter6"; + 12'd2823: return "mhpmcounter7"; + 12'd2824: return "mhpmcounter8"; + 12'd2825: return "mhpmcounter9"; + 12'd2826: return "mhpmcounter10"; + 12'd2827: return "mhpmcounter11"; + 12'd2828: return "mhpmcounter12"; + 12'd2829: return "mhpmcounter13"; + 12'd2830: return "mhpmcounter14"; + 12'd2831: return "mhpmcounter15"; + 12'd2832: return "mhpmcounter16"; + 12'd2833: return "mhpmcounter17"; + 12'd2834: return "mhpmcounter18"; + 12'd2835: return "mhpmcounter19"; + 12'd2836: return "mhpmcounter20"; + 12'd2837: return "mhpmcounter21"; + 12'd2838: return "mhpmcounter22"; + 12'd2839: return "mhpmcounter23"; + 12'd2840: return "mhpmcounter24"; + 12'd2841: return "mhpmcounter25"; + 12'd2842: return "mhpmcounter26"; + 12'd2843: return "mhpmcounter27"; + 12'd2844: return "mhpmcounter28"; + 12'd2845: return "mhpmcounter29"; + 12'd2846: return "mhpmcounter30"; + 12'd2847: return "mhpmcounter31"; + 12'd2944: return "mcycleh"; + 12'd2946: return "minstreth"; + 12'd2947: return "mhpmcounter3h"; + 12'd2948: return "mhpmcounter4h"; + 12'd2949: return "mhpmcounter5h"; + 12'd2950: return "mhpmcounter6h"; + 12'd2951: return "mhpmcounter7h"; + 12'd2952: return "mhpmcounter8h"; + 12'd2953: return "mhpmcounter9h"; + 12'd2954: return "mhpmcounter10h"; + 12'd2955: return "mhpmcounter11h"; + 12'd2956: return "mhpmcounter12h"; + 12'd2957: return "mhpmcounter13h"; + 12'd2958: return "mhpmcounter14h"; + 12'd2959: return "mhpmcounter15h"; + 12'd2960: return "mhpmcounter16h"; + 12'd2961: return "mhpmcounter17h"; + 12'd2962: return "mhpmcounter18h"; + 12'd2963: return "mhpmcounter19h"; + 12'd2964: return "mhpmcounter20h"; + 12'd2965: return "mhpmcounter21h"; + 12'd2966: return "mhpmcounter22h"; + 12'd2967: return "mhpmcounter23h"; + 12'd2968: return "mhpmcounter24h"; + 12'd2969: return "mhpmcounter25h"; + 
12'd2970: return "mhpmcounter26h"; + 12'd2971: return "mhpmcounter27h"; + 12'd2972: return "mhpmcounter28h"; + 12'd2973: return "mhpmcounter29h"; + 12'd2974: return "mhpmcounter30h"; + 12'd2975: return "mhpmcounter31h"; + 12'd803: return "mhpmevent3"; + 12'd804: return "mhpmevent4"; + 12'd805: return "mhpmevent5"; + 12'd806: return "mhpmevent6"; + 12'd807: return "mhpmevent7"; + 12'd808: return "mhpmevent8"; + 12'd809: return "mhpmevent9"; + 12'd810: return "mhpmevent10"; + 12'd811: return "mhpmevent11"; + 12'd812: return "mhpmevent12"; + 12'd813: return "mhpmevent13"; + 12'd814: return "mhpmevent14"; + 12'd815: return "mhpmevent15"; + 12'd816: return "mhpmevent16"; + 12'd817: return "mhpmevent17"; + 12'd818: return "mhpmevent18"; + 12'd819: return "mhpmevent19"; + 12'd820: return "mhpmevent20"; + 12'd821: return "mhpmevent21"; + 12'd822: return "mhpmevent22"; + 12'd823: return "mhpmevent23"; + 12'd824: return "mhpmevent24"; + 12'd825: return "mhpmevent25"; + 12'd826: return "mhpmevent26"; + 12'd827: return "mhpmevent27"; + 12'd828: return "mhpmevent28"; + 12'd829: return "mhpmevent29"; + 12'd830: return "mhpmevent30"; + 12'd831: return "mhpmevent31"; + 12'd1952: return "tselect"; + 12'd1953: return "tdata1"; + 12'd1954: return "tdata2"; + 12'd1955: return "tdata3"; + 12'd1968: return "dcsr"; + 12'd1969: return "dpc"; + 12'd1970: return "dscratch"; + 12'd512: return "hstatus"; + 12'd514: return "hedeleg"; + 12'd515: return "hideleg"; + 12'd516: return "hie"; + 12'd517: return "htvec"; + 12'd576: return "hscratch"; + 12'd577: return "hepc"; + 12'd578: return "hcause"; + 12'd579: return "hbadaddr"; + 12'd580: return "hip"; + 12'd896: return "mbase"; + 12'd897: return "mbound"; + 12'd898: return "mibase"; + 12'd899: return "mibound"; + 12'd900: return "mdbase"; + 12'd901: return "mdbound"; + 12'd800: return "mcountinhibit"; + 12'd3009: return "mshwm"; + 12'd3010: return "mshwmb"; + 12'd3012: return "cdbgctrl"; + default: return $sformatf("0x%x", csr_addr); + endcase + 
endfunction + + function automatic void decode_mnemonic(input string mnemonic); + decoded_str = mnemonic; + endfunction + + function automatic void decode_r_insn(input string mnemonic); + data_accessed = RS1 | RS2 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, + rvfi_rs2_addr); + endfunction + + function automatic void decode_r1_insn(input string mnemonic); + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr); + endfunction + + function automatic void decode_r_cmixcmov_insn(input string mnemonic); + data_accessed = RS1 | RS2 | RS3 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr, + rvfi_rs1_addr, rvfi_rs3_addr); + endfunction + + function automatic void decode_r_funnelshift_insn(input string mnemonic); + data_accessed = RS1 | RS2 | RS3 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, + rvfi_rs3_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_i_insn(input string mnemonic); + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, + $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]})); + endfunction + + function automatic void decode_i_shift_insn(input string mnemonic); + // SLLI, SRLI, SRAI, SROI, SLOI, RORI + logic [4:0] shamt; + shamt = {rvfi_insn[24:20]}; + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,0x%0x", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, shamt); + endfunction + + function automatic void decode_i_funnelshift_insn( input string mnemonic); + // fsri + logic [5:0] shamt; + shamt = {rvfi_insn[25:20]}; + data_accessed = RS1 | RS3 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d,x%0d,0x%0x", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, + rvfi_rs3_addr, shamt); + endfunction + + function automatic void decode_i_jalr_insn(input string mnemonic); + // JALR + if (cheri_pmode_i) 
begin + data_accessed = CS1 | CD; + // CH.cjalr + decoded_str = $sformatf("CH.c%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, + $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]}), rvfi_rs1_addr); + end else begin + // jalr + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr, + $signed({{20 {rvfi_insn[31]}}, rvfi_insn[31:20]}), rvfi_rs1_addr); + end + endfunction + + function automatic void decode_u_insn(input string mnemonic); + data_accessed = RD; + decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, {rvfi_insn[31:12]}); + endfunction + + function automatic void decode_j_insn(input string mnemonic); + // JAL + if (cheri_pmode_i) begin + data_accessed = CD; + decoded_str = $sformatf("%s\tc%0d,%0x", "CH.cjal", rvfi_rd_addr, rvfi_pc_wdata); + end else begin + data_accessed = RD; + decoded_str = $sformatf("%s\tx%0d,%0x", mnemonic, rvfi_rd_addr, rvfi_pc_wdata); + end + endfunction + + function automatic void decode_b_insn(input string mnemonic); + logic [31:0] branch_target; + logic [31:0] imm; + + // We cannot use rvfi_pc_wdata for conditional jumps. 
+ imm = $signed({ {19 {rvfi_insn[31]}}, rvfi_insn[31], rvfi_insn[7], + rvfi_insn[30:25], rvfi_insn[11:8], 1'b0 }); + branch_target = rvfi_pc_rdata + imm; + + data_accessed = RS1 | RS2; + decoded_str = $sformatf("%s\tx%0d,x%0d,%0x", + mnemonic, rvfi_rs1_addr, rvfi_rs2_addr, branch_target); + endfunction + + function automatic void decode_csr_insn(input string mnemonic); + logic [11:0] csr; + string csr_name; + csr = rvfi_insn[31:20]; + csr_name = get_csr_name(csr); + + data_accessed = RD; + + if (!rvfi_insn[14]) begin + data_accessed |= RS1; + decoded_str = $sformatf("%s\tx%0d,%s,x%0d", + mnemonic, rvfi_rd_addr, csr_name, rvfi_rs1_addr); + end else begin + decoded_str = $sformatf("%s\tx%0d,%s,%0d", + mnemonic, rvfi_rd_addr, csr_name, {27'b0, rvfi_insn[19:15]}); + end + endfunction + + function automatic void decode_cr_insn(input string mnemonic); + if (rvfi_rs2_addr == 5'b0) begin + if ((rvfi_insn[12] == 1'b1) && cheri_pmode_i) begin + // C.CH.JALR + data_accessed = CS1 | CD; + decoded_str = $sformatf("%s\tc%0d", "c.CH.cjalr", rvfi_rs1_addr); + end else if (rvfi_insn[12] == 1'b1) begin + // C.JALR + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d", mnemonic, rvfi_rs1_addr); + end else if (cheri_pmode_i) begin + // C.CH.JR + data_accessed = CS1; + decoded_str = $sformatf("%s\tc%0d", "c.CH.cjr" , rvfi_rs1_addr); + end else begin + // C.JR + data_accessed = RS1; + decoded_str = $sformatf("%s\tx%0d", mnemonic, rvfi_rs1_addr); + end + end else begin + data_accessed = RS1 | RS2 | RD; // RS1 == RD + decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr); + end + endfunction + + function automatic void decode_ci_cli_insn(input string mnemonic); + logic [5:0] imm; + imm = {rvfi_insn[12], rvfi_insn[6:2]}; + data_accessed = RD; + decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(imm)); + endfunction + + function automatic void decode_ci_caddi_insn(input string mnemonic); + logic [5:0] nzimm; + nzimm = {rvfi_insn[12], 
rvfi_insn[6:2]}; + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(nzimm)); + endfunction + + function automatic void decode_ci_caddi16sp_insn(input string mnemonic); + logic [9:0] nzimm; + nzimm = {rvfi_insn[12], rvfi_insn[4:3], rvfi_insn[5], rvfi_insn[2], rvfi_insn[6], 4'b0}; + if (cheri_pmode_i) begin + data_accessed = CS1 | CD; + decoded_str = $sformatf("%s\tc%0d,%0d", "c.CH.cinc16csp", rvfi_rd_addr, $signed(nzimm)); + end else begin + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(nzimm)); + end + endfunction + + function automatic void decode_ci_clui_insn(input string mnemonic); + logic [5:0] nzimm; + nzimm = {rvfi_insn[12], rvfi_insn[6:2]}; + data_accessed = RD; + decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, 20'($signed(nzimm))); + endfunction + + function automatic void decode_ci_cslli_insn(input string mnemonic); + logic [5:0] shamt; + shamt = {rvfi_insn[12], rvfi_insn[6:2]}; + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rd_addr, shamt); + endfunction + + function automatic void decode_ciw_insn(input string mnemonic); + // C.ADDI4SPN + logic [9:0] nzuimm; + nzuimm = {rvfi_insn[10:7], rvfi_insn[12:11], rvfi_insn[5], rvfi_insn[6], 2'b00}; + if (cheri_pmode_i) begin + // c.CH.incaddr4spn + data_accessed = CD | CS1; + decoded_str = $sformatf("%s\tc%0d,csp,%0d", mnemonic, rvfi_rd_addr, nzuimm); + end else begin + // c.addi4spn + data_accessed = RD; + decoded_str = $sformatf("%s\tx%0d,x2,%0d", mnemonic, rvfi_rd_addr, nzuimm); + end + endfunction + + function automatic void decode_cb_sr_insn(input string mnemonic); + logic [5:0] shamt; + shamt = {rvfi_insn[12], rvfi_insn[6:2]}; + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rs1_addr, shamt); + endfunction + + function automatic void decode_cb_insn(input string mnemonic); + logic [7:0] imm; + logic [31:0] 
jump_target; + if (rvfi_insn[15:13] == 3'b110 || rvfi_insn[15:13] == 3'b111) begin + // C.BNEZ and C.BEQZ + // We cannot use rvfi_pc_wdata for conditional jumps. + imm = {rvfi_insn[12], rvfi_insn[6:5], rvfi_insn[2], rvfi_insn[11:10], rvfi_insn[4:3]}; + jump_target = rvfi_pc_rdata + 32'($signed({imm, 1'b0})); + data_accessed = RS1; + decoded_str = $sformatf("%s\tx%0d,%0x", mnemonic, rvfi_rs1_addr, jump_target); + end else if (rvfi_insn[15:13] == 3'b100) begin + // C.ANDI + imm = {{2{rvfi_insn[12]}}, rvfi_insn[12], rvfi_insn[6:2]}; + data_accessed = RS1 | RD; // RS1 == RD + decoded_str = $sformatf("%s\tx%0d,%0d", mnemonic, rvfi_rd_addr, $signed(imm)); + end else begin + imm = {rvfi_insn[12], rvfi_insn[6:2], 2'b00}; + data_accessed = RS1; + decoded_str = $sformatf("%s\tx%0d,0x%0x", mnemonic, rvfi_rs1_addr, imm); + end + endfunction + + function automatic void decode_cs_insn(input string mnemonic); + data_accessed = RS1 | RS2 | RD; // RS1 == RD + decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_cj_insn(input string mnemonic); + if (rvfi_insn[15:13] == 3'b001) begin + // C.JAL + if (cheri_pmode_i) begin + data_accessed = CD; + decoded_str = $sformatf("%s\t%0x", "c.CH.cjal", rvfi_pc_wdata); + end else begin + data_accessed = RD; + decoded_str = $sformatf("%s\t%0x", mnemonic, rvfi_pc_wdata); + end + end else begin + // C.J + if (cheri_pmode_i) + decoded_str = $sformatf("%s\t%0x", "c.CH.cj", rvfi_pc_wdata); + else + decoded_str = $sformatf("%s\t%0x", mnemonic, rvfi_pc_wdata); + end + endfunction + + function automatic void decode_compressed_load_insn(input string mnemonic); + logic [7:0] imm; + + if ((rvfi_insn[15:13] == 3'b011) && (rvfi_insn[1:0] == OPCODE_C0)) begin + // CHERI: c.clc, use RV64 c.ld encoding + imm = {rvfi_insn[6:5], rvfi_insn[12:10], 3'b000}; + data_accessed = CS1 | CD | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr); + end 
else if ((rvfi_insn[15:13] == 3'b011) && (rvfi_insn[1:0] == OPCODE_C2)) begin + // CHERI: c.clcsp, RV64: c.ldsp + imm = {rvfi_insn[4:2], rvfi_insn[12], rvfi_insn[6:5], 3'b000}; + data_accessed = CS1 | CD | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr); + end else begin + if (rvfi_insn[1:0] == OPCODE_C0) begin + // C.LW + imm = {1'b0, rvfi_insn[5], rvfi_insn[12:10], rvfi_insn[6], 2'b00}; + end else begin + // C.LWSP + imm = {rvfi_insn[3:2], rvfi_insn[12], rvfi_insn[6:4], 2'b00}; + end + if (cheri_pmode_i) begin + data_accessed = CS1 | RD | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr); + end else begin + data_accessed = RS1 | RD | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr, imm, rvfi_rs1_addr); + end + end + endfunction + + function automatic void decode_compressed_store_insn(input string mnemonic); + logic [7:0] imm; + + + if ((rvfi_insn[15:13] == 3'b111) && (rvfi_insn[1:0] == OPCODE_C0)) begin + // CHERI: c.csc, use RV64 c.sd encoding + imm = {rvfi_insn[6:5], rvfi_insn[12:10], 3'b000}; + data_accessed = CS1 | CS2 | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr); + end else if ((rvfi_insn[15:13] == 3'b111) && (rvfi_insn[1:0] == OPCODE_C2)) begin + // CHERI: c.cscsp, RV64: c.sdsp + imm = {rvfi_insn[9:7], rvfi_insn[12:10], 3'b000}; + data_accessed = CS1 | CS2 | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr); + end else begin + if (rvfi_insn[1:0] == OPCODE_C0) begin + // C.SW + imm = {1'b0, rvfi_insn[5], rvfi_insn[12:10], rvfi_insn[6], 2'b00}; + end else begin + // C.SWSP + imm = {rvfi_insn[8:7], rvfi_insn[12:9], 2'b00}; + end + if (cheri_pmode_i) begin + data_accessed = CS1 | RS2 | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr); + end else begin + data_accessed = RS1 | RS2 | MEM; + 
decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rs2_addr, imm, rvfi_rs1_addr); + end + end + endfunction + + function automatic void decode_load_insn(); + string mnemonic; + logic [13:0] imm; + + /* + Gives wrong results in Verilator < 4.020. + See https://github.com/lowRISC/ibex/issues/372 and + https://www.veripool.org/issues/1536-Verilator-Misoptimization-in-if-and-case-with-default-statement-inside-a-function + + unique case (rvfi_insn[14:12]) + 3'b000: mnemonic = "lb"; + 3'b001: mnemonic = "lh"; + 3'b010: mnemonic = "lw"; + 3'b100: mnemonic = "lbu"; + 3'b101: mnemonic = "lhu"; + default: begin + decode_mnemonic("INVALID"); + return; + end + endcase + */ + logic [2:0] size; + logic is_cap; + + size = rvfi_insn[14:12]; + is_cap = 1'b0; + + if (size == 3'b000) begin + mnemonic = cheri_pmode_i ? "clb" : "lb"; + end else if (size == 3'b001) begin + mnemonic = cheri_pmode_i ? "clh" :"lh"; + end else if (size == 3'b010) begin + mnemonic = cheri_pmode_i ? "clw" :"lw"; + end else if (size == 3'b100) begin + mnemonic = cheri_pmode_i ? "clbu" :"lbu"; + end else if (size == 3'b101) begin + mnemonic = cheri_pmode_i ? 
"clhu" :"lhu"; + end else if (size == 3'b011) begin + mnemonic = "CH.clc"; + is_cap = 1'b1; + end else begin + decode_mnemonic("INVALID"); + return; + end + + imm = {{3{rvfi_insn[31]}},rvfi_insn[30:20]}; + + if (is_cap) begin + data_accessed = CD | CS1 | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, + $signed(imm), rvfi_rs1_addr); + end else if (cheri_pmode_i) begin + data_accessed = RD | CS1 | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", mnemonic, rvfi_rd_addr, + $signed(imm), rvfi_rs1_addr); + end else begin + data_accessed = RD | RS1 | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", mnemonic, rvfi_rd_addr, + $signed(imm), rvfi_rs1_addr); + end + endfunction + + function automatic void decode_store_insn(); + string mnemonic; + logic is_cap; + logic [13:0] imm; + + is_cap = 1'b0; + unique case (rvfi_insn[13:12]) + 2'b00: mnemonic = cheri_pmode_i ? "csb" : "sb"; + 2'b01: mnemonic = cheri_pmode_i ? "csh" : "sh"; + 2'b10: mnemonic = cheri_pmode_i ? 
"csw" : "sw"; + 2'b11: begin + mnemonic = "CH.csc"; + is_cap = 1'b1; + end + default: begin + decode_mnemonic("INVALID"); + return; + end + endcase + + imm = {{3{rvfi_insn[31]}},rvfi_insn[30:25], rvfi_insn[11:7]}; + + if (!rvfi_insn[14]) begin + // regular store + if (is_cap) begin + data_accessed = CS1 | CS2 | MEMC; + decoded_str = $sformatf("%s\tc%0d,%0d(c%0d)", + mnemonic, + rvfi_rs2_addr, + $signed(imm), + rvfi_rs1_addr); + end else if (cheri_pmode_i) begin + data_accessed = CS1 | RS2 | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(c%0d)", + mnemonic, + rvfi_rs2_addr, + $signed(imm), + rvfi_rs1_addr); + end else begin + data_accessed = RS1 | RS2 | MEM; + decoded_str = $sformatf("%s\tx%0d,%0d(x%0d)", + mnemonic, + rvfi_rs2_addr, + $signed(imm), + rvfi_rs1_addr); + end + end else begin + decode_mnemonic("INVALID"); + end + endfunction + + function automatic string get_fence_description(logic [3:0] bits); + string desc = ""; + if (bits[3]) begin + desc = {desc, "i"}; + end + if (bits[2]) begin + desc = {desc, "o"}; + end + if (bits[1]) begin + desc = {desc, "r"}; + end + if (bits[0]) begin + desc = {desc, "w"}; + end + return desc; + endfunction + + function automatic void decode_fence(); + string predecessor; + string successor; + predecessor = get_fence_description(rvfi_insn[27:24]); + successor = get_fence_description(rvfi_insn[23:20]); + decoded_str = $sformatf("fence\t%s,%s", predecessor, successor); + endfunction + + function automatic void decode_cheri_rd_rs1_insn(input string mnemonic); + data_accessed = RS1 | RD; + decoded_str = $sformatf("%s\tx%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr); + endfunction + + function automatic void decode_cheri_rd_cs1_insn(input string mnemonic); + data_accessed = CS1 | RD; + decoded_str = $sformatf("%s\tx%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr); + endfunction + + function automatic void decode_cheri_cd_cs1_insn(input string mnemonic); + data_accessed = CS1 | CD; + decoded_str = $sformatf("%s\tc%0d,c%0d", 
mnemonic, rvfi_rd_addr, rvfi_rs1_addr); + endfunction + + function automatic void decode_cheri_rd_cs1_cs2_insn(input string mnemonic); + data_accessed = CS2 | CS1 | RD; + decoded_str = $sformatf("%s\tx%0d,c%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_cheri_cd_cs1_cs2_insn(input string mnemonic); + data_accessed = CS2 | CS1 | CD; + decoded_str = $sformatf("%s\tc%0d,c%0d,c%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_cheri_cd_cs1_rs2_insn(input string mnemonic); + data_accessed = RS2 | CS1 | CD; + decoded_str = $sformatf("%s\tc%0d,c%0d,x%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_cheri_cd_cs1_imm_insn(input string mnemonic); + logic [13:0] imm; + + data_accessed = CS1 | CD; + + // cincaddrimm and csetboundsimm + imm = {{3{rvfi_insn[31]}}, rvfi_insn[30:20]}; // imm not extended + + if (rvfi_insn[14:12] == 3'b001) // cincaddrimm + decoded_str = $sformatf("%s\tc%0d,c%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, $signed(imm)); + else // csetboundsimm + decoded_str = $sformatf("%s\tc%0d,c%0d,%0d", mnemonic, rvfi_rd_addr, rvfi_rs1_addr, imm); + + endfunction + + function automatic void decode_cheri_auipcc_insn(); + logic [31:0] imm; + + // U-type: imm is the raw upper-20-bit field of the instruction (not shifted). + imm = rvfi_insn[31:12]; + data_accessed = CD; + if (cheri_pmode_i) begin + decoded_str = $sformatf("%s\tc%0d,0x%0x", "CH.auipcc", rvfi_rd_addr, imm); + end else begin + decoded_str = $sformatf("%s\tx%0d,0x%0x", "auipc", rvfi_rd_addr, imm); + end + + endfunction + + + function automatic void decode_cheri_auicgp_insn(); + logic [31:0] imm; + + // U-type: imm is the raw upper-20-bit field of the instruction (not shifted). 
+ imm = rvfi_insn[31:12]; + data_accessed = CD | CS1; + decoded_str = $sformatf("%s\tc%0d,0x%0x", "CH.auicgp", rvfi_rd_addr, imm); + endfunction + + + function automatic void decode_cheri_cs1_cs2_insn(input string mnemonic); + data_accessed = CS2 | CS1; + decoded_str = $sformatf("%s\tc%0d,c%0d", mnemonic, rvfi_rs1_addr, rvfi_rs2_addr); + endfunction + + function automatic void decode_cheri_scrrw_insn(); + string mnemonic, scr_name; + + scr_name = get_scr_name(rvfi_insn[24:20]); + data_accessed = CS1 | CD; + + if (rvfi_rd_addr == 0) begin + mnemonic = "CH.cspecialw"; + decoded_str = $sformatf("%s\t%s,c%0d", mnemonic, scr_name, rvfi_rs1_addr); + end else if (rvfi_rs1_addr == 0) begin + mnemonic = "CH.cspecialr"; + decoded_str = $sformatf("%s\tc%0d,%s", mnemonic, rvfi_rd_addr, scr_name); + end else begin + mnemonic = "CH.cspecialrw"; + decoded_str = $sformatf("%s\tc%0d,%s,c%0d", mnemonic, rvfi_rd_addr, scr_name, rvfi_rs1_addr); + end + endfunction + + // cycle counter + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cycle <= 0; + end else begin + cycle <= cycle + 1; + end + end + + // close output file for writing + final begin + if (file_handle != 32'h0) begin + $fclose(file_handle); + end + end + // log execution + always_ff @(posedge clk_i) begin + if (rvfi_valid && trace_log_enable) begin + printbuffer_dumpline(); + end + end + + if (DataWidth == 33) begin + assign rvfi_mem_wdata_bit32 = rvfi_mem_wdata[32]; + end else begin + assign rvfi_mem_wdata_bit32 = 1'b0; + end + + //always_comb begin + // change to always @* to get rid of VCS warnings about dynamic type and sensitivity list + always @* begin + decoded_str = ""; + data_accessed = 5'h0; + insn_is_compressed = 0; + + // Check for compressed instructions + if (rvfi_insn[1:0] != 2'b11) begin + insn_is_compressed = 1; + // Separate case to avoid overlapping decoding + if (rvfi_insn[15:13] == INSN_CMV[15:13] && rvfi_insn[1:0] == OPCODE_C2) begin + if (rvfi_insn[12] == INSN_CADD[12]) 
begin + if (rvfi_insn[11:2] == INSN_CEBREAK[11:2]) begin + decode_mnemonic("c.ebreak"); + end else if (rvfi_insn[6:2] == INSN_CJALR[6:2]) begin + decode_cr_insn("c.jalr"); + end else begin + decode_cr_insn("c.add"); + end + end else begin + if (rvfi_insn[6:2] == INSN_CJR[6:2]) begin + decode_cr_insn("c.jr"); + end else begin + decode_cr_insn("c.mv"); + end + end + end else begin + unique casez (rvfi_insn[15:0]) + // C0 Opcodes + INSN_CADDI4SPN: begin + if (rvfi_insn[12:2] == 11'h0) begin + // Align with pseudo-mnemonic used by GNU binutils and LLVM's MC layer + decode_mnemonic("c.unimp"); + end else begin + decode_ciw_insn("c.addi4spn"); + end + end + INSN_CLW: decode_compressed_load_insn("c.lw"); + INSN_CSW: decode_compressed_store_insn("c.sw"); + INSN_CCLC: decode_compressed_load_insn("c.CH.clc"); + INSN_CCSC: decode_compressed_store_insn("c.CH.csc"); + // C1 Opcodes + INSN_CADDI: decode_ci_caddi_insn("c.addi"); + INSN_CJAL: decode_cj_insn("c.jal"); + INSN_CJ: decode_cj_insn("c.j"); + INSN_CLI: decode_ci_cli_insn("c.li"); + INSN_CLUI: begin + // These two instructions share opcode + if (rvfi_insn[11:7] == 5'd2) begin + decode_ci_caddi16sp_insn("c.addi16sp"); + end else begin + decode_ci_clui_insn("c.lui"); + end + end + INSN_CSRLI: decode_cb_sr_insn("c.srli"); + INSN_CSRAI: decode_cb_sr_insn("c.srai"); + INSN_CANDI: decode_cb_insn("c.andi"); + INSN_CSUB: decode_cs_insn("c.sub"); + INSN_CXOR: decode_cs_insn("c.xor"); + INSN_COR: decode_cs_insn("c.or"); + INSN_CAND: decode_cs_insn("c.and"); + INSN_CBEQZ: decode_cb_insn("c.beqz"); + INSN_CBNEZ: decode_cb_insn("c.bnez"); + // C2 Opcodes + INSN_CSLLI: decode_ci_cslli_insn("c.slli"); + INSN_CLWSP: decode_compressed_load_insn("c.lwsp"); + INSN_SWSP: decode_compressed_store_insn("c.swsp"); + INSN_CCLCSP: decode_compressed_load_insn("c.CH.clcsp"); + INSN_CCSCSP: decode_compressed_store_insn("c.CH.cscsp"); + default: decode_mnemonic("INVALID"); + endcase + end + end else begin + unique casez (rvfi_insn) + // Regular 
opcodes + INSN_LUI: decode_u_insn("lui"); + // INSN_AUIPC: decode_u_insn("auipc"); + INSN_JAL: decode_j_insn("jal"); + INSN_JALR: decode_i_jalr_insn("jalr"); + // BRANCH + INSN_BEQ: decode_b_insn("beq"); + INSN_BNE: decode_b_insn("bne"); + INSN_BLT: decode_b_insn("blt"); + INSN_BGE: decode_b_insn("bge"); + INSN_BLTU: decode_b_insn("bltu"); + INSN_BGEU: decode_b_insn("bgeu"); + // OPIMM + INSN_ADDI: begin + if (rvfi_insn == 32'h00_00_00_13) begin + // TODO: objdump doesn't decode this as nop currently, even though it would be helpful + // Decide what to do here: diverge from objdump, or make the trace less readable to + // users. + //decode_mnemonic("nop"); + decode_i_insn("addi"); + end else begin + decode_i_insn("addi"); + end + end + INSN_SLTI: decode_i_insn("slti"); + INSN_SLTIU: decode_i_insn("sltiu"); + INSN_XORI: decode_i_insn("xori"); + INSN_ORI: decode_i_insn("ori"); + // Unlike the ratified v.1.0.0 bitmanip extension, the v.0.94 draft extension continues to + // define the pseudo-instruction + // zext.b rd rs = andi rd, rs, 255. + // However, for now the tracer doesn't emit this due to a lack of support in the LLVM and + // GCC toolchains. 
Enabling this functionality when the time is right is tracked in + // https://github.com/lowRISC/ibex/issues/1228 + INSN_ANDI: decode_i_insn("andi"); + // INSN_ANDI:begin + // casez (rvfi_insn) + // INSN_ZEXTB: decode_r1_insn("zext.b"); + // default: decode_i_insn("andi"); + // endcase + // end + INSN_SLLI: decode_i_shift_insn("slli"); + INSN_SRLI: decode_i_shift_insn("srli"); + INSN_SRAI: decode_i_shift_insn("srai"); + // OP + INSN_ADD: decode_r_insn("add"); + INSN_SUB: decode_r_insn("sub"); + INSN_SLL: decode_r_insn("sll"); + INSN_SLT: decode_r_insn("slt"); + INSN_SLTU: decode_r_insn("sltu"); + INSN_XOR: decode_r_insn("xor"); + INSN_SRL: decode_r_insn("srl"); + INSN_SRA: decode_r_insn("sra"); + INSN_OR: decode_r_insn("or"); + INSN_AND: decode_r_insn("and"); + // SYSTEM (CSR manipulation) + INSN_CSRRW: decode_csr_insn("csrrw"); + INSN_CSRRS: decode_csr_insn("csrrs"); + INSN_CSRRC: decode_csr_insn("csrrc"); + INSN_CSRRWI: decode_csr_insn("csrrwi"); + INSN_CSRRSI: decode_csr_insn("csrrsi"); + INSN_CSRRCI: decode_csr_insn("csrrci"); + // SYSTEM (others) + INSN_ECALL: decode_mnemonic("ecall"); + INSN_EBREAK: decode_mnemonic("ebreak"); + INSN_MRET: decode_mnemonic("mret"); + INSN_DRET: decode_mnemonic("dret"); + INSN_WFI: decode_mnemonic("wfi"); + // RV32M + INSN_PMUL: decode_r_insn("mul"); + INSN_PMUH: decode_r_insn("mulh"); + INSN_PMULHSU: decode_r_insn("mulhsu"); + INSN_PMULHU: decode_r_insn("mulhu"); + INSN_DIV: decode_r_insn("div"); + INSN_DIVU: decode_r_insn("divu"); + INSN_REM: decode_r_insn("rem"); + INSN_REMU: decode_r_insn("remu"); + // LOAD & STORE + INSN_LOAD: decode_load_insn(); + INSN_STORE: decode_store_insn(); + // MISC-MEM + INSN_FENCE: decode_fence(); + INSN_FENCEI: decode_mnemonic("fence.i"); + // RV32B - ZBA + INSN_SH1ADD: decode_r_insn("sh1add"); + INSN_SH2ADD: decode_r_insn("sh2add"); + INSN_SH3ADD: decode_r_insn("sh3add"); + // RV32B - ZBB + INSN_RORI: decode_i_shift_insn("rori"); + INSN_ROL: decode_r_insn("rol"); + INSN_ROR: 
decode_r_insn("ror"); + INSN_MIN: decode_r_insn("min"); + INSN_MAX: decode_r_insn("max"); + INSN_MINU: decode_r_insn("minu"); + INSN_MAXU: decode_r_insn("maxu"); + INSN_XNOR: decode_r_insn("xnor"); + INSN_ORN: decode_r_insn("orn"); + INSN_ANDN: decode_r_insn("andn"); + // The ratified v.1.0.0 bitmanip extension defines the pseudo-instruction + // zext.h rd rs = pack rd, rs, zero. + // However, for now the tracer doesn't emit this due to a lack of support in the LLVM and + // GCC toolchains. Enabling this functionality when the time is right is tracked in + // https://github.com/lowRISC/ibex/issues/1228 + INSN_PACK: decode_r_insn("pack"); + // INSN_PACK: begin + // casez (rvfi_insn) + // INSN_ZEXTH: decode_r1_insn("zext.h"); + // default: decode_r_insn("pack"); + // endcase + // end + INSN_PACKH: decode_r_insn("packh"); + INSN_PACKU: decode_r_insn("packu"); + INSN_CLZ: decode_r1_insn("clz"); + INSN_CTZ: decode_r1_insn("ctz"); + INSN_CPOP: decode_r1_insn("cpop"); + INSN_SEXTB: decode_r1_insn("sext.b"); + INSN_SEXTH: decode_r1_insn("sext.h"); + // RV32B - ZBS + INSN_BCLRI: decode_i_insn("bclri"); + INSN_BSETI: decode_i_insn("bseti"); + INSN_BINVI: decode_i_insn("binvi"); + INSN_BEXTI: decode_i_insn("bexti"); + INSN_BCLR: decode_r_insn("bclr"); + INSN_BSET: decode_r_insn("bset"); + INSN_BINV: decode_r_insn("binv"); + INSN_BEXT: decode_r_insn("bext"); + // RV32B - ZBE + INSN_BDECOMPRESS: decode_r_insn("bdecompress"); + INSN_BCOMPRESS: decode_r_insn("bcompress"); + // RV32B - ZBP + INSN_GREV: decode_r_insn("grev"); + INSN_GREVI: begin + unique casez (rvfi_insn) + INSN_REV_P: decode_r1_insn("rev.p"); + INSN_REV2_N: decode_r1_insn("rev2.n"); + INSN_REV_N: decode_r1_insn("rev.n"); + INSN_REV4_B: decode_r1_insn("rev4.b"); + INSN_REV2_B: decode_r1_insn("rev2.b"); + INSN_REV_B: decode_r1_insn("rev.b"); + INSN_REV8_H: decode_r1_insn("rev8.h"); + INSN_REV4_H: decode_r1_insn("rev4.h"); + INSN_REV2_H: decode_r1_insn("rev2.h"); + INSN_REV_H: decode_r1_insn("rev.h"); + INSN_REV16: 
decode_r1_insn("rev16"); + INSN_REV8: decode_r1_insn("rev8"); + INSN_REV4: decode_r1_insn("rev4"); + INSN_REV2: decode_r1_insn("rev2"); + INSN_REV: decode_r1_insn("rev"); + default: decode_i_insn("grevi"); + endcase + end + INSN_GORC: decode_r_insn("gorc"); + INSN_GORCI: begin + unique casez (rvfi_insn) + INSN_ORC_P: decode_r1_insn("orc.p"); + INSN_ORC2_N: decode_r1_insn("orc2.n"); + INSN_ORC_N: decode_r1_insn("orc.n"); + INSN_ORC4_B: decode_r1_insn("orc4.b"); + INSN_ORC2_B: decode_r1_insn("orc2.b"); + INSN_ORC_B: decode_r1_insn("orc.b"); + INSN_ORC8_H: decode_r1_insn("orc8.h"); + INSN_ORC4_H: decode_r1_insn("orc4.h"); + INSN_ORC2_H: decode_r1_insn("orc2.h"); + INSN_ORC_H: decode_r1_insn("orc.h"); + INSN_ORC16: decode_r1_insn("orc16"); + INSN_ORC8: decode_r1_insn("orc8"); + INSN_ORC4: decode_r1_insn("orc4"); + INSN_ORC2: decode_r1_insn("orc2"); + INSN_ORC: decode_r1_insn("orc"); + default: decode_i_insn("gorci"); + endcase + end + INSN_SHFL: decode_r_insn("shfl"); + INSN_SHFLI: begin + unique casez (rvfi_insn) + INSN_ZIP_N: decode_r1_insn("zip.n"); + INSN_ZIP2_B: decode_r1_insn("zip2.b"); + INSN_ZIP_B: decode_r1_insn("zip.b"); + INSN_ZIP4_H: decode_r1_insn("zip4.h"); + INSN_ZIP2_H: decode_r1_insn("zip2.h"); + INSN_ZIP_H: decode_r1_insn("zip.h"); + INSN_ZIP8: decode_r1_insn("zip8"); + INSN_ZIP4: decode_r1_insn("zip4"); + INSN_ZIP2: decode_r1_insn("zip2"); + INSN_ZIP: decode_r1_insn("zip"); + default: decode_i_insn("shfli"); + endcase + end + INSN_UNSHFL: decode_r_insn("unshfl"); + INSN_UNSHFLI: begin + unique casez (rvfi_insn) + INSN_UNZIP_N: decode_r1_insn("unzip.n"); + INSN_UNZIP2_B: decode_r1_insn("unzip2.b"); + INSN_UNZIP_B: decode_r1_insn("unzip.b"); + INSN_UNZIP4_H: decode_r1_insn("unzip4.h"); + INSN_UNZIP2_H: decode_r1_insn("unzip2.h"); + INSN_UNZIP_H: decode_r1_insn("unzip.h"); + INSN_UNZIP8: decode_r1_insn("unzip8"); + INSN_UNZIP4: decode_r1_insn("unzip4"); + INSN_UNZIP2: decode_r1_insn("unzip2"); + INSN_UNZIP: decode_r1_insn("unzip"); + default: 
decode_i_insn("unshfli"); + endcase + end + INSN_XPERM_N: decode_r_insn("xperm_n"); + INSN_XPERM_B: decode_r_insn("xperm_b"); + INSN_XPERM_H: decode_r_insn("xperm_h"); + INSN_SLO: decode_r_insn("slo"); + INSN_SRO: decode_r_insn("sro"); + INSN_SLOI: decode_i_shift_insn("sloi"); + INSN_SROI: decode_i_shift_insn("sroi"); + + // RV32B - ZBT + INSN_CMIX: decode_r_cmixcmov_insn("cmix"); + INSN_CMOV: decode_r_cmixcmov_insn("cmov"); + INSN_FSR: decode_r_funnelshift_insn("fsr"); + INSN_FSL: decode_r_funnelshift_insn("fsl"); + INSN_FSRI: decode_i_funnelshift_insn("fsri"); + + // RV32B - ZBF + INSN_BFP: decode_r_insn("bfp"); + + // RV32B - ZBC + INSN_CLMUL: decode_r_insn("clmul"); + INSN_CLMULR: decode_r_insn("clmulr"); + INSN_CLMULH: decode_r_insn("clmulh"); + + // RV32B - ZBR + INSN_CRC32_B: decode_r1_insn("crc32.b"); + INSN_CRC32_H: decode_r1_insn("crc32.h"); + INSN_CRC32_W: decode_r1_insn("crc32.w"); + INSN_CRC32C_B: decode_r1_insn("crc32c.b"); + INSN_CRC32C_H: decode_r1_insn("crc32c.h"); + INSN_CRC32C_W: decode_r1_insn("crc32c.w"); + + // CHERI, get fields + INSN_CHGETPERM: decode_cheri_rd_cs1_insn("CH.cgetperm"); + INSN_CHGETTYPE: decode_cheri_rd_cs1_insn("CH.cgettype"); + INSN_CHGETBASE: decode_cheri_rd_cs1_insn("CH.cgetbase"); + INSN_CHGETTOP: decode_cheri_rd_cs1_insn("CH.cgettop"); + INSN_CHGETLEN: decode_cheri_rd_cs1_insn("CH.cgetlen"); + INSN_CHGETTAG: decode_cheri_rd_cs1_insn("CH.cgettag"); + INSN_CHGETSEALED: decode_cheri_rd_cs1_insn("CH.cgetseald"); + INSN_CHGETADDR: decode_cheri_rd_cs1_insn("CH.cgetaddr"); + INSN_CHGETHIGH: decode_cheri_rd_cs1_insn("CH.cgethigh"); + + INSN_CHSEAL: decode_cheri_cd_cs1_cs2_insn("CH.cseal"); + INSN_CHUNSEAL: decode_cheri_cd_cs1_cs2_insn("CH.cunseal"); + INSN_CHANDPERM: decode_cheri_cd_cs1_rs2_insn("CH.candperm"); + INSN_CHSETADDR: decode_cheri_cd_cs1_rs2_insn("CH.csetaddr"); + INSN_CHINCADDR: decode_cheri_cd_cs1_rs2_insn("CH.cincaddr"); + INSN_CHINCADDRIMM: decode_cheri_cd_cs1_imm_insn("CH.cincaddrimm"); + INSN_CHSETBOUNDS: 
decode_cheri_cd_cs1_rs2_insn("CH.csetbounds"); + INSN_CHSETBOUNDSEX: decode_cheri_cd_cs1_rs2_insn("CH.csetboundsexact"); + INSN_CHSETBOUNDSRNDN: decode_cheri_cd_cs1_rs2_insn("CH.csetboundsrounddown"); + + INSN_CHSETBOUNDSIMM: decode_cheri_cd_cs1_imm_insn("CH.csetboundsimm"); + INSN_CHCLEARTAG: decode_cheri_cd_cs1_insn("CH.ccleartag"); + INSN_CHCRRL: decode_cheri_rd_rs1_insn("CH.crrl"); + INSN_CHCRAM: decode_cheri_rd_rs1_insn("CH.cram"); + + INSN_CHSUB: decode_cheri_rd_cs1_cs2_insn("CH.csub"); + INSN_CHMOVE: decode_cheri_cd_cs1_insn("CH.cmove"); + INSN_CHTESTSUB: decode_cheri_rd_cs1_cs2_insn("CH.ctestsubset"); + INSN_CHSETEQUAL: decode_cheri_rd_cs1_cs2_insn("CH.csetequalexact"); + INSN_CHSETHIGH: decode_cheri_cd_cs1_rs2_insn("CH.csethigh"); + //INSN_CHJALR: decode_cheri_cd_cs1_insn("CH.jalr"); + INSN_CHCSRRW: decode_cheri_scrrw_insn(); + INSN_AUIPC: decode_cheri_auipcc_insn(); + INSN_AUICGP: decode_cheri_auicgp_insn(); + + default: decode_mnemonic("INVALID"); + endcase + end + end +// synthesis translate_on + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv b/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv new file mode 100644 index 0000000..ce0fed8 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_tracer_pkg.sv
@@ -0,0 +1,379 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2017 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +package cheriot_tracer_pkg; + import cheriot_pkg::*; + + parameter logic [1:0] OPCODE_C0 = 2'b00; + parameter logic [1:0] OPCODE_C1 = 2'b01; + parameter logic [1:0] OPCODE_C2 = 2'b10; + + // instruction masks (for tracer) + parameter logic [31:0] INSN_LUI = { 25'h?, {OPCODE_LUI } }; + parameter logic [31:0] INSN_AUIPC = { 25'h?, {OPCODE_AUIPC} }; + parameter logic [31:0] INSN_JAL = { 25'h?, {OPCODE_JAL } }; + parameter logic [31:0] INSN_JALR = { 17'h?, 3'b000, 5'h?, {OPCODE_JALR } }; + + // BRANCH + parameter logic [31:0] INSN_BEQ = { 17'h?, 3'b000, 5'h?, {OPCODE_BRANCH} }; + parameter logic [31:0] INSN_BNE = { 17'h?, 3'b001, 5'h?, {OPCODE_BRANCH} }; + parameter logic [31:0] INSN_BLT = { 17'h?, 3'b100, 5'h?, {OPCODE_BRANCH} }; + parameter logic [31:0] INSN_BGE = { 17'h?, 3'b101, 5'h?, {OPCODE_BRANCH} }; + parameter logic [31:0] INSN_BLTU = { 17'h?, 3'b110, 5'h?, {OPCODE_BRANCH} }; + parameter logic [31:0] INSN_BGEU = { 17'h?, 3'b111, 5'h?, {OPCODE_BRANCH} }; + + // OPIMM + parameter logic [31:0] INSN_ADDI = { 17'h?, 3'b000, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SLTI = { 17'h?, 3'b010, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SLTIU = { 17'h?, 3'b011, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_XORI = { 17'h?, 3'b100, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORI = { 17'h?, 3'b110, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ANDI = { 17'h?, 3'b111, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SLLI = { 7'b0000000, 10'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SRLI = { 
7'b0000000, 10'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SRAI = { 7'b0100000, 10'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + + // OP + parameter logic [31:0] INSN_ADD = { 7'b0000000, 10'h?, 3'b000, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SUB = { 7'b0100000, 10'h?, 3'b000, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLL = { 7'b0000000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLT = { 7'b0000000, 10'h?, 3'b010, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLTU = { 7'b0000000, 10'h?, 3'b011, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_XOR = { 7'b0000000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SRL = { 7'b0000000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SRA = { 7'b0100000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_OR = { 7'b0000000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_AND = { 7'b0000000, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + + // SYSTEM + parameter logic [31:0] INSN_CSRRW = { 17'h?, 3'b001, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_CSRRS = { 17'h?, 3'b010, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_CSRRC = { 17'h?, 3'b011, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_CSRRWI = { 17'h?, 3'b101, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_CSRRSI = { 17'h?, 3'b110, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_CSRRCI = { 17'h?, 3'b111, 5'h?, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_ECALL = { 12'b000000000000, 13'b0, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_EBREAK = { 12'b000000000001, 13'b0, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_MRET = { 12'b001100000010, 13'b0, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_DRET = { 12'b011110110010, 13'b0, {OPCODE_SYSTEM} }; + parameter logic [31:0] INSN_WFI = { 12'b000100000101, 13'b0, {OPCODE_SYSTEM} }; + + // RV32M + parameter logic [31:0] INSN_DIV = { 7'b0000001, 10'h?, 
3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_DIVU = { 7'b0000001, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_REM = { 7'b0000001, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_REMU = { 7'b0000001, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PMUL = { 7'b0000001, 10'h?, 3'b000, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PMUH = { 7'b0000001, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PMULHSU = { 7'b0000001, 10'h?, 3'b010, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PMULHU = { 7'b0000001, 10'h?, 3'b011, 5'h?, {OPCODE_OP} }; + + // RV32B + // ZBA + parameter logic [31:0] INSN_SH1ADD = { 7'b0010000, 10'h?, 3'b010, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SH2ADD = { 7'b0010000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SH3ADD = { 7'b0010000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + + // ZBB + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as + // fsri. + parameter logic [31:0] INSN_RORI = { 5'b01100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CPOP = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SEXTB = { 12'b011000000100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_SEXTH = { 12'b011000000101, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + + // The zext.h and zext.b pseudo-instructions are defined in the ratified v.1.0.0 and draft v.0.94 + // specifications of the bitmanip extension, respectively. They are currently not emitted by the + // tracer due to a lack of support in the LLVM and GCC toolchains. 
Enabling this functionality + // when the time is right is tracked in https://github.com/lowRISC/ibex/issues/1228 + // zext.b -- pseudo-instruction: andi rd, rs 255 + // parameter logic [31:0] INSN_ZEXTB = + // { 4'b0000, 8'b11111111, 5'h?, 3'b111, 5'h?, {OPCODE_OP_IMM} }; + // zext.h -- pseudo-instruction: pack rd, rs zero + // parameter logic [31:0] INSN_ZEXTH = { 7'b0000100, 5'b00000, 5'h?, 3'b100, 5'h?, {OPCODE_OP} }; + + parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + + // ZBS + parameter logic [31:0] INSN_BCLRI = { 5'b01001, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_BSETI = { 5'b00101, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_BINVI = { 5'b01101, 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. Whenever instr[26] is set, bexti is instead decoded as fsri. 
+ parameter logic [31:0] INSN_BEXTI = { 5'b01001, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + + parameter logic [31:0] INSN_BCLR = { 7'b0100100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_BSET = { 7'b0010100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_BINV = { 7'b0110100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_BEXT = { 7'b0100100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + + // ZBP + // grevi + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. Whenever instr[26] is set, grevi is instead decoded as fsri. + parameter logic [31:0] INSN_GREVI = { 5'b01101, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // grevi -- pseudo-instructions + parameter logic [31:0] INSN_REV_P = + { 5'b01101, 1'b0, 1'b?, 5'b00001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV2_N = + { 5'b01101, 1'b0, 1'b?, 5'b00010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV_N = + { 5'b01101, 1'b0, 1'b?, 5'b00011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV4_B = + { 5'b01101, 1'b0, 1'b?, 5'b00100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV2_B = + { 5'b01101, 1'b0, 1'b?, 5'b00110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV_B = + { 5'b01101, 1'b0, 1'b?, 5'b00111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV8_H = + { 5'b01101, 1'b0, 1'b?, 5'b01000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV4_H = + { 5'b01101, 1'b0, 1'b?, 5'b01100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV2_H = + { 5'b01101, 1'b0, 1'b?, 5'b01110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV_H = + { 5'b01101, 1'b0, 1'b?, 5'b01111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV16 = + { 5'b01101, 1'b0, 1'b?, 5'b10000, 
5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV8 = + { 5'b01101, 1'b0, 1'b?, 5'b11000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV4 = + { 5'b01101, 1'b0, 1'b?, 5'b11100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV2 = + { 5'b01101, 1'b0, 1'b?, 5'b11110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_REV = + { 5'b01101, 1'b0, 1'b?, 5'b11111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // gorci + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. Whenever instr[26] is set, gorci is instead decoded as fsri. + parameter logic [31:0] INSN_GORCI = { 5'b00101, 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // gorci -- pseudo-instructions + parameter logic [31:0] INSN_ORC_P = + { 5'b00101, 1'b0, 1'b?, 5'b00001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC2_N = + { 5'b00101, 1'b0, 1'b?, 5'b00010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC_N = + { 5'b00101, 1'b0, 1'b?, 5'b00011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC4_B = + { 5'b00101, 1'b0, 1'b?, 5'b00100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC2_B = + { 5'b00101, 1'b0, 1'b?, 5'b00110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC_B = + { 5'b00101, 1'b0, 1'b?, 5'b00111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC8_H = + { 5'b00101, 1'b0, 1'b?, 5'b01000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC4_H = + { 5'b00101, 1'b0, 1'b?, 5'b01100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC2_H = + { 5'b00101, 1'b0, 1'b?, 5'b01110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC_H = + { 5'b00101, 1'b0, 1'b?, 5'b01111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC16 = + { 
5'b00101, 1'b0, 1'b?, 5'b10000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC8 = + { 5'b00101, 1'b0, 1'b?, 5'b11000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC4 = + { 5'b00101, 1'b0, 1'b?, 5'b11100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC2 = + { 5'b00101, 1'b0, 1'b?, 5'b11110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ORC = + { 5'b00101, 1'b0, 1'b?, 5'b11111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // shfli + parameter logic [31:0] INSN_SHFLI = { 6'b000010, 11'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + // shfli -- pseudo-instructions + parameter logic [31:0] INSN_ZIP_N = + { 6'b000010, 2'h?, 4'b0001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP2_B = + { 6'b000010, 2'h?, 4'b0010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP_B = + { 6'b000010, 2'h?, 4'b0011, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP4_H = + { 6'b000010, 2'h?, 4'b0100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP2_H = + { 6'b000010, 2'h?, 4'b0110, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP_H = + { 6'b000010, 2'h?, 4'b0111, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP8 = + { 6'b000010, 2'h?, 4'b1000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP4 = + { 6'b000010, 2'h?, 4'b1100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP2 = + { 6'b000010, 2'h?, 4'b1110, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_ZIP = + { 6'b000010, 2'h?, 4'b1111, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + // unshfli + parameter logic [31:0] INSN_UNSHFLI = { 6'b000010, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // unshfli -- pseudo-instructions + parameter logic [31:0] INSN_UNZIP_N = + { 6'b000010, 2'h?, 4'b0001, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] 
INSN_UNZIP2_B = + { 6'b000010, 2'h?, 4'b0010, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP_B = + { 6'b000010, 2'h?, 4'b0011, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP4_H = + { 6'b000010, 2'h?, 4'b0100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP2_H = + { 6'b000010, 2'h?, 4'b0110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP_H = + { 6'b000010, 2'h?, 4'b0111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP8 = + { 6'b000010, 2'h?, 4'b1000, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP4 = + { 6'b000010, 2'h?, 4'b1100, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP2 = + { 6'b000010, 2'h?, 4'b1110, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_UNZIP = + { 6'b000010, 2'h?, 4'b1111, 5'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + + parameter logic [31:0] INSN_GREV = { 7'b0110100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_GORC = { 7'b0010100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + + parameter logic [31:0] INSN_XPERM_N = { 7'b0010100, 10'h?, 3'b010, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_XPERM_B = { 7'b0010100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_XPERM_H = { 7'b0010100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + + parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. 
Whenever instr[26] is set, sroi/rori is instead decoded as + // fsri. + parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + + // ZBE + parameter logic [31:0] INSN_BDECOMPRESS = {7'b0100100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_BCOMPRESS = {7'b0000100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + + // ZBT + parameter logic [31:0] INSN_FSRI = { 5'h?, 1'b1, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + + parameter logic [31:0] INSN_CMIX = {5'h?, 2'b11, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_CMOV = {5'h?, 2'b11, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_FSL = {5'h?, 2'b10, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_FSR = {5'h?, 2'b10, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + + // ZBF + parameter logic [31:0] INSN_BFP = {7'b0100100, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; + + // ZBC + parameter logic [31:0] INSN_CLMUL = {7'b0000101, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_CLMULR = {7'b0000101, 10'h?, 3'b010, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_CLMULH = {7'b0000101, 10'h?, 3'b011, 5'h?, {OPCODE_OP} }; + + // ZBR + parameter logic [31:0] INSN_CRC32_B = + {7'b0110000, 5'b10000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CRC32_H = + {7'b0110000, 5'b10001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CRC32_W = + {7'b0110000, 5'b10010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CRC32C_B = + {7'b0110000, 5'b11000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CRC32C_H = + {7'b0110000, 5'b11001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CRC32C_W = + {7'b0110000, 5'b11010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + + // LOAD & STORE + parameter logic [31:0] INSN_LOAD = {25'h?, {OPCODE_LOAD } }; + parameter logic [31:0] INSN_STORE = {25'h?, {OPCODE_STORE} }; + + // MISC-MEM + parameter logic 
[31:0] INSN_FENCE = { 17'h?, 3'b000, 5'h?, {OPCODE_MISC_MEM} }; + parameter logic [31:0] INSN_FENCEI = { 17'h0, 3'b001, 5'h0, {OPCODE_MISC_MEM} }; + + // Compressed Instructions + // C0 + parameter logic [15:0] INSN_CADDI4SPN = { 3'b000, 11'h?, {OPCODE_C0} }; + parameter logic [15:0] INSN_CLW = { 3'b010, 11'h?, {OPCODE_C0} }; + parameter logic [15:0] INSN_CSW = { 3'b110, 11'h?, {OPCODE_C0} }; + parameter logic [15:0] INSN_CCLC = { 3'b011, 11'h?, {OPCODE_C0} }; + parameter logic [15:0] INSN_CCSC = { 3'b111, 11'h?, {OPCODE_C0} }; + + // C1 + parameter logic [15:0] INSN_CADDI = { 3'b000, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CJAL = { 3'b001, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CJ = { 3'b101, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CLI = { 3'b010, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CLUI = { 3'b011, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CBEQZ = { 3'b110, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CBNEZ = { 3'b111, 11'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CSRLI = { 3'b100, 1'h?, 2'b00, 8'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CSRAI = { 3'b100, 1'h?, 2'b01, 8'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CANDI = { 3'b100, 1'h?, 2'b10, 8'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CSUB = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b00, 3'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CXOR = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b01, 3'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_COR = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b10, 3'h?, {OPCODE_C1} }; + parameter logic [15:0] INSN_CAND = { 3'b100, 1'b0, 2'b11, 3'h?, 2'b11, 3'h?, {OPCODE_C1} }; + + // C2 + parameter logic [15:0] INSN_CSLLI = { 3'b000, 11'h?, {OPCODE_C2} }; + parameter logic [15:0] INSN_CLWSP = { 3'b010, 11'h?, {OPCODE_C2} }; + parameter logic [15:0] INSN_SWSP = { 3'b110, 11'h?, {OPCODE_C2} }; + parameter logic [15:0] INSN_CMV = { 3'b100, 1'b0, 10'h?, {OPCODE_C2} }; + parameter logic [15:0] INSN_CADD = { 3'b100, 1'b1, 
10'h?, {OPCODE_C2} }; + parameter logic [15:0] INSN_CEBREAK = { 3'b100, 1'b1, 5'h0, 5'h0, {OPCODE_C2} }; + parameter logic [15:0] INSN_CJR = { 3'b100, 1'b0, 5'h0, 5'h0, {OPCODE_C2} }; + parameter logic [15:0] INSN_CJALR = { 3'b100, 1'b1, 5'h?, 5'h0, {OPCODE_C2} }; + parameter logic [15:0] INSN_CCLCSP = { 3'b011, 11'h?, {OPCODE_C2} }; // FLWSP + parameter logic [15:0] INSN_CCSCSP = { 3'b111, 11'h?, {OPCODE_C2} }; // FSWSP + + // 32-bit CHERI instructions + parameter logic [31:0] INSN_CHGETPERM = {7'h7f, 5'h0, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETTYPE = {7'h7f, 5'h1, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETBASE = {7'h7f, 5'h2, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETHIGH = {7'h7f, 5'h17, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETTOP = {7'h7f, 5'h18, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETLEN = {7'h7f, 5'h3, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETTAG = {7'h7f, 5'h4, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETSEALED = {7'h7f, 5'h5, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHGETADDR = {7'h7f, 5'hf, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + + parameter logic [31:0] INSN_CHSEAL = {7'h0b, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHUNSEAL = {7'h0c, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHANDPERM = {7'h0d, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETADDR = {7'h10, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHINCADDR = {7'h11, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHINCADDRIMM = {12'h?, 5'h?, 3'b001, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETBOUNDS = {7'h08, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETBOUNDSEX = {7'h09, 10'h?, 3'b000, 
5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETBOUNDSRNDN = {7'h0a, 10'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETBOUNDSIMM = {12'h?, 5'h?, 3'b010, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHCLEARTAG = {7'h7f, 5'hb, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHCRRL = {7'h7f, 5'h8, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHCRAM = {7'h7f, 5'h9, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + + parameter logic [31:0] INSN_CHSUB = {7'h14, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHMOVE = {7'h7f, 5'ha, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHTESTSUB = {7'h20, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETEQUAL = {7'h21, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_CHSETHIGH = {7'h16, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + + parameter logic [31:0] INSN_CHJALR = {7'h7f, 5'hc, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + + parameter logic [31:0] INSN_CHCSRRW = {7'h01, 5'h?, 5'h?, 3'b000, 5'h?, {OPCODE_CHERI} }; + parameter logic [31:0] INSN_AUICGP = { 25'h?, {OPCODE_AUICGP} }; + +endpackage
diff --git a/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv b/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv new file mode 100644 index 0000000..8ff5461 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriot_wb_stage.sv
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Writeback Stage
 *
 * Writeback is an optional third pipeline stage. It writes data back to the register file that was
 * produced in the ID/EX stage or awaits a response to a load/store (LSU writes direct to register
 * file for load data). If the writeback stage is not present (WritebackStage == 0) this acts as
 * a simple passthrough to write data direct to the register file.
 *
 * CHERI extension: alongside the 32-bit write data, a capability (reg_cap_t) write path is
 * carried through this stage. CHERI results (cheri_rf_*) are muxed against the ordinary ID/EX
 * results based on whether the instruction in writeback is a CHERI instruction.
 */

`include "prim_assert.sv"
`include "dv_fcov_macros.svh"


module cheriot_wb_stage import cheri_pkg::*; #(
  // ResetAll: when set, all pipeline registers get an asynchronous reset (g_wb_regs_ra);
  // otherwise only wb_valid_q is reset and the data registers are non-resettable (g_wb_regs_nr).
  parameter bit ResetAll       = 1'b0,
  // WritebackStage: when set, a third pipeline stage is instantiated; otherwise the module is a
  // combinational passthrough from ID/EX to the register file.
  parameter bit WritebackStage = 1'b0
) (
  input  logic                          clk_i,
  input  logic                          rst_ni,

  // Handshake / instruction info from ID/EX
  input  logic                          en_wb_i,                   // instruction enters writeback
  input  cheriot_pkg::wb_instr_type_e   instr_type_wb_i,           // load / store / other
  input  logic [31:0]                   pc_id_i,
  input  logic                          instr_is_compressed_id_i,
  input  logic                          instr_perf_count_id_i,     // count in retire perf counters
  input  logic                          instr_is_cheri_i,          // instruction is a CHERI op
  input  logic                          cheri_load_i,              // CHERI capability load
  input  logic                          cheri_store_i,             // CHERI capability store

  output logic                          ready_wb_o,                // stage can accept a new instr
  output logic                          rf_write_wb_o,             // instr in WB will write the RF
  output logic                          outstanding_load_wb_o,
  output logic                          outstanding_store_wb_o,
  output logic [31:0]                   pc_wb_o,
  output logic                          perf_instr_ret_wb_o,
  output logic                          perf_instr_ret_compressed_wb_o,
  output logic                          perf_instr_ret_wb_spec_o,  // speculative retire (pre-done)
  output logic                          perf_instr_ret_compressed_wb_spec_o,

  // Non-CHERI RF write request from ID/EX
  input  logic [4:0]                    rf_waddr_id_i,
  input  logic [31:0]                   rf_wdata_id_i,
  input  logic                          rf_we_id_i,

  // CHERI RF write request (data + capability) from the CHERI EX path
  input  logic                          cheri_rf_we_i,
  input  logic [31:0]                   cheri_rf_wdata_i,
  input  reg_cap_t                      cheri_rf_wcap_i,

  // RF write request from the LSU (load data + loaded capability)
  input  logic [31:0]                   rf_wdata_lsu_i,
  input  reg_cap_t                      rf_wcap_lsu_i,
  input  logic                          rf_we_lsu_i,

  // Forwarding path back to ID/EX for hazard resolution
  output logic [31:0]                   rf_wdata_fwd_wb_o,
  output reg_cap_t                      rf_wcap_fwd_wb_o,

  // Final register-file write port
  output logic [4:0]                    rf_waddr_wb_o,
  output logic [31:0]                   rf_wdata_wb_o,
  output reg_cap_t                      rf_wcap_wb_o,
  output logic                          rf_we_wb_o,

  // LSU response (completes load/store instructions in WB)
  input  logic                          lsu_resp_valid_i,
  input  logic                          lsu_resp_err_i,

  output logic                          instr_done_wb_o
);

  import cheriot_pkg::*;

  // RF write-data mux:
  // 0 == RF write from ID
  // 1 == RF write from LSU
  logic [31:0] rf_wdata_wb_mux [2];
  logic [1:0]  rf_wdata_wb_mux_we;

  // Capability selected for the ID-side write source (index 0 of the mux above)
  reg_cap_t rf_wcap_wb;

  if (WritebackStage) begin : g_writeback_stage
    // Flopped copies of the ID/EX write request and instruction metadata
    logic [31:0]    rf_wdata_wb_q;
    logic           rf_we_wb_q;
    logic [4:0]     rf_waddr_wb_q;

    logic           wb_done;

    logic           wb_valid_q;
    logic [31:0]    wb_pc_q;
    logic           wb_compressed_q;
    logic           wb_count_q;
    wb_instr_type_e wb_instr_type_q;

    logic           wb_valid_d;

    // Flopped CHERI-side state for the instruction in writeback
    logic           wb_is_cheri_q;
    logic           wb_cheri_load_q, wb_cheri_store_q;
    logic           cheri_rf_we_q;
    logic [31:0]    cheri_rf_wdata_q;
    reg_cap_t       cheri_rf_wcap_q;

    // Stage becomes valid if an instruction enters for ID/EX and valid is cleared when instruction
    // is done
    assign wb_valid_d = (en_wb_i & ready_wb_o) | (wb_valid_q & ~wb_done);

    // Writeback for non load/store instructions always completes in a cycle (so instantly done)
    // Writeback for load/store must wait for response to be received by the LSU
    // Signal only relevant if wb_valid_q set

    // note cheri_load/store doesn't just come from the decoder, but includes bound/permission
    // check results
    assign wb_done = (wb_instr_type_q == WB_INSTR_OTHER && ~wb_cheri_load_q && ~wb_cheri_store_q) |
                     lsu_resp_valid_i;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        wb_valid_q <= 1'b0;
      end else begin
        wb_valid_q <= wb_valid_d;
      end
    end

    if (ResetAll) begin : g_wb_regs_ra
      // Fully resettable variant: every pipeline register has an asynchronous reset.
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          rf_we_wb_q       <= '0;
          rf_waddr_wb_q    <= '0;
          rf_wdata_wb_q    <= '0;
          wb_instr_type_q  <= wb_instr_type_e'(0);
          wb_pc_q          <= '0;
          wb_compressed_q  <= '0;
          wb_count_q       <= '0;

          wb_is_cheri_q    <= 1'b0;
          wb_cheri_load_q  <= 1'b0;
          wb_cheri_store_q <= 1'b0;
          cheri_rf_we_q    <= 1'b0;
          cheri_rf_wdata_q <= 32'h0;
          cheri_rf_wcap_q  <= NULL_REG_CAP;
        end else if (en_wb_i) begin
          rf_we_wb_q       <= rf_we_id_i;
          rf_waddr_wb_q    <= rf_waddr_id_i;
          rf_wdata_wb_q    <= rf_wdata_id_i;
          wb_instr_type_q  <= instr_type_wb_i;
          wb_pc_q          <= pc_id_i;
          wb_compressed_q  <= instr_is_compressed_id_i;
          wb_count_q       <= instr_perf_count_id_i;

          wb_is_cheri_q    <= instr_is_cheri_i;
          wb_cheri_load_q  <= cheri_load_i;
          wb_cheri_store_q <= cheri_store_i;
          cheri_rf_we_q    <= cheri_rf_we_i;
          cheri_rf_wdata_q <= cheri_rf_wdata_i;
          cheri_rf_wcap_q  <= cheri_rf_wcap_i;
        end
      end
    end else begin : g_wb_regs_nr
      // Non-resettable variant: registers only update on en_wb_i; wb_valid_q (reset above)
      // qualifies their contents.
      always_ff @(posedge clk_i) begin
        if (en_wb_i) begin
          rf_we_wb_q       <= rf_we_id_i;
          rf_waddr_wb_q    <= rf_waddr_id_i;
          rf_wdata_wb_q    <= rf_wdata_id_i;
          wb_instr_type_q  <= instr_type_wb_i;
          wb_pc_q          <= pc_id_i;
          wb_compressed_q  <= instr_is_compressed_id_i;
          wb_count_q       <= instr_perf_count_id_i;

          wb_is_cheri_q    <= instr_is_cheri_i;
          wb_cheri_load_q  <= cheri_load_i;
          wb_cheri_store_q <= cheri_store_i;
          cheri_rf_we_q    <= cheri_rf_we_i;
          cheri_rf_wdata_q <= cheri_rf_wdata_i;
          cheri_rf_wcap_q  <= cheri_rf_wcap_i;
        end
      end
    end

    assign rf_waddr_wb_o         = rf_waddr_wb_q;
    // ID-side mux input: CHERI result takes precedence when the WB instruction is a CHERI op
    assign rf_wdata_wb_mux[0]    = wb_is_cheri_q ? cheri_rf_wdata_q : rf_wdata_wb_q;
    assign rf_wdata_wb_mux_we[0] = (wb_is_cheri_q ? cheri_rf_we_q : rf_we_wb_q) & wb_valid_q;

    assign ready_wb_o = ~wb_valid_q | wb_done;

    // This is used for determining RF read hazards & forwarding in ID/EX
    // Instruction in writeback will be writing to register file if either rf_we is set or
    // writeback is awaiting load data.
    assign rf_write_wb_o = wb_valid_q & (rf_we_wb_q | cheri_rf_we_q |
                                         (wb_instr_type_q == WB_INSTR_LOAD) | wb_cheri_load_q);

    assign outstanding_load_wb_o  = wb_valid_q &
                                    ((wb_instr_type_q == WB_INSTR_LOAD) | wb_cheri_load_q);
    assign outstanding_store_wb_o = wb_valid_q &
                                    ((wb_instr_type_q == WB_INSTR_STORE) | wb_cheri_store_q);

    assign pc_wb_o = wb_pc_q;

    assign instr_done_wb_o = wb_valid_q & wb_done;

    // Increment instruction retire counters for valid instructions which are not lsu errors.
    // Speculative versions of the signals do not factor in exceptions and whether the instruction
    // is done yet. These are used to get correct values for instructions reading the relevant
    // performance counters in the ID stage.
    assign perf_instr_ret_wb_spec_o            = wb_count_q;
    assign perf_instr_ret_compressed_wb_spec_o = perf_instr_ret_wb_spec_o & wb_compressed_q;
    assign perf_instr_ret_wb_o                 = instr_done_wb_o & wb_count_q &
                                                 ~(lsu_resp_valid_i & lsu_resp_err_i);
    assign perf_instr_ret_compressed_wb_o      = perf_instr_ret_wb_o & wb_compressed_q;

    // Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
    // rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
    // that returns too late to be used on the forwarding path.
    assign rf_wdata_fwd_wb_o = wb_is_cheri_q ? cheri_rf_wdata_q : rf_wdata_wb_q;
    assign rf_wcap_fwd_wb_o  = wb_is_cheri_q ? cheri_rf_wcap_q : NULL_REG_CAP;
    // Capability written for the ID-side source; CHERI loads get their capability from the LSU
    // path instead, hence the ~wb_cheri_load_q qualification.
    assign rf_wcap_wb = (wb_is_cheri_q && (~wb_cheri_load_q)) ? cheri_rf_wcap_q : NULL_REG_CAP;

  end else begin : g_bypass_wb
    // without writeback stage just pass through register write signals
    assign rf_waddr_wb_o         = rf_waddr_id_i;
    assign rf_wdata_wb_mux[0]    = instr_is_cheri_i ? cheri_rf_wdata_i : rf_wdata_id_i;
    assign rf_wdata_wb_mux_we[0] = instr_is_cheri_i ? cheri_rf_we_i : rf_we_id_i;
    assign rf_wcap_wb = (instr_is_cheri_i && (~cheri_load_i)) ? cheri_rf_wcap_i : NULL_REG_CAP;

    // Increment instruction retire counters for valid instructions which are not lsu errors.
    // The speculative signals are always 0 when no writeback stage is present as the raw counter
    // values will be correct.
    assign perf_instr_ret_wb_spec_o            = 1'b0;
    assign perf_instr_ret_compressed_wb_spec_o = 1'b0;
    assign perf_instr_ret_wb_o                 = instr_perf_count_id_i & en_wb_i &
                                                 ~(lsu_resp_valid_i & lsu_resp_err_i);
    assign perf_instr_ret_compressed_wb_o      = perf_instr_ret_wb_o & instr_is_compressed_id_i;

    // ready needs to be constant 1 without writeback stage (otherwise ID/EX stage will stall)
    assign ready_wb_o = 1'b1;

    // Unused Writeback stage only IO & wiring
    // Assign inputs and internal wiring to unused signals to satisfy lint checks
    // Tie-off outputs to constant values
    logic           unused_clk;
    logic           unused_rst;
    wb_instr_type_e unused_instr_type_wb;
    logic [31:0]    unused_pc_id;

    assign unused_clk           = clk_i;
    assign unused_rst           = rst_ni;
    assign unused_instr_type_wb = instr_type_wb_i;
    assign unused_pc_id         = pc_id_i;

    assign outstanding_load_wb_o  = 1'b0;
    assign outstanding_store_wb_o = 1'b0;
    assign pc_wb_o                = '0;
    assign rf_write_wb_o          = 1'b0;
    assign rf_wdata_fwd_wb_o      = 32'b0;
    assign rf_wcap_fwd_wb_o       = NULL_REG_CAP;
    assign instr_done_wb_o        = 1'b0;
  end

  // LSU write source (mux input 1) is common to both configurations
  assign rf_wdata_wb_mux[1]    = rf_wdata_lsu_i;
  assign rf_wdata_wb_mux_we[1] = rf_we_lsu_i;

  // RF write data can come from ID results (all RF writes that aren't because of loads will come
  // from here) or the LSU (RF writes for load data)
  assign rf_wdata_wb_o = ({32{rf_wdata_wb_mux_we[0]}} & rf_wdata_wb_mux[0]) |
                         ({32{rf_wdata_wb_mux_we[1]}} & rf_wdata_wb_mux[1]);
  assign rf_we_wb_o    = |rf_wdata_wb_mux_we;

  // Capability write follows the same source selection: ID-side capability when source 0 is
  // active, loaded capability from the LSU when source 1 is active, otherwise the null capability.
  assign rf_wcap_wb_o = rf_wdata_wb_mux_we[0] ? rf_wcap_wb :
                        (rf_wdata_wb_mux_we[1] ? rf_wcap_lsu_i : NULL_REG_CAP);

  `DV_FCOV_SIGNAL_GEN_IF(logic, wb_valid, g_writeback_stage.wb_valid_q, WritebackStage)

  // The two write sources must never be active in the same cycle (asserted via onehot0 check)
  `ASSERT(RFWriteFromOneSourceOnly, $onehot0(rf_wdata_wb_mux_we))
endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv b/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv new file mode 100644 index 0000000..5d949bf --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriotc_top.sv
@@ -0,0 +1,465 @@ +// Copyright Microsoft Corporation +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +`ifdef RISCV_FORMAL + `define RVFI +`endif + +`include "prim_assert.sv" + + +/** + * Top level module of the ibex RISC-V core + */ +module cheriot_top import cheriot_pkg::*; import cheri_pkg::*; #( + parameter int unsigned DmHaltAddr = 32'h1A110800, + parameter int unsigned DmExceptionAddr = 32'h1A110808, + parameter bit DbgTriggerEn = 1'b1, + parameter int unsigned DbgHwBreakNum = 2, + parameter int unsigned MHPMCounterNum = 0, + parameter int unsigned MHPMCounterWidth = 40, + parameter bit RV32E = 1'b0, + parameter rv32b_e RV32B = RV32BNone, + parameter rv32m_e RV32M = RV32MFast, + parameter bit WritebackStage = 1'b1, + parameter bit BranchPredictor = 1'b0, + parameter bit SecureIbex = 1'b0, // placeholder for TB compatbility + parameter bit CHERIoTEn = 1'b1, + parameter int unsigned DataWidth = 33, + parameter int unsigned HeapBase = 32'h2001_0000, + parameter int unsigned TSMapBase = 32'h2002_f000, // 4kB default + parameter int unsigned TSMapSize = 1024, + parameter bit MemCapFmt = 1'b0, + parameter bit CheriPPLBC = 1'b1, + parameter bit CheriSBND2 = 1'b0, + parameter bit CheriTBRE = 1'b1, + parameter bit CheriStkZ = 1'b1, + parameter int unsigned MMRegDinW = 128, + parameter int unsigned MMRegDoutW = 64, + parameter bit CheriCapIT8 = 1'b0 +) ( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + + input logic test_en_i, // enable all clock gates for testing + input prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i, + + input logic cheri_pmode_i, + input logic cheri_tsafe_en_i, + input logic [31:0] hart_id_i, + input logic [31:0] boot_addr_i, + + // 
Instruction memory interface + output logic instr_req_o, + input logic instr_gnt_i, + input logic instr_rvalid_i, + output logic [31:0] instr_addr_o, + input logic [31:0] instr_rdata_i, + input logic [6:0] instr_rdata_intg_i, + input logic instr_err_i, + + // Data memory interface + output logic data_req_o, + output logic data_is_cap_o, + input logic data_gnt_i, + input logic data_rvalid_i, + output logic data_we_o, + output logic [3:0] data_be_o, + output logic [31:0] data_addr_o, + output logic [32:0] data_wdata_o, + output logic [6:0] data_wdata_intg_o, + input logic [32:0] data_rdata_i, + input logic [6:0] data_rdata_intg_i, + input logic data_err_i, + + // TS map memory interface + output logic tsmap_cs_o, + output logic [15:0] tsmap_addr_o, + input logic [31:0] tsmap_rdata_i, + input logic [MMRegDinW-1:0] mmreg_corein_i, + output logic [MMRegDoutW-1:0] mmreg_coreout_o, + output logic cheri_fatal_err_o, + + // Interrupt inputs + input logic irq_software_i, + input logic irq_timer_i, + input logic irq_external_i, + input logic [14:0] irq_fast_i, + input logic irq_nm_i, // non-maskeable interrupt + + // Scrambling Interface + input logic scramble_key_valid_i, + input logic [SCRAMBLE_KEY_W-1:0] scramble_key_i, + input logic [SCRAMBLE_NONCE_W-1:0] scramble_nonce_i, + output logic scramble_req_o, + + // Debug Interface + input logic debug_req_i, + output crash_dump_t crash_dump_o, + output logic double_fault_seen_o, + + // RISC-V Formal Interface + // Does not comply with the coding standards of _i/_o suffixes, but follows + // the convention of RISC-V Formal Interface Specification. 
+`ifdef RVFI + output logic rvfi_valid, + output logic [63:0] rvfi_order, + output logic [31:0] rvfi_insn, + output logic rvfi_trap, + output logic rvfi_halt, + output logic rvfi_intr, + output logic [ 1:0] rvfi_mode, + output logic [ 1:0] rvfi_ixl, + output logic [ 4:0] rvfi_rs1_addr, + output logic [ 4:0] rvfi_rs2_addr, + output logic [ 4:0] rvfi_rs3_addr, + output logic [31:0] rvfi_rs1_rdata, + output reg_cap_t rvfi_rs1_rcap, + output logic [31:0] rvfi_rs2_rdata, + output reg_cap_t rvfi_rs2_rcap, + output logic [31:0] rvfi_rs3_rdata, + output logic [ 4:0] rvfi_rd_addr, + output logic [31:0] rvfi_rd_wdata, + output reg_cap_t rvfi_rd_wcap, + output logic [31:0] rvfi_pc_rdata, + output logic [31:0] rvfi_pc_wdata, + output logic [31:0] rvfi_mem_addr, + output logic [ 3:0] rvfi_mem_rmask, + output logic [ 3:0] rvfi_mem_wmask, + output logic [32:0] rvfi_mem_rdata, + output logic [32:0] rvfi_mem_wdata, + output logic rvfi_mem_is_cap, + output reg_cap_t rvfi_mem_rcap, + output reg_cap_t rvfi_mem_wcap, + output logic [31:0] rvfi_ext_mip, + output logic rvfi_ext_nmi, + output logic rvfi_ext_debug_req, + output logic [63:0] rvfi_ext_mcycle, +`endif + + // CPU Control Signals + input fetch_enable_t fetch_enable_i, + output logic core_sleep_o, + output logic alert_minor_o, + output logic alert_major_internal_o, + output logic alert_major_bus_o, + + + // DFT bypass controls + input logic scan_rst_ni +); + + localparam bit ResetAll = 1'b1; + localparam int unsigned RegFileDataWidth = 32; + + // Clock signals + logic clk; + logic core_busy_d, core_busy_q; + logic clock_en; + logic irq_pending; + // Core <-> Register file signals + logic [4:0] rf_raddr_a; + logic [4:0] rf_raddr_b; + logic [4:0] rf_waddr_wb; + logic rf_we_wb; + logic [RegFileDataWidth-1:0] rf_wdata_wb_ecc; + logic [RegFileDataWidth-1:0] rf_rdata_a_ecc, rf_rdata_a_ecc_buf; + logic [RegFileDataWidth-1:0] rf_rdata_b_ecc, rf_rdata_b_ecc_buf; + reg_cap_t rf_rcap_a, rf_rcap_b; + reg_cap_t rf_wcap; + + logic [31:0] 
rf_reg_rdy; + logic [4:0] rf_trvk_addr; + logic rf_trvk_en; + logic rf_trvk_clrtag; + logic [4:0] rf_trsv_addr; + logic rf_trsv_en; + + fetch_enable_t fetch_enable_buf; + + ///////////////////// + // Main clock gate // + ///////////////////// + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + core_busy_q <= 1'b0; + end else begin + core_busy_q <= core_busy_d; + end + end + + assign clock_en = core_busy_q | debug_req_i | irq_pending | irq_nm_i; + assign core_sleep_o = ~clock_en; + + // let's not worry about clock gating for now. kliu + assign clk = clk_i; + +// prim_clock_gating core_clock_gate_i ( +// .clk_i (clk_i), +// .en_i (clock_en), +// .test_en_i(test_en_i), +// .clk_o (clk) +// ); + + //////////////////////// + // Core instantiation // + //////////////////////// + +`ifdef FPGA + // Buffer security critical signals to prevent synthesis optimisation removing them + prim_buf #(.Width($bits(fetch_enable_t))) u_fetch_enable_buf ( + .in_i (fetch_enable_i), + .out_o(fetch_enable_buf) + ); + + prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_a_ecc_buf ( + .in_i (rf_rdata_a_ecc), + .out_o(rf_rdata_a_ecc_buf) + ); + + prim_buf #(.Width(RegFileDataWidth)) u_rf_rdata_b_ecc_buf ( + .in_i (rf_rdata_b_ecc), + .out_o(rf_rdata_b_ecc_buf) + ); +`else + assign fetch_enable_buf = fetch_enable_i; + assign rf_rdata_a_ecc_buf = rf_rdata_a_ecc; + assign rf_rdata_b_ecc_buf = rf_rdata_b_ecc; +`endif + + cheriot_core #( + .PMPEnable (1'b0), + .PMPGranularity (0), + .PMPNumRegions (4), + .MHPMCounterNum (MHPMCounterNum ), + .MHPMCounterWidth (MHPMCounterWidth), + .RV32E (RV32E), + .RV32M (RV32M), + .RV32B (RV32BNone), + .BranchTargetALU (1'b1), + .ICache (1'b0), + .ICacheECC (1'b0), + .BusSizeECC (BUS_SIZE), + .TagSizeECC (IC_TAG_SIZE), + .LineSizeECC (IC_LINE_SIZE), + .BranchPredictor (BranchPredictor), + .DbgTriggerEn (DbgTriggerEn), + .DbgHwBreakNum (DbgHwBreakNum), + .WritebackStage (WritebackStage), + .ResetAll (ResetAll), + .RndCnstLfsrSeed 
(RndCnstLfsrSeedDefault), + .RndCnstLfsrPerm (RndCnstLfsrPermDefault), + .SecureIbex (1'b0), + .DummyInstructions(1'b0), + .RegFileECC (1'b0), + .RegFileDataWidth (RegFileDataWidth), + .DmHaltAddr (DmHaltAddr), + .DmExceptionAddr (DmExceptionAddr), + .CHERIoTEn (CHERIoTEn), + .DataWidth (DataWidth), + .HeapBase (HeapBase), + .TSMapBase (TSMapBase), + .TSMapSize (TSMapSize), + .MemCapFmt (MemCapFmt), + .CheriPPLBC (CheriPPLBC), + .CheriSBND2 (CheriSBND2), + .CheriTBRE (CheriTBRE), + .CheriStkZ (CheriStkZ), + .MMRegDinW (MMRegDinW), + .MMRegDoutW (MMRegDoutW), + .CheriCapIT8 (CheriCapIT8) + ) u_cheriot_core ( + .clk_i (clk), + .rst_ni (rst_ni), + + .cheri_pmode_i (cheri_pmode_i), + .cheri_tsafe_en_i (cheri_tsafe_en_i), + .hart_id_i (hart_id_i ) , + .boot_addr_i (boot_addr_i ) , + + .instr_req_o (instr_req_o ), + .instr_gnt_i (instr_gnt_i ), + .instr_rvalid_i (instr_rvalid_i), + .instr_addr_o (instr_addr_o ), + .instr_rdata_i (instr_rdata_i ), + .instr_err_i (instr_err_i ), + + .data_req_o (data_req_o ), + .data_is_cap_o (data_is_cap_o ), + .data_gnt_i (data_gnt_i ), + .data_rvalid_i (data_rvalid_i ), + .data_we_o (data_we_o ), + .data_be_o (data_be_o ), + .data_addr_o (data_addr_o ), + .data_wdata_o (data_wdata_o ), + .data_rdata_i (data_rdata_i ), + .data_err_i (data_err_i ), + + .dummy_instr_id_o (), + .rf_raddr_a_o (rf_raddr_a), + .rf_raddr_b_o (rf_raddr_b), + .rf_waddr_wb_o (rf_waddr_wb), + .rf_we_wb_o (rf_we_wb), + .rf_wdata_wb_ecc_o(rf_wdata_wb_ecc), + .rf_rdata_a_ecc_i (rf_rdata_a_ecc_buf), + .rf_rdata_b_ecc_i (rf_rdata_b_ecc_buf), + .rf_wcap_wb_o (rf_wcap), + .rf_rcap_a_i (rf_rcap_a), + .rf_rcap_b_i (rf_rcap_b), + .rf_reg_rdy_i (rf_reg_rdy), + .rf_trsv_en_o (rf_trsv_en), + .rf_trsv_addr_o (rf_trsv_addr), + .rf_trvk_addr_o (rf_trvk_addr), + .rf_trvk_en_o (rf_trvk_en ), + .rf_trvk_clrtag_o (rf_trvk_clrtag), + .rf_trvk_par_o (), + .rf_trsv_par_o (), + .tsmap_cs_o (tsmap_cs_o ), + .tsmap_addr_o (tsmap_addr_o ), + .tsmap_rdata_i (tsmap_rdata_i), + .mmreg_corein_i 
(mmreg_corein_i), + .mmreg_coreout_o (mmreg_coreout_o), + .cheri_fatal_err_o(cheri_fatal_err_o), + + .irq_software_i (irq_software_i), + .irq_timer_i (irq_timer_i ), + .irq_external_i (irq_external_i), + .irq_fast_i (irq_fast_i ), + .irq_nm_i (irq_nm_i ), + .irq_pending_o(irq_pending), + + .debug_req_i, + .crash_dump_o, + .double_fault_seen_o, + +`ifdef RVFI + .rvfi_valid, + .rvfi_order, + .rvfi_insn, + .rvfi_trap, + .rvfi_halt, + .rvfi_intr, + .rvfi_mode, + .rvfi_ixl, + .rvfi_rs1_addr, + .rvfi_rs2_addr, + .rvfi_rs3_addr, + .rvfi_rs1_rdata, + .rvfi_rs1_rcap, + .rvfi_rs2_rdata, + .rvfi_rs2_rcap, + .rvfi_rs3_rdata, + .rvfi_rd_addr, + .rvfi_rd_wdata, + .rvfi_rd_wcap, + .rvfi_pc_rdata, + .rvfi_pc_wdata, + .rvfi_mem_addr, + .rvfi_mem_rmask, + .rvfi_mem_wmask, + .rvfi_mem_rdata, + .rvfi_mem_wdata, + .rvfi_mem_is_cap, + .rvfi_mem_rcap, + .rvfi_mem_wcap, + .rvfi_ext_mip, + .rvfi_ext_nmi, + .rvfi_ext_debug_req, + .rvfi_ext_mcycle, +`endif + + .fetch_enable_i(fetch_enable_buf), + .alert_minor_o(alert_minor_o), + .alert_major_o(alert_major_internal_o), + .icache_inval_o(), + .core_busy_o (core_busy_d), + .ic_scr_key_valid_i (1'b0), + .ic_data_rdata_i (), + .ic_data_wdata_o (), + .ic_data_addr_o (), + .ic_data_write_o (), + .ic_data_req_o (), + .ic_tag_rdata_i (), + .ic_tag_wdata_o (), + .ic_tag_addr_o (), + .ic_tag_write_o (), + .ic_tag_req_o () + ); + + assign data_wdata_intg_o = 7'h0; + assign alert_major_bus_o = 1'b0; + + ///////////////////////////////// + // Register file Instantiation // + ///////////////////////////////// + if (RV32E) begin + cheri_regfile #( + .NREGS(16), + .NCAPS(16), + .CheriPPLBC(CheriPPLBC) + ) register_file_i ( + .clk_i (clk), + .rst_ni (rst_ni), + .raddr_a_i (rf_raddr_a), + .rdata_a_o (rf_rdata_a_ecc), + .rcap_a_o (rf_rcap_a), + .raddr_b_i (rf_raddr_b), + .rdata_b_o (rf_rdata_b_ecc), + .rcap_b_o (rf_rcap_b), + .waddr_a_i (rf_waddr_wb), + .wdata_a_i (rf_wdata_wb_ecc), + .wcap_a_i (rf_wcap), + .we_a_i (rf_we_wb), + .reg_rdy_o (rf_reg_rdy), + 
.trvk_addr_i (rf_trvk_addr), + .trvk_en_i (rf_trvk_en), + .trvk_clrtag_i (rf_trvk_clrtag), + .trsv_addr_i (rf_trsv_addr), + .trsv_en_i (rf_trsv_en), + .trsv_par_i (7'h0), + .trvk_par_i (7'h0), + .par_rst_ni (1'b0), + .alert_o () + ); + end else begin + cheri_regfile #( + .NREGS(32), + .NCAPS(16), + .CheriPPLBC(CheriPPLBC) + ) register_file_i ( + .clk_i (clk), + .rst_ni (rst_ni), + .raddr_a_i (rf_raddr_a), + .rdata_a_o (rf_rdata_a_ecc), + .rcap_a_o (rf_rcap_a), + .raddr_b_i (rf_raddr_b), + .rdata_b_o (rf_rdata_b_ecc), + .rcap_b_o (rf_rcap_b), + .waddr_a_i (rf_waddr_wb), + .wdata_a_i (rf_wdata_wb_ecc), + .wcap_a_i (rf_wcap), + .we_a_i (rf_we_wb), + .reg_rdy_o (rf_reg_rdy), + .trvk_addr_i (rf_trvk_addr), + .trvk_en_i (rf_trvk_en), + .trvk_clrtag_i (rf_trvk_clrtag), + .trsv_addr_i (rf_trsv_addr), + .trsv_en_i (rf_trsv_en), + .trsv_par_i (7'h0), + .trvk_par_i (7'h0), + .par_rst_ni (1'b0), + .alert_o () + ); + end + + assign scramble_req_o = 0; + +endmodule
diff --git a/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv b/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv new file mode 100644 index 0000000..5840322 --- /dev/null +++ b/hw/ip/cheriot-ibex/rtl/cheriotc_top_tracing.sv
@@ -0,0 +1,316 @@
// Copyright Microsoft Corporation
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * Top level module of the ibex RISC-V core with tracing enabled
 *
 * Thin wrapper around cheriot_top that adds an RVFI-driven instruction
 * tracer (cheriot_tracer, simulation only). All external ports are passed
 * straight through to u_cheriot_top; the RVFI signals are tapped off the
 * core and fed to the tracer. Requires the RVFI define to be set globally
 * (an elaboration-time $fatal guards against it being absent).
 */

module cheriot_top_tracing import cheriot_pkg::*; import cheri_pkg::*; #(
  parameter int unsigned DmHaltAddr      = 32'h1A110800,
  parameter int unsigned DmExceptionAddr = 32'h1A110808,
  parameter bit          RV32E           = 1'b0,
  parameter bit          CheriTBRE       = 1'b1,
  parameter bit          CheriStkZ       = 1'b1,
  parameter int unsigned HeapBase        = 32'h2001_0000,
  parameter int unsigned TSMapBase       = 32'h2004_0000,   // 4kB default
  parameter int unsigned TSMapSize       = 1024,            // in words
  parameter int unsigned MMRegDinW       = 128,
  parameter int unsigned MMRegDoutW      = 64,
  parameter int unsigned DataWidth       = 33,  // this enables testbench to use defparam to override
  parameter bit          CheriCapIT8     = 1'b0
) (
  // Clock and Reset
  input  logic                         clk_i,
  input  logic                         rst_ni,

  input  logic                         test_en_i,     // enable all clock gates for testing
  input  logic                         scan_rst_ni,
  input  prim_ram_1p_pkg::ram_1p_cfg_t ram_cfg_i,

  input  logic                         cheri_pmode_i,
  input  logic                         cheri_tsafe_en_i,
  input  logic [31:0]                  hart_id_i,
  input  logic [31:0]                  boot_addr_i,

  // Instruction memory interface
  output logic                         instr_req_o,
  input  logic                         instr_gnt_i,
  input  logic                         instr_rvalid_i,
  output logic [31:0]                  instr_addr_o,
  input  logic [31:0]                  instr_rdata_i,
  input  logic [6:0]                   instr_rdata_intg_i,
  input  logic                         instr_err_i,

  // Data memory interface
  output logic                         data_req_o,
  output logic                         data_is_cap_o,
  input  logic                         data_gnt_i,
  input  logic                         data_rvalid_i,
  output logic                         data_we_o,
  output logic [3:0]                   data_be_o,
  output logic [31:0]                  data_addr_o,
  output logic [32:0]                  data_wdata_o,  // bit 32 carries the capability tag
  output logic [6:0]                   data_wdata_intg_o,
  input  logic [32:0]                  data_rdata_i,
  input  logic [6:0]                   data_rdata_intg_i,
  input  logic                         data_err_i,

  // TS map memory interface
  output logic                         tsmap_cs_o,
  output logic [15:0]                  tsmap_addr_o,
  input  logic [31:0]                  tsmap_rdata_i,
  input  logic [6:0]                   tsmap_rdata_intg_i,  // not used in cheriotc_top
  input  logic [MMRegDinW-1:0]         mmreg_corein_i,
  output logic [MMRegDoutW-1:0]        mmreg_coreout_o,
  output logic                         cheri_fatal_err_o,

  // Interrupt inputs
  input  logic                         irq_software_i,
  input  logic                         irq_timer_i,
  input  logic                         irq_external_i,
  input  logic [14:0]                  irq_fast_i,
  input  logic                         irq_nm_i,       // non-maskable interrupt

  // Scrambling Interface
  input  logic                         scramble_key_valid_i,
  input  logic [SCRAMBLE_KEY_W-1:0]    scramble_key_i,
  input  logic [SCRAMBLE_NONCE_W-1:0]  scramble_nonce_i,
  output logic                         scramble_req_o,

  // Debug Interface
  input  logic                         debug_req_i,
  output crash_dump_t                  crash_dump_o,
  output logic                         double_fault_seen_o,

  // CPU Control Signals
  input  fetch_enable_t                fetch_enable_i,
  output logic                         core_sleep_o
);

  // RVFI (RISC-V Formal Interface) taps from the core; these carry one
  // retired-instruction record per rvfi_valid pulse and drive the tracer.
  logic                 rvfi_valid;
  logic [63:0]          rvfi_order;
  logic [31:0]          rvfi_insn;
  logic                 rvfi_trap;
  logic                 rvfi_halt;
  logic                 rvfi_intr;
  logic [ 1:0]          rvfi_mode;
  logic [ 1:0]          rvfi_ixl;
  logic [ 4:0]          rvfi_rs1_addr;
  logic [ 4:0]          rvfi_rs2_addr;
  logic [ 4:0]          rvfi_rs3_addr;
  logic [31:0]          rvfi_rs1_rdata;
  reg_cap_t             rvfi_rs1_rcap;
  reg_cap_t             rvfi_rs2_rcap;
  logic [31:0]          rvfi_rs2_rdata;
  logic [31:0]          rvfi_rs3_rdata;
  logic [ 4:0]          rvfi_rd_addr;
  logic [31:0]          rvfi_rd_wdata;
  reg_cap_t             rvfi_rd_wcap;
  logic [31:0]          rvfi_pc_rdata;
  logic [31:0]          rvfi_pc_wdata;
  logic [31:0]          rvfi_mem_addr;
  logic [ 3:0]          rvfi_mem_rmask;
  logic [ 3:0]          rvfi_mem_wmask;
  // Memory data paths are DataWidth wide (33 by default: 32 data + tag bit).
  logic [DataWidth-1:0] rvfi_mem_rdata;
  logic [DataWidth-1:0] rvfi_mem_wdata;
  logic                 rvfi_mem_is_cap;
  reg_cap_t             rvfi_mem_rcap;
  reg_cap_t             rvfi_mem_wcap;
  logic [31:0]          rvfi_ext_mip;
  logic                 rvfi_ext_nmi;
  logic                 rvfi_ext_debug_req;
  logic [63:0]          rvfi_ext_mcycle;

  logic [31:0]          unused_rvfi_ext_mip;
  logic                 unused_rvfi_ext_nmi;
  logic                 unused_rvfi_ext_debug_req;
  logic [63:0]          unused_rvfi_ext_mcycle;

  // Tracer doesn't use these signals, though other modules may probe down into tracer to observe
  // them.
  assign unused_rvfi_ext_mip       = rvfi_ext_mip;
  assign unused_rvfi_ext_nmi       = rvfi_ext_nmi;
  assign unused_rvfi_ext_debug_req = rvfi_ext_debug_req;
  assign unused_rvfi_ext_mcycle    = rvfi_ext_mcycle;

  // Core instance. CHERIoT-specific configuration is fixed here
  // (CHERIoTEn=1, PPLBC on, SBND2 off); counters/debug parameters are
  // hard-coded to the project defaults rather than forwarded.
  cheriot_top #(
    .DmHaltAddr       (DmHaltAddr ),
    .DmExceptionAddr  (DmExceptionAddr ),
    .MHPMCounterNum   (13 ),
    .MHPMCounterWidth (40),
    .DbgTriggerEn     (1'b1),
    .DbgHwBreakNum    (4),
    .RV32E            (RV32E),
    .RV32B            (RV32BFull),
    .WritebackStage   (1'b1),
    .BranchPredictor  (1'b0),
    .CHERIoTEn        (1'b1),
    .DataWidth        (DataWidth),
    .HeapBase         (HeapBase ),
    .TSMapBase        (TSMapBase),
    .TSMapSize        (TSMapSize),
    .MemCapFmt        (1'b0),
    .CheriPPLBC       (1'b1),
    .CheriSBND2       (1'b0),
    .CheriTBRE        (CheriTBRE),
    .CheriStkZ        (CheriStkZ),
    .MMRegDinW        (MMRegDinW),
    .MMRegDoutW       (MMRegDoutW),
    .CheriCapIT8      (CheriCapIT8)
  ) u_cheriot_top (
    .clk_i,
    .rst_ni,

    .test_en_i,
    .scan_rst_ni,
    .ram_cfg_i,

    .cheri_pmode_i,
    .cheri_tsafe_en_i,
    .hart_id_i,
    .boot_addr_i,

    .instr_req_o,
    .instr_gnt_i,
    .instr_rvalid_i,
    .instr_addr_o,
    .instr_rdata_i,
    .instr_rdata_intg_i,
    .instr_err_i,

    .data_req_o,
    .data_is_cap_o,
    .data_gnt_i,
    .data_rvalid_i,
    .data_we_o,
    .data_be_o,
    .data_addr_o,
    .data_wdata_o,
    .data_wdata_intg_o,
    .data_rdata_i,
    .data_rdata_intg_i,
    .data_err_i,

    .tsmap_cs_o,
    .tsmap_addr_o,
    .tsmap_rdata_i,
    .mmreg_corein_i,
    .mmreg_coreout_o,
    .cheri_fatal_err_o,

    .irq_software_i,
    .irq_timer_i,
    .irq_external_i,
    .irq_fast_i,
    .irq_nm_i,

    .scramble_key_valid_i,
    .scramble_key_i,
    .scramble_nonce_i,
    .scramble_req_o,

    .debug_req_i,
    .crash_dump_o,
    .double_fault_seen_o,

`ifdef RVFI
    .rvfi_valid,
    .rvfi_order,
    .rvfi_insn,
    .rvfi_trap,
    .rvfi_halt,
    .rvfi_intr,
    .rvfi_mode,
    .rvfi_ixl,
    .rvfi_rs1_addr,
    .rvfi_rs2_addr,
    .rvfi_rs3_addr,
    .rvfi_rs1_rdata,
    .rvfi_rs1_rcap,
    .rvfi_rs2_rdata,
    .rvfi_rs2_rcap,
    .rvfi_rs3_rdata,
    .rvfi_rd_addr,
    .rvfi_rd_wdata,
    .rvfi_rd_wcap,
    .rvfi_pc_rdata,
    .rvfi_pc_wdata,
    .rvfi_mem_addr,
    .rvfi_mem_rmask,
    .rvfi_mem_wmask,
    .rvfi_mem_rdata,
    .rvfi_mem_wdata,
    .rvfi_mem_rcap,
    .rvfi_mem_wcap,
    .rvfi_mem_is_cap,
    .rvfi_ext_mip,
    .rvfi_ext_nmi,
    .rvfi_ext_debug_req,
    .rvfi_ext_mcycle,
`endif
    .fetch_enable_i,
    .core_sleep_o,
    // Alert outputs are intentionally left unconnected at this level.
    .alert_major_bus_o(),
    .alert_major_internal_o(),
    .alert_minor_o()
  );

// cheriot_tracer relies on the signals from the RISC-V Formal Interface
// synthesis translate_off
`ifndef RVFI
  // NOTE(review): module-scope $fatal with a string as the first argument;
  // the LRM signature is $fatal([finish_number[, message...]]) where the
  // first argument is an integer — some tools may reject this. Confirm
  // against the simulators in use ($fatal(1, "...") would be strictly
  // conformant).
  $fatal("Fatal error: RVFI needs to be defined globally.");
`endif

`ifdef RVFI
  // Simulation-only instruction tracer, fed from the RVFI taps above.
  cheriot_tracer #(
    .DataWidth   (DataWidth),
    .CheriCapIT8 (CheriCapIT8)
  ) u_cheriot_tracer (
    .clk_i,
    .rst_ni,

    .cheri_pmode_i,
    .cheri_tsafe_en_i,
    .hart_id_i,

    .rvfi_valid,
    .rvfi_order,
    .rvfi_insn,
    .rvfi_trap,
    .rvfi_halt,
    .rvfi_intr,
    .rvfi_mode,
    .rvfi_ixl,
    .rvfi_rs1_addr,
    .rvfi_rs2_addr,
    .rvfi_rs3_addr,
    .rvfi_rs1_rdata,
    .rvfi_rs2_rdata,
    .rvfi_rs3_rdata,
    .rvfi_rs1_rcap,
    .rvfi_rs2_rcap,
    .rvfi_rd_wcap,
    .rvfi_rd_addr,
    .rvfi_rd_wdata,
    .rvfi_pc_rdata,
    .rvfi_pc_wdata,
    .rvfi_mem_addr,
    .rvfi_mem_rmask,
    .rvfi_mem_wmask,
    .rvfi_mem_rdata,
    .rvfi_mem_wdata,
    .rvfi_mem_rcap,
    .rvfi_mem_wcap,
    .rvfi_mem_is_cap
  );
`endif

// synthesis translate_on

endmodule