| // Copyright lowRISC contributors. |
| // Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| // SPDX-License-Identifier: Apache-2.0 |
| |
| /** |
| * Instruction cache |
| * |
| * Provides an instruction cache along with cache management, instruction buffering and prefetching |
| */ |
| |
| `include "prim_assert.sv" |
| |
module ibex_icache import ibex_pkg::*; #(
  // Enable SECDED ECC protection on the tag and data RAMs
  parameter bit          ICacheECC   = 1'b0,
  // Reset all registers (for targets that require fully resettable flops)
  parameter bit          ResetAll    = 1'b0,
  // RAM word widths; wider than the raw widths when ECC checkbits are appended
  parameter int unsigned BusSizeECC  = BUS_SIZE,
  parameter int unsigned TagSizeECC  = IC_TAG_SIZE,
  parameter int unsigned LineSizeECC = IC_LINE_SIZE,
  // Only cache branch targets
  parameter bit          BranchCache = 1'b0
) (
  // Clock and reset
  input  logic                   clk_i,
  input  logic                   rst_ni,

  // Signal that the core would like instructions
  input  logic                   req_i,

  // Set the cache's address counter
  input  logic                   branch_i,
  input  logic [31:0]            addr_i,

  // IF stage interface: Pass fetched instructions to the core
  input  logic                   ready_i,
  output logic                   valid_o,
  output logic [31:0]            rdata_o,
  output logic [31:0]            addr_o,
  output logic                   err_o,
  output logic                   err_plus2_o,

  // Instruction memory / interconnect interface: Fetch instruction data from memory
  output logic                   instr_req_o,
  input  logic                   instr_gnt_i,
  output logic [31:0]            instr_addr_o,
  input  logic [BUS_SIZE-1:0]    instr_rdata_i,
  input  logic                   instr_err_i,
  input  logic                   instr_rvalid_i,

  // RAM IO: per-way request strobes plus shared write/address/data for the
  // tag and data RAMs; rdata comes back as one word per way
  output logic [IC_NUM_WAYS-1:0] ic_tag_req_o,
  output logic                   ic_tag_write_o,
  output logic [IC_INDEX_W-1:0]  ic_tag_addr_o,
  output logic [TagSizeECC-1:0]  ic_tag_wdata_o,
  input  logic [TagSizeECC-1:0]  ic_tag_rdata_i [IC_NUM_WAYS],
  output logic [IC_NUM_WAYS-1:0] ic_data_req_o,
  output logic                   ic_data_write_o,
  output logic [IC_INDEX_W-1:0]  ic_data_addr_o,
  output logic [LineSizeECC-1:0] ic_data_wdata_o,
  input  logic [LineSizeECC-1:0] ic_data_rdata_i [IC_NUM_WAYS],
  // Scramble key handshake, used by the invalidation FSM (AWAIT_SCRAMBLE_KEY)
  input  logic                   ic_scr_key_valid_i,
  output logic                   ic_scr_key_req_o,

  // Cache status
  input  logic                   icache_enable_i,
  input  logic                   icache_inval_i,
  output logic                   busy_o,
  output logic                   ecc_error_o
);
| |
  // Number of fill buffers (must be >= 2)
  localparam int unsigned NUM_FB       = 4;
  // Request throttling threshold
  localparam int unsigned FB_THRESHOLD = NUM_FB - 2;

  // Prefetch signals
  logic [ADDR_W-1:0]                      lookup_addr_aligned;
  logic [ADDR_W-1:0]                      prefetch_addr_d, prefetch_addr_q;
  logic                                   prefetch_addr_en;
  // Cache pipeline IC0 signals (lookup / fill-write arbitration into the RAMs)
  logic                                   lookup_throttle;
  logic                                   lookup_req_ic0;
  logic [ADDR_W-1:0]                      lookup_addr_ic0;
  logic [IC_INDEX_W-1:0]                  lookup_index_ic0;
  logic                                   fill_req_ic0;
  logic [IC_INDEX_W-1:0]                  fill_index_ic0;
  logic [IC_TAG_SIZE-1:0]                 fill_tag_ic0;
  logic [IC_LINE_SIZE-1:0]                fill_wdata_ic0;
  logic                                   lookup_grant_ic0;
  logic                                   lookup_actual_ic0;
  logic                                   fill_grant_ic0;
  logic                                   tag_req_ic0;
  logic [IC_INDEX_W-1:0]                  tag_index_ic0;
  logic [IC_NUM_WAYS-1:0]                 tag_banks_ic0;
  logic                                   tag_write_ic0;
  logic [TagSizeECC-1:0]                  tag_wdata_ic0;
  logic                                   data_req_ic0;
  logic [IC_INDEX_W-1:0]                  data_index_ic0;
  logic [IC_NUM_WAYS-1:0]                 data_banks_ic0;
  logic                                   data_write_ic0;
  logic [LineSizeECC-1:0]                 data_wdata_ic0;
  // Cache pipeline IC1 signals (tag compare / hit mux / way selection)
  logic [TagSizeECC-1:0]                  tag_rdata_ic1  [IC_NUM_WAYS];
  logic [LineSizeECC-1:0]                 data_rdata_ic1 [IC_NUM_WAYS];
  logic [LineSizeECC-1:0]                 hit_data_ecc_ic1;
  logic [IC_LINE_SIZE-1:0]                hit_data_ic1;
  logic                                   lookup_valid_ic1;
  logic [ADDR_W-1:IC_INDEX_HI+1]          lookup_addr_ic1;
  logic [IC_NUM_WAYS-1:0]                 tag_match_ic1;
  logic                                   tag_hit_ic1;
  logic [IC_NUM_WAYS-1:0]                 tag_invalid_ic1;
  logic [IC_NUM_WAYS-1:0]                 lowest_invalid_way_ic1;
  logic [IC_NUM_WAYS-1:0]                 round_robin_way_ic1, round_robin_way_q;
  logic [IC_NUM_WAYS-1:0]                 sel_way_ic1;
  logic                                   ecc_err_ic1;
  logic                                   ecc_write_req;
  logic [IC_NUM_WAYS-1:0]                 ecc_write_ways;
  logic [IC_INDEX_W-1:0]                  ecc_write_index;
  // Fill buffer signals
  logic [$clog2(NUM_FB)-1:0]              fb_fill_level;
  logic                                   fill_cache_new;
  logic                                   fill_new_alloc;
  logic                                   fill_spec_req, fill_spec_done, fill_spec_hold;
  logic [NUM_FB-1:0][NUM_FB-1:0]          fill_older_d, fill_older_q;
  logic [NUM_FB-1:0]                      fill_alloc_sel, fill_alloc;
  logic [NUM_FB-1:0]                      fill_busy_d, fill_busy_q;
  logic [NUM_FB-1:0]                      fill_done;
  logic [NUM_FB-1:0]                      fill_in_ic1;
  logic [NUM_FB-1:0]                      fill_stale_d, fill_stale_q;
  logic [NUM_FB-1:0]                      fill_cache_d, fill_cache_q;
  logic [NUM_FB-1:0]                      fill_hit_ic1, fill_hit_d, fill_hit_q;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0]   fill_ext_cnt_d, fill_ext_cnt_q;
  logic [NUM_FB-1:0]                      fill_ext_hold_d, fill_ext_hold_q;
  logic [NUM_FB-1:0]                      fill_ext_done_d, fill_ext_done_q;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0]   fill_rvd_cnt_d, fill_rvd_cnt_q;
  logic [NUM_FB-1:0]                      fill_rvd_done;
  logic [NUM_FB-1:0]                      fill_ram_done_d, fill_ram_done_q;
  logic [NUM_FB-1:0]                      fill_out_grant;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0]   fill_out_cnt_d, fill_out_cnt_q;
  logic [NUM_FB-1:0]                      fill_out_done;
  logic [NUM_FB-1:0]                      fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
  logic [NUM_FB-1:0]                      fill_data_sel, fill_data_reg;
  logic [NUM_FB-1:0]                      fill_data_hit, fill_data_rvd;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0]   fill_ext_beat, fill_rvd_beat;
  logic [NUM_FB-1:0]                      fill_ext_arb, fill_ram_arb, fill_out_arb;
  logic [NUM_FB-1:0]                      fill_rvd_arb;
  logic [NUM_FB-1:0]                      fill_entry_en;
  logic [NUM_FB-1:0]                      fill_addr_en;
  logic [NUM_FB-1:0]                      fill_way_en;
  logic [NUM_FB-1:0][IC_LINE_BEATS-1:0]   fill_data_en;
  logic [NUM_FB-1:0][IC_LINE_BEATS-1:0]   fill_err_d, fill_err_q;
  logic [ADDR_W-1:0]                      fill_addr_q [NUM_FB];
  logic [IC_NUM_WAYS-1:0]                 fill_way_q  [NUM_FB];
  logic [IC_LINE_SIZE-1:0]                fill_data_d [NUM_FB];
  logic [IC_LINE_SIZE-1:0]                fill_data_q [NUM_FB];
  logic [ADDR_W-1:BUS_W]                  fill_ext_req_addr;
  logic [ADDR_W-1:0]                      fill_ram_req_addr;
  logic [IC_NUM_WAYS-1:0]                 fill_ram_req_way;
  logic [IC_LINE_SIZE-1:0]                fill_ram_req_data;
  logic [IC_LINE_SIZE-1:0]                fill_out_data;
  logic [IC_LINE_BEATS-1:0]               fill_out_err;
  // External req signals
  logic                                   instr_req;
  logic [ADDR_W-1:BUS_W]                  instr_addr;
  // Data output signals (instruction buffering / skid buffer for unaligned
  // 32-bit instructions; logic continues later in the file)
  logic                                   skid_complete_instr;
  logic                                   skid_ready;
  logic                                   output_compressed;
  logic                                   skid_valid_d, skid_valid_q, skid_en;
  logic [15:0]                            skid_data_d, skid_data_q;
  logic                                   skid_err_q;
  logic                                   output_valid;
  logic                                   addr_incr_two;
  logic                                   output_addr_en;
  logic [ADDR_W-1:1]                      output_addr_incr;
  logic [ADDR_W-1:1]                      output_addr_d, output_addr_q;
  logic [15:0]                            output_data_lo, output_data_hi;
  logic                                   data_valid, output_ready;
  logic [IC_LINE_SIZE-1:0]                line_data;
  logic [IC_LINE_BEATS-1:0]               line_err;
  logic [31:0]                            line_data_muxed;
  logic                                   line_err_muxed;
  logic [31:0]                            output_data;
  logic                                   output_err;
  // Invalidations: the invalidation FSM drives inval_write_req / inval_index_q,
  // which write tags with the valid bit cleared via the IC0 arbitration below
  typedef enum logic [1:0] {
    OUT_OF_RESET,
    AWAIT_SCRAMBLE_KEY,
    INVAL_CACHE,
    IDLE
  } inval_state_e;

  inval_state_e          inval_state_q, inval_state_d;
  logic                  inval_write_req;
  logic                  inval_block_cache;
  logic [IC_INDEX_W-1:0] inval_index_d, inval_index_q;
  logic                  inval_index_en;
  logic                  inval_active;
| |
  //////////////////////////
  // Instruction prefetch //
  //////////////////////////

  // Align the current lookup address down to the start of its cache line
  assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};

  // The prefetch address increments by one cache line for each granted request.
  // This address is also updated if there is a branch that is not granted, since the target
  // address (addr_i) is only valid for one cycle while branch_i is high.

  // The captured branch target address is not forced to be aligned since the offset in the cache
  // line must also be recorded for later use by the fill buffers.
  assign prefetch_addr_d =
      lookup_grant_ic0 ? (lookup_addr_aligned +
                          {{ADDR_W-IC_LINE_W-1{1'b0}}, 1'b1, {IC_LINE_W{1'b0}}}) :
                         addr_i;

  assign prefetch_addr_en = branch_i | lookup_grant_ic0;

  // Prefetch address register. The ResetAll variant adds an async reset for
  // targets requiring all flops to be resettable; otherwise the register is
  // only qualified by its enable (its value is meaningless until first loaded).
  if (ResetAll) begin : g_prefetch_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        prefetch_addr_q <= '0;
      end else if (prefetch_addr_en) begin
        prefetch_addr_q <= prefetch_addr_d;
      end
    end
  end else begin : g_prefetch_addr_nr
    always_ff @(posedge clk_i) begin
      if (prefetch_addr_en) begin
        prefetch_addr_q <= prefetch_addr_d;
      end
    end
  end
| |
  ////////////////////////
  // Pipeline stage IC0 //
  ////////////////////////

  // Cache lookup
  // Throttle new lookups (except branches) when the fill buffers are nearly full
  assign lookup_throttle  = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);

  assign lookup_req_ic0   = req_i & ~&fill_busy_q & (branch_i | ~lookup_throttle) &
                            ~ecc_write_req;
  assign lookup_addr_ic0  = branch_i ? addr_i : prefetch_addr_q;
  assign lookup_index_ic0 = lookup_addr_ic0[IC_INDEX_HI:IC_LINE_W];

  // Cache write
  assign fill_req_ic0   = (|fill_ram_req);
  assign fill_index_ic0 = fill_ram_req_addr[IC_INDEX_HI:IC_LINE_W];
  // The tag MSB is the valid bit: cleared for invalidation / ECC-correction
  // writes, set for a normal line allocation
  assign fill_tag_ic0   = {(~inval_write_req & ~ecc_write_req),
                           fill_ram_req_addr[ADDR_W-1:IC_INDEX_HI+1]};
  assign fill_wdata_ic0 = fill_ram_req_data;

  // Arbitrated signals - lookups have highest priority
  assign lookup_grant_ic0 = lookup_req_ic0;
  assign fill_grant_ic0   = fill_req_ic0 & ~lookup_req_ic0 & ~inval_write_req &
                            ~ecc_write_req;
  // Qualified lookup grant to mask ram signals in IC1 if access was not made
  assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_block_cache;

  // Tagram
  assign tag_req_ic0   = lookup_req_ic0 | fill_req_ic0 | inval_write_req | ecc_write_req;
  assign tag_index_ic0 = inval_write_req ? inval_index_q :
                         ecc_write_req   ? ecc_write_index :
                         fill_grant_ic0  ? fill_index_ic0 :
                                           lookup_index_ic0;
  // Invalidation writes (top priority above) hit all banks; ECC correction and
  // fills target specific ways; lookups read all ways in parallel
  assign tag_banks_ic0 = ecc_write_req  ? ecc_write_ways :
                         fill_grant_ic0 ? fill_ram_req_way :
                                          {IC_NUM_WAYS{1'b1}};
  assign tag_write_ic0 = fill_grant_ic0 | inval_write_req | ecc_write_req;

  // Dataram - shares the tagram address / bank / write controls
  assign data_req_ic0   = lookup_req_ic0 | fill_req_ic0;
  assign data_index_ic0 = tag_index_ic0;
  assign data_banks_ic0 = tag_banks_ic0;
  assign data_write_ic0 = tag_write_ic0;

  // Append ECC checkbits to write data if required
  if (ICacheECC) begin : gen_ecc_wdata
    // SEC_CM: ICACHE.MEM.INTEGRITY
    // Tagram ECC
    // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
    logic [21:0]             tag_ecc_input_padded;
    logic [27:0]             tag_ecc_output_padded;
    logic [22-IC_TAG_SIZE:0] unused_tag_ecc_output;

    assign tag_ecc_input_padded  = {{22-IC_TAG_SIZE{1'b0}},fill_tag_ic0};
    assign unused_tag_ecc_output = tag_ecc_output_padded[21:IC_TAG_SIZE-1];

    prim_secded_inv_28_22_enc tag_ecc_enc (
      .data_i (tag_ecc_input_padded),
      .data_o (tag_ecc_output_padded)
    );

    // Keep the 6 checkbits plus the real tag bits, dropping the zero padding
    assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[IC_TAG_SIZE-1:0]};

    // Dataram ECC - one encoder per bus-width beat of the line
    for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
      prim_secded_inv_39_32_enc data_ecc_enc (
        .data_i (fill_wdata_ic0[bank*BUS_SIZE+:BUS_SIZE]),
        .data_o (data_wdata_ic0[bank*BusSizeECC+:BusSizeECC])
      );
    end

  end else begin : gen_noecc_wdata
    assign tag_wdata_ic0  = fill_tag_ic0;
    assign data_wdata_ic0 = fill_wdata_ic0;
  end
| |
  ////////////////
  // IC0 -> IC1 //
  ////////////////

  // Tag RAMs outputs
  assign ic_tag_req_o   = {IC_NUM_WAYS{tag_req_ic0}} & tag_banks_ic0;
  assign ic_tag_write_o = tag_write_ic0;
  assign ic_tag_addr_o  = tag_index_ic0;
  assign ic_tag_wdata_o = tag_wdata_ic0;

  // Tag RAMs inputs
  assign tag_rdata_ic1  = ic_tag_rdata_i;

  // Data RAMs outputs
  assign ic_data_req_o   = {IC_NUM_WAYS{data_req_ic0}} & data_banks_ic0;
  assign ic_data_write_o = data_write_ic0;
  assign ic_data_addr_o  = data_index_ic0;
  assign ic_data_wdata_o = data_wdata_ic0;

  // Data RAMs inputs
  assign data_rdata_ic1  = ic_data_rdata_i;

  // Valid in IC1 only when an actual RAM lookup was made (lookup_actual_ic0
  // masks grants made while the cache was disabled or invalidating)
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      lookup_valid_ic1 <= 1'b0;
    end else begin
      lookup_valid_ic1 <= lookup_actual_ic0;
    end
  end

  // Capture the tag portion of the lookup address and the one-hot fill buffer
  // allocated for this lookup, both consumed in IC1
  if (ResetAll) begin : g_lookup_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        lookup_addr_ic1 <= '0;
        fill_in_ic1     <= '0;
      end else if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1     <= fill_alloc_sel;
      end
    end
  end else begin : g_lookup_addr_nr
    always_ff @(posedge clk_i) begin
      if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1     <= fill_alloc_sel;
      end
    end
  end
| |
  ////////////////////////
  // Pipeline stage IC1 //
  ////////////////////////

  // Tag matching: a hit requires the stored valid bit (tag MSB, compared
  // against the constant 1'b1 below) and an exact tag compare
  for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_match
    assign tag_match_ic1[way]   = (tag_rdata_ic1[way][IC_TAG_SIZE-1:0] ==
                                   {1'b1,lookup_addr_ic1[ADDR_W-1:IC_INDEX_HI+1]});
    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][IC_TAG_SIZE-1];
  end

  assign tag_hit_ic1 = |tag_match_ic1;
| |
| // Hit data mux |
| always_comb begin |
| hit_data_ecc_ic1 = 'b0; |
| for (int way = 0; way < IC_NUM_WAYS; way++) begin |
| if (tag_match_ic1[way]) begin |
| hit_data_ecc_ic1 |= data_rdata_ic1[way]; |
| end |
| end |
| end |
| |
  // Way selection for allocations to the cache (onehot signals)
  // 1 first invalid way
  // 2 global round-robin (pseudorandom) way
  assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
  assign round_robin_way_ic1[0]    = round_robin_way_q[IC_NUM_WAYS-1];
  for (genvar way = 1; way < IC_NUM_WAYS; way++) begin : gen_lowest_way
    // One-hot: only the lowest-numbered invalid way stays set
    assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
    // Rotate the round-robin pointer one position
    assign round_robin_way_ic1[way]    = round_robin_way_q[way-1];
  end

  // The round-robin pointer advances on every valid IC1 lookup
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      round_robin_way_q <= {{IC_NUM_WAYS-1{1'b0}}, 1'b1};
    end else if (lookup_valid_ic1) begin
      round_robin_way_q <= round_robin_way_ic1;
    end
  end

  // Prefer an invalid way; otherwise evict the round-robin victim
  assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
                                          round_robin_way_q;
| |
  // ECC checking logic
  if (ICacheECC) begin : gen_data_ecc_checking
    // SEC_CM: ICACHE.MEM.INTEGRITY
    logic [IC_NUM_WAYS-1:0]     tag_err_ic1;
    logic [IC_LINE_BEATS*2-1:0] data_err_ic1;
    logic                       ecc_correction_write_d, ecc_correction_write_q;
    logic [IC_NUM_WAYS-1:0]     ecc_correction_ways_d, ecc_correction_ways_q;
    logic [IC_INDEX_W-1:0]      lookup_index_ic1, ecc_correction_index_q;

    // Tag ECC checking - one decoder per way
    for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_ecc
      logic [1:0]  tag_err_bank_ic1;
      logic [27:0] tag_rdata_padded_ic1;

      // Expand the tag rdata with extra padding if the tag size is less than the maximum
      assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TagSizeECC-1-:6],
                                     {22-IC_TAG_SIZE{1'b0}},
                                     tag_rdata_ic1[way][IC_TAG_SIZE-1:0]};

      prim_secded_inv_28_22_dec data_ecc_dec (
        .data_i     (tag_rdata_padded_ic1),
        .data_o     (),
        .syndrome_o (),
        .err_o      (tag_err_bank_ic1)
      );
      // Any reported decode error (either err_o bit) counts as a tag error
      assign tag_err_ic1[way] = |tag_err_bank_ic1;
    end

    // Data ECC checking
    // Note - could generate for all ways and mux after
    for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
      prim_secded_inv_39_32_dec data_ecc_dec (
        .data_i     (hit_data_ecc_ic1[bank*BusSizeECC+:BusSizeECC]),
        .data_o     (),
        .syndrome_o (),
        .err_o      (data_err_ic1[bank*2+:2])
      );

      // Strip the checkbits to form the raw hit data
      assign hit_data_ic1[bank*BUS_SIZE+:BUS_SIZE] =
          hit_data_ecc_ic1[bank*BusSizeECC+:BUS_SIZE];

    end

    // Tag ECC across all ways is always expected to be correct so the check does not need to be
    // qualified by hit or tag valid. Initial (invalid with correct ECC) tags are written on reset
    // and all further tag writes produce correct ECC. For data ECC no initialisation is done on
    // reset so unused data (in particular those ways that don't have a valid tag) may have
    // incorrect ECC. We only check data ECC where tags indicate it is valid and we have hit on it.
    assign ecc_err_ic1 = lookup_valid_ic1 & (((|data_err_ic1) & tag_hit_ic1) | (|tag_err_ic1));

    // Error correction
    // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
    // spurious hits. Also prevents the same line being allocated twice when there was a true
    // hit and a spurious hit.
    assign ecc_correction_ways_d  = {IC_NUM_WAYS{|tag_err_ic1}} |
                                    (tag_match_ic1 & {IC_NUM_WAYS{|data_err_ic1}});
    assign ecc_correction_write_d = ecc_err_ic1;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        ecc_correction_write_q <= 1'b0;
      end else begin
        ecc_correction_write_q <= ecc_correction_write_d;
      end
    end

    // The index is required in IC1 only when ECC is configured so is registered here
    if (ResetAll) begin : g_lookup_ind_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          lookup_index_ic1 <= '0;
        end else if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end else begin : g_lookup_ind_nr
      always_ff @(posedge clk_i) begin
        if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end

    // Store the ways with errors to be invalidated
    if (ResetAll) begin : g_ecc_correction_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          ecc_correction_ways_q  <= '0;
          ecc_correction_index_q <= '0;
        end else if (ecc_err_ic1) begin
          ecc_correction_ways_q  <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end else begin : g_ecc_correction_nr
      always_ff @(posedge clk_i) begin
        if (ecc_err_ic1) begin
          ecc_correction_ways_q  <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end

    // The correction write (an invalidating tag write of the affected ways via
    // IC0) is issued the cycle after the error is detected
    assign ecc_write_req   = ecc_correction_write_q;
    assign ecc_write_ways  = ecc_correction_ways_q;
    assign ecc_write_index = ecc_correction_index_q;

    assign ecc_error_o = ecc_err_ic1;
  end else begin : gen_no_data_ecc
    // Without ECC the raw RAM data is the hit data and no correction happens
    assign ecc_err_ic1     = 1'b0;
    assign ecc_write_req   = 1'b0;
    assign ecc_write_ways  = '0;
    assign ecc_write_index = '0;
    assign hit_data_ic1    = hit_data_ecc_ic1;

    assign ecc_error_o = 1'b0;
  end
| |
  ///////////////////////////////
  // Cache allocation decision //
  ///////////////////////////////

  if (BranchCache) begin : gen_caching_logic

    // Cache branch target + a number of subsequent lines
    localparam int unsigned CACHE_AHEAD = 2;
    localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
    logic                   cache_cnt_dec;
    logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;

    // Counter reloads to CACHE_AHEAD on a branch and decrements once per
    // granted lookup until it reaches zero
    assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
    assign cache_cnt_d   = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
                                      (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        cache_cnt_q <= '0;
      end else begin
        cache_cnt_q <= cache_cnt_d;
      end
    end

    // Only allocate lines fetched at, or shortly after, a branch target
    assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i & ~inval_block_cache;
  end else begin : gen_cache_all

    // Cache all missing fetches
    assign fill_cache_new = icache_enable_i & ~inval_block_cache;
  end
| |
| ////////////////////////// |
| // Fill buffer tracking // |
| ////////////////////////// |
| |
| always_comb begin |
| fb_fill_level = '0; |
| for (int i = 0; i < NUM_FB; i++) begin |
| if (fill_busy_q[i] & ~fill_stale_q[i]) begin |
| fb_fill_level += {{$clog2(NUM_FB) - 1{1'b0}}, 1'b1}; |
| end |
| end |
| end |
| |
  // Allocate a new buffer for every granted lookup
  assign fill_new_alloc = lookup_grant_ic0;
  // Track whether a speculative external request was made from IC0, and whether it was granted
  // Speculative requests are only made for branches, or if the cache is disabled
  // (and only while no fill buffer already has an external request in flight)
  assign fill_spec_req  = (~icache_enable_i | branch_i) & ~|fill_ext_req;
  assign fill_spec_done = fill_spec_req & instr_gnt_i;
  assign fill_spec_hold = fill_spec_req & ~instr_gnt_i;
| |
  // Per-fill-buffer tracking. Each buffer follows one in-flight cache line:
  // allocation, external (bus) fetch, receipt of rvalid beats, optional cache
  // RAM allocation, and data output to the IF stage.
  for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs

    /////////////////////////////
    // Fill buffer allocations //
    /////////////////////////////

    // Allocate the lowest available buffer
    if (fb == 0) begin : gen_fb_zero
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
    end else begin : gen_fb_rest
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
    end

    assign fill_alloc[fb]  = fill_alloc_sel[fb] & fill_new_alloc;
    assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);

    // Track which other fill buffers are older than this one (for age-based arbitration)
    // TODO sparsify
    assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;

    // A fill buffer can release once all its actions are completed
    // all data written to the cache (unless hit or error)
    assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
                            (|fill_err_q[fb])) &
                           // all data output unless stale due to intervening branch
                           (fill_out_done[fb] | fill_stale_q[fb] | branch_i) &
                           // all external requests completed
                           fill_rvd_done[fb];

    /////////////////////////////////
    // Fill buffer status tracking //
    /////////////////////////////////

    // Track staleness (requests become stale when a branch intervenes)
    assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_i | fill_stale_q[fb]);
    // Track whether or not this request should allocate to the cache
    // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
    assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
                              (fill_cache_q[fb] & fill_busy_q[fb] &
                               icache_enable_i & ~icache_inval_i);
    // Record whether the request hit in the cache (an ECC error cancels the hit)
    assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
    assign fill_hit_d[fb]   = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);

    ///////////////////////////////////////////
    // Fill buffer external request tracking //
    ///////////////////////////////////////////

    // Make an external request
    assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done_d[fb];

    // Count the number of completed external requests (each line requires IC_LINE_BEATS requests)
    // On allocation the count seeds from fill_spec_done (the speculative IC0 request)
    assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
                                {{IC_LINE_BEATS_W{1'b0}},fill_spec_done} :
                                (fill_ext_cnt_q[fb] + {{IC_LINE_BEATS_W{1'b0}},
                                                       fill_ext_arb[fb] & instr_gnt_i});
    // External request must be held until granted
    assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
                                 (fill_ext_arb[fb] & ~instr_gnt_i);
    // External requests are completed when the counter is filled or when the request is cancelled
    assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][IC_LINE_BEATS_W] |
                                  // external requests are considered complete if the request hit
                                  fill_hit_ic1[fb] | fill_hit_q[fb] |
                                  // cancel if the line won't be cached and it is stale
                                  (~fill_cache_q[fb] & (branch_i | fill_stale_q[fb] |
                                  // or we're already at the end of the line
                                                        fill_ext_beat[fb][IC_LINE_BEATS_W]))) &
                                 // can't cancel while we are waiting for a grant on the bus
                                 ~fill_ext_hold_q[fb] & fill_busy_q[fb];
    // Track whether this fill buffer expects to receive beats of data
    assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
    // Count the number of rvalid beats received
    assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
                                (fill_rvd_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
    // External data is complete when all issued external requests have received their data
    assign fill_rvd_done[fb] = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
                               (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);

    //////////////////////////////////////
    // Fill buffer data output tracking //
    //////////////////////////////////////

    // Send data to the IF stage for requests that are not stale, have not completed their
    // data output, and have data available to send.
    // Data is available if:
    // - The request hit in the cache
    // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
    // - Data is available from the bus this cycle (fill_rvd_arb)
    assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                              (fill_hit_ic1[fb] | fill_hit_q[fb] |
                               (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);

    // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
    assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready;

    // Count the beats of data output to the IF stage; seeded with the line
    // offset of the original request so output can begin mid-line
    assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[IC_LINE_W-1:BUS_W]} :
                                (fill_out_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
    // Data output complete when the counter fills
    assign fill_out_done[fb] = fill_out_cnt_q[fb][IC_LINE_BEATS_W];

    //////////////////////////////////////
    // Fill buffer ram request tracking //
    //////////////////////////////////////

    // make a fill request once all data beats received
    assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][IC_LINE_BEATS_W] &
                              // unless the request hit, was non-allocating or got an error
                              ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
                              // or the request was already completed
                              ~fill_ram_done_q[fb];

    // Record when a cache allocation request has been completed
    assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);

    //////////////////////////////
    // Fill buffer line offsets //
    //////////////////////////////

    // When we branch into the middle of a line, the output count will not start from zero. This
    // beat count is used to know which incoming rdata beats are relevant.
    assign fill_ext_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_ext_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_ext_off[fb]  = fill_ext_beat[fb][IC_LINE_BEATS_W-1:0];
    assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_rvd_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_rvd_off[fb]  = fill_rvd_beat[fb][IC_LINE_BEATS_W-1:0];

    /////////////////////////////
    // Fill buffer arbitration //
    /////////////////////////////

    // Age based arbitration - all these signals are one-hot
    assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
    assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 &
                              ~|(fill_ram_req & fill_older_q[fb]);
    // Calculate which fill buffer is the oldest one which still needs to output data to IF
    assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
                                  fill_older_q[fb]);
    // Arbitrate the request which has data available to send, and is the oldest outstanding
    assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
    // Assign incoming rvalid data to the oldest fill buffer expecting it
    assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] &
                              ~|(fill_rvd_exp & fill_older_q[fb]);

    /////////////////////////////
    // Fill buffer data muxing //
    /////////////////////////////

    // Output data muxing controls
    // 1. Select data from the fill buffer data register
    assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
                               ~fill_out_done[fb] & fill_data_sel[fb] &
                               // The incoming data is already ahead of the output count
                               ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
                                (|fill_err_q[fb]));
    // 2. Select IC1 hit data
    assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
    // 3. Select incoming instr_rdata_i
    assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                               ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                               // The incoming data lines up with the output count
                               (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];


    ///////////////////////////
    // Fill buffer registers //
    ///////////////////////////

    // Fill buffer general enable
    assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_busy_q[fb]     <= 1'b0;
        fill_older_q[fb]    <= '0;
        fill_stale_q[fb]    <= 1'b0;
        fill_cache_q[fb]    <= 1'b0;
        fill_hit_q[fb]      <= 1'b0;
        fill_ext_cnt_q[fb]  <= '0;
        fill_ext_hold_q[fb] <= 1'b0;
        fill_ext_done_q[fb] <= 1'b0;
        fill_rvd_cnt_q[fb]  <= '0;
        fill_ram_done_q[fb] <= 1'b0;
        fill_out_cnt_q[fb]  <= '0;
      end else if (fill_entry_en[fb]) begin
        fill_busy_q[fb]     <= fill_busy_d[fb];
        fill_older_q[fb]    <= fill_older_d[fb];
        fill_stale_q[fb]    <= fill_stale_d[fb];
        fill_cache_q[fb]    <= fill_cache_d[fb];
        fill_hit_q[fb]      <= fill_hit_d[fb];
        fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];
        fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
        fill_ext_done_q[fb] <= fill_ext_done_d[fb];
        fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];
        fill_ram_done_q[fb] <= fill_ram_done_d[fb];
        fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];
      end
    end

    ////////////////////////////////////////
    // Fill buffer address / data storage //
    ////////////////////////////////////////

    assign fill_addr_en[fb] = fill_alloc[fb];
    assign fill_way_en[fb]  = (lookup_valid_ic1 & fill_in_ic1[fb]);

    if (ResetAll) begin : g_fill_addr_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_addr_q[fb] <= '0;
        end else if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end else begin : g_fill_addr_nr
      always_ff @(posedge clk_i) begin
        if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end

    // Record the victim way chosen in IC1 for the eventual cache RAM write
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_way_q[fb] <= '0;
      end else if (fill_way_en[fb]) begin
        fill_way_q[fb] <= sel_way_ic1;
      end
    end

    // Data either comes from the cache or the bus. If there was an ECC error, we must take
    // the incoming bus data since the cache hit data is corrupted.
    assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1 :
                                                {IC_LINE_BEATS{instr_rdata_i}};

    for (genvar b = 0; b < IC_LINE_BEATS; b++) begin : gen_data_buf
      // Error tracking (per beat)
      assign fill_err_d[fb][b] = (fill_rvd_arb[fb] & instr_err_i &
                                  (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0])) |
                                 // Hold the error once recorded
                                 (fill_busy_q[fb] & fill_err_q[fb][b]);

      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_err_q[fb][b] <= '0;
        end else if (fill_entry_en[fb]) begin
          fill_err_q[fb][b] <= fill_err_d[fb][b];
        end
      end

      // Enable the relevant part of the data register (or all for cache hits)
      // Ignore incoming rvalid data when we already have cache hit data
      assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
                                   (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                                    (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0]));

      if (ResetAll) begin : g_fill_data_ra
        always_ff @(posedge clk_i or negedge rst_ni) begin
          if (!rst_ni) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= '0;
          end else if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end else begin : g_fill_data_nr
        always_ff @(posedge clk_i) begin
          if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end

    end
  end
| |
| //////////////////////////////// |
| // Fill buffer one-hot muxing // |
| //////////////////////////////// |
| |
| // External req info |
| always_comb begin |
| fill_ext_req_addr = '0; |
| for (int i = 0; i < NUM_FB; i++) begin |
| if (fill_ext_arb[i]) begin |
| fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:IC_LINE_W], fill_ext_off[i]}; |
| end |
| end |
| end |
| |
| // Cache req info |
| always_comb begin |
| fill_ram_req_addr = '0; |
| fill_ram_req_way = '0; |
| fill_ram_req_data = '0; |
| for (int i = 0; i < NUM_FB; i++) begin |
| if (fill_ram_arb[i]) begin |
| fill_ram_req_addr |= fill_addr_q[i]; |
| fill_ram_req_way |= fill_way_q[i]; |
| fill_ram_req_data |= fill_data_q[i]; |
| end |
| end |
| end |
| |
| // IF stage output data |
| always_comb begin |
| fill_out_data = '0; |
| fill_out_err = '0; |
| for (int i = 0; i < NUM_FB; i++) begin |
| if (fill_data_reg[i]) begin |
| fill_out_data |= fill_data_q[i]; |
| // Ignore any speculative errors accumulated on cache hits |
| fill_out_err |= (fill_err_q[i] & ~{IC_LINE_BEATS{fill_hit_q[i]}}); |
| end |
| end |
| end |
| |
| /////////////////////// |
| // External requests // |
| /////////////////////// |
| |
| assign instr_req = ((~icache_enable_i | branch_i) & lookup_grant_ic0) | |
| (|fill_ext_req); |
| |
| assign instr_addr = |fill_ext_req ? fill_ext_req_addr : |
| lookup_addr_ic0[ADDR_W-1:BUS_W]; |
| |
| assign instr_req_o = instr_req; |
| assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}}; |
| |
| //////////////////////// |
| // Output data muxing // |
| //////////////////////// |
| |
| // Mux between line-width data sources |
| assign line_data = |fill_data_hit ? hit_data_ic1 : fill_out_data; |
| assign line_err = |fill_data_hit ? {IC_LINE_BEATS{1'b0}} : fill_out_err; |
| |
| // Mux the relevant beat of line data, based on the output address |
| always_comb begin |
| line_data_muxed = '0; |
| line_err_muxed = 1'b0; |
| for (int unsigned i = 0; i < IC_LINE_BEATS; i++) begin |
| // When data has been skidded, the output address is behind by one |
| if ((output_addr_q[IC_LINE_W-1:BUS_W] + {{IC_LINE_BEATS_W-1{1'b0}},skid_valid_q}) == |
| i[IC_LINE_BEATS_W-1:0]) begin |
| line_data_muxed |= line_data[i*32+:32]; |
| line_err_muxed |= line_err[i]; |
| end |
| end |
| end |
| |
| // Mux between incoming rdata and the muxed line data |
| assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed; |
| assign output_err = |fill_data_rvd ? instr_err_i : line_err_muxed; |
| |
| // Output data is valid (from any of the three possible sources). Note that fill_out_arb |
| // must be used here rather than fill_out_req because data can become valid out of order |
| // (e.g. cache hit data can become available ahead of an older outstanding miss). |
| assign data_valid = |fill_out_arb; |
| |
  // Skid buffer: holds the upper halfword of the output data (plus its error flag) so that an
  // uncompressed instruction split across two bus words can be reassembled, and so that a
  // compressed instruction in the upper halfword can be emitted after the lower one.
  assign skid_data_d = output_data[31:16];

  // Load the skid buffer whenever valid data is consumed or can be skidded
  assign skid_en = data_valid & (ready_i | skid_ready);

  if (ResetAll) begin : g_skid_data_ra
    // Resettable variant of the skid registers
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        skid_data_q <= '0;
        skid_err_q <= '0;
      end else if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q <= output_err;
      end
    end
  end else begin : g_skid_data_nr
    // Non-resettable variant (data-path registers)
    always_ff @(posedge clk_i) begin
      if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q <= output_err;
      end
    end
  end

  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
  // an error (no need to wait for the second half)
  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);

  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
  // (output address halfword-misaligned, skid currently empty, and the current output is
  // either uncompressed or errored)
  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);

  // Accept new output data when the IF stage consumes it or it can be skidded, unless the
  // skid buffer alone already satisfies a complete instruction
  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;

  // RISC-V encoding: low two bits != 2'b11 indicates a compressed (16-bit) instruction
  assign output_compressed = (rdata_o[1:0] != 2'b11);

  assign skid_valid_d =
      // Branches invalidate the skid buffer
      branch_i ? 1'b0 :
      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
      // The skid buffer becomes valid when:
      // - we branch to an unaligned uncompressed instruction
       (data_valid &
        (((output_addr_q[1] & (~output_compressed | output_err)) |
      // - a compressed instruction misaligns the stream
          (~output_addr_q[1] & output_compressed & ~output_err & ready_i)))));

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      skid_valid_q <= 1'b0;
    end else begin
      skid_valid_q <= skid_valid_d;
    end
  end
| |
  // Signal that valid data is available to the IF stage
  // Note that if the first half of an unaligned instruction reports an error, we do not need
  // to wait for the second half
  // Compressed instruction completely satisfied by skid buffer
  assign output_valid = skid_complete_instr |
                        // Output data is available and the stream is aligned, or skid data is
                        // available,
                        (data_valid & (~output_addr_q[1] | skid_valid_q |
                                       // or this is an error or an unaligned compressed
                                       // instruction
                                       output_err | (output_data[17:16] != 2'b11)));

  // Update the address on branches and every time an instruction is driven
  assign output_addr_en = branch_i | (ready_i & valid_o);

  // Increment the address by two every time a compressed instruction is popped
  // (not on errors - the failed fetch address must be preserved)
  assign addr_incr_two = output_compressed & ~err_o;

  // Next IF stage PC (output_addr_q holds bits [31:1]; bit 0 is always zero)
  assign output_addr_incr = (output_addr_q[31:1] +
                             // Increment address by 4 or 2
                             {29'd0, ~addr_incr_two, addr_incr_two});

  // Redirect the address on branches
  assign output_addr_d = branch_i ? addr_i[31:1] : output_addr_incr;

  if (ResetAll) begin : g_output_addr_ra
    // Resettable variant of the output address register
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        output_addr_q <= '0;
      end else if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end else begin : g_output_addr_nr
    // Non-resettable variant (data-path register)
    always_ff @(posedge clk_i) begin
      if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end

  // Mux the data from BUS_SIZE to halfword
  // This muxing realigns data when instruction words are split across BUS_W e.g.
  // word 1 |----|*h1*|
  // word 0 |*h0*|----| --> |*h1*|*h0*|
  // 31 15 0 31 15 0
  // Lower halfword: select the halfword the output address points at
  always_comb begin
    output_data_lo = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_lo |= output_data[i*16+:16];
      end
    end
  end

  // Upper halfword: select the halfword following the output address; the final halfword of a
  // bus word wraps around to halfword 0 (of the next word's data)
  always_comb begin
    output_data_hi = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS - 1; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_hi |= output_data[(i+1)*16+:16];
      end
    end
    if (&output_addr_q[BUS_W-1:1]) begin
      output_data_hi |= output_data[15:0];
    end
  end

  // IF stage outputs: the low halfword comes from the skid buffer when it holds valid data
  assign valid_o = output_valid;
  assign rdata_o = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
  assign addr_o = {output_addr_q, 1'b0};
  assign err_o = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
  // Error caused by the second half of a misaligned uncompressed instruction
  // (only relevant when err_o is set)
  assign err_plus2_o = skid_valid_q & ~skid_err_q;
| |
| /////////////////// |
| // Invalidations // |
| /////////////////// |
| |
| // Invalidation (writing all entries in the tag RAM with an invalid tag) occurs straight out of |
| // reset and after any invalidation request (signalled via icache_inval_i). An invalidation |
| // request coming whilst another is writing tags causes the invalidation to start again. This |
| // ensures a new scramble key is requested where a previous one is in use. |
| // TODO: Ditch this behaviour for non-secure ibex? |
| always_comb begin |
| inval_state_d = inval_state_q; |
| inval_index_d = inval_index_q; |
| inval_index_en = 1'b0; |
| inval_write_req = 1'b0; |
| ic_scr_key_req_o = 1'b0; |
| |
| // Prevent other cache activity (cache lookups and cache allocations) whilst an invalidation is |
| // in progress. Set to 1 by default as the only time we don't block is when the state machine is |
| // IDLE. |
| inval_block_cache = 1'b1; |
| |
| unique case (inval_state_q) |
| OUT_OF_RESET: begin |
| // Initial state, this initialises the tag RAMs out of reset before the icache can be used |
| inval_state_d = AWAIT_SCRAMBLE_KEY; |
| |
| if (~ic_scr_key_valid_i) begin |
| ic_scr_key_req_o = 1'b1; |
| end |
| end |
| AWAIT_SCRAMBLE_KEY: begin |
| // When invalidating a new scrambling key is requested on all invalidation requests. Wait |
| // for that new key to be available before beginning with the actual invalidation (cannot |
| // write to the tag RAM until we have the new scrambling key that will be used). Ignore any |
| // requests in this phase (once a scramble key request has started we cannot request a new |
| // one until the on-going request is done). |
| if (ic_scr_key_valid_i) begin |
| inval_state_d = INVAL_CACHE; |
| inval_index_d = '0; |
| inval_index_en = 1'b1; |
| end |
| end |
| INVAL_CACHE: begin |
| // Actually invalidate the cache. Write every entry in the tag RAM with an invalid tag. Once |
| // all are written we're done. |
| inval_write_req = 1'b1; |
| inval_index_d = (inval_index_q + {{IC_INDEX_W-1{1'b0}},1'b1}); |
| inval_index_en = 1'b1; |
| |
| if (icache_inval_i) begin |
| // If a new invalidaiton requests comes in go back to the beginning with a new scramble |
| // key |
| ic_scr_key_req_o = 1'b1; |
| inval_state_d = AWAIT_SCRAMBLE_KEY; |
| end else if (&inval_index_q) begin |
| // When the final index is written we're done |
| inval_state_d = IDLE; |
| end |
| end |
| IDLE: begin |
| // Usual running state |
| if (icache_inval_i) begin |
| ic_scr_key_req_o = 1'b1; |
| inval_state_d = AWAIT_SCRAMBLE_KEY; |
| end else begin |
| // Allow other cache activies whilst in IDLE and no invalidation has been requested |
| inval_block_cache = 1'b0; |
| end |
| end |
| default: ; |
| endcase |
| end |
| |
| assign inval_active = inval_state_q != IDLE; |
| |
| always_ff @(posedge clk_i or negedge rst_ni) begin |
| if (!rst_ni) begin |
| inval_state_q <= OUT_OF_RESET; |
| end else begin |
| inval_state_q <= inval_state_d; |
| end |
| end |
| |
  // Tag RAM index counter used while sweeping through the cache during invalidation.
  // As with the other data-path registers, it is only given an asynchronous reset when
  // ResetAll is set (the FSM initialises it explicitly before use).
  if (ResetAll) begin : g_inval_index_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        inval_index_q <= '0;
      end else if (inval_index_en) begin
        inval_index_q <= inval_index_d;
      end
    end
  end else begin : g_inval_index_nr
    always_ff @(posedge clk_i) begin
      if (inval_index_en) begin
        inval_index_q <= inval_index_d;
      end
    end
  end
| |
| ///////////////// |
| // Busy status // |
| ///////////////// |
| |
| // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are |
| // outstanding. |
| assign busy_o = inval_active | (|(fill_busy_q & ~fill_rvd_done)); |
| |
| //////////////// |
| // Assertions // |
| //////////////// |
| |
| `ASSERT_INIT(size_param_legal, (IC_LINE_SIZE > 32)) |
| |
| // ECC primitives will need to be changed for different sizes |
| `ASSERT_INIT(ecc_tag_param_legal, (IC_TAG_SIZE <= 27)) |
| `ASSERT_INIT(ecc_data_param_legal, !ICacheECC || (BUS_SIZE == 32)) |
| |
| // Lookups in the tag ram should always give a known result |
| `ASSERT_KNOWN(TagHitKnown, lookup_valid_ic1 & tag_hit_ic1) |
| `ASSERT_KNOWN(TagInvalidKnown, lookup_valid_ic1 & tag_invalid_ic1) |
| |
| // This is only used for the Yosys-based formal flow. Once we have working bind support, we can |
| // get rid of it. |
| `ifdef FORMAL |
| `ifdef YOSYS |
| // Unfortunately, Yosys doesn't support passing unpacked arrays as ports. Explicitly pack up the |
| // signals we need. |
| logic [NUM_FB-1:0][ADDR_W-1:0] packed_fill_addr_q; |
| always_comb begin |
| for (int i = 0; i < NUM_FB; i++) begin |
| packed_fill_addr_q[i][ADDR_W-1:0] = fill_addr_q[i]; |
| end |
| end |
| |
| `include "formal_tb_frag.svh" |
| `endif |
| `endif |
| |
| |
| endmodule |