| // description: |
| // 1. Dispatch unit receives uop instructions from uop queue |
| // 2. Dispatch unit checks the dispatch rules to determine whether the uops can be sent to reservation stations (RS). |
| // There are two ways to resolve a hazard: |
| // a. stall pipeline |
| // b. forward data from ROB |
| // 3. Dispatch unit read vector data from VRF for uops. |
| // |
| // feature list: |
| // 1. Dispatch module can issue 2 uops at most. |
| // a. Uop sequence must be in-order. |
| // b. Issuing uop(s) use valid-ready handshake mechanism. |
| // 2. Dispatch rules |
| // a. RAW data hazard: |
| // I. uop0 vs. rob_entry(s): if rob_entry.vd_valid is 'b0, then stall pipeline (do not issue uop0) |
| // II. uop1 vs. rob_entry(s): if rob_entry.vd_valid is 'b0, then do not issue uop1 |
| // III. uop1 vs. uop0: if the valid vd of uop0 is a source of uop1, then do not issue uop1 |
| // b. Structure hazard: |
| // I. if the total number of source operands of the uops is more than 4, then only issue uop0 |
| |
| `include "rvv_backend.svh" |
| `include "rvv_backend_dispatch.svh" |
| |
| module rvv_backend_dispatch |
| ( |
| clk, |
| rst_n, |
| uop_valid_uop2dp, |
| uop_uop2dp, |
| uop_ready_dp2uop, |
| rs_valid_dp2alu, |
| rs_dp2alu, |
| rs_ready_alu2dp, |
| rs_valid_dp2pmtrdt, |
| rs_dp2pmtrdt, |
| rs_ready_pmtrdt2dp, |
| rs_valid_dp2mul, |
| rs_dp2mul, |
| rs_ready_mul2dp, |
| rs_valid_dp2div, |
| rs_dp2div, |
| rs_ready_div2dp, |
| rs_valid_dp2lsu, |
| rs_dp2lsu, |
| rs_ready_lsu2dp, |
| uop_valid_dp2rob, |
| uop_dp2rob, |
| uop_ready_rob2dp, |
| uop_index_rob2dp, |
| rd_index_dp2vrf, |
| rd_data_vrf2dp, |
| v0_mask_vrf2dp, |
| rob_entry |
| ); |
| // Top level of the dispatch stage. This module only wires sub-blocks together: |
| // - RAW hazard checkers (uop vs. ROB, uop vs. older uop), |
| // - the structure hazard checker, which also drives the VRF read indexes, |
| // - the operand bypass blocks (VRF data / ROB data), |
| // - the handshake controller, |
| // - the per-byte operand type generators, |
| // and packs the per-uop payloads for the reservation stations and the ROB. |
| // Every statement in this file is a continuous assignment or an instantiation; |
| // clk and rst_n are declared but not referenced here. |
| |
| // ---port definition------------------------------------------------- |
| // global signal |
| input logic clk; |
| input logic rst_n; |
| |
| // Uops Queue to Dispatch unit |
| input logic [`NUM_DP_UOP-1:0] uop_valid_uop2dp; |
| input UOP_QUEUE_t [`NUM_DP_UOP-1:0] uop_uop2dp; |
| output logic [`NUM_DP_UOP-1:0] uop_ready_dp2uop; |
| |
| // Dispatch unit sends operations to reservation stations |
| // Dispatch unit to ALU reservation station |
| // rs_*: reservation station |
| output logic [`NUM_DP_UOP-1:0] rs_valid_dp2alu; |
| output ALU_RS_t [`NUM_DP_UOP-1:0] rs_dp2alu; |
| input logic [`NUM_DP_UOP-1:0] rs_ready_alu2dp; |
| |
| // Dispatch unit to PMT+RDT reservation station |
| output logic [`NUM_DP_UOP-1:0] rs_valid_dp2pmtrdt; |
| output PMT_RDT_RS_t [`NUM_DP_UOP-1:0] rs_dp2pmtrdt; |
| input logic [`NUM_DP_UOP-1:0] rs_ready_pmtrdt2dp; |
| |
| // Dispatch unit to MUL reservation station |
| output logic [`NUM_DP_UOP-1:0] rs_valid_dp2mul; |
| output MUL_RS_t [`NUM_DP_UOP-1:0] rs_dp2mul; |
| input logic [`NUM_DP_UOP-1:0] rs_ready_mul2dp; |
| |
| // Dispatch unit to DIV reservation station |
| output logic [`NUM_DP_UOP-1:0] rs_valid_dp2div; |
| output DIV_RS_t [`NUM_DP_UOP-1:0] rs_dp2div; |
| input logic [`NUM_DP_UOP-1:0] rs_ready_div2dp; |
| |
| // Dispatch unit to LSU reservation station |
| output logic [`NUM_DP_UOP-1:0] rs_valid_dp2lsu; |
| output LSU_RS_t [`NUM_DP_UOP-1:0] rs_dp2lsu; |
| input logic [`NUM_DP_UOP-1:0] rs_ready_lsu2dp; |
| |
| // Dispatch unit pushes operations to ROB unit |
| // uop_index_rob2dp is the base ROB index; uop[i] is tagged with uop_index_rob2dp + i below. |
| output logic [`NUM_DP_UOP-1:0] uop_valid_dp2rob; |
| output DP2ROB_t [`NUM_DP_UOP-1:0] uop_dp2rob; |
| input logic [`NUM_DP_UOP-1:0] uop_ready_rob2dp; |
| input logic [`ROB_DEPTH_WIDTH-1:0] uop_index_rob2dp; |
| |
| // Dispatch unit sends read request to VRF for vector data. |
| // Dispatch unit to VRF unit |
| // rd_data is returned from VRF in the current cycle. |
| output logic [`NUM_DP_VRF-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index_dp2vrf; |
| input logic [`NUM_DP_VRF-1:0][`VLEN-1:0] rd_data_vrf2dp; |
| input logic [`VLEN-1:0] v0_mask_vrf2dp; |
| |
| // Dispatch unit accepts all ROB entries to determine if vs_data of RS is from ROB or not |
| // ROB unit to Dispatch unit |
| input ROB2DP_t [`ROB_DEPTH-1:0] rob_entry; |
| |
| // ---internal signal definition-------------------------------------- |
| // suc_uop : consumer-side (source operand) view of each incoming uop |
| // pre_uop_rob : producer-side view of each ROB entry |
| // pre_uop_uop : producer-side view of the older uop(s) dispatched in the same cycle |
| SUC_UOP_RAW_t [`NUM_DP_UOP-1:0] suc_uop; |
| PRE_UOP_RAW_t [`ROB_DEPTH-1:0] pre_uop_rob; |
| PRE_UOP_RAW_t [`NUM_DP_UOP-2:0] pre_uop_uop; |
| RAW_UOP_ROB_t [`NUM_DP_UOP-1:0] raw_uop_rob; |
| // uop0 is the first uop so no need raw check between uops for it |
| RAW_UOP_UOP_t [`NUM_DP_UOP-1:1] raw_uop_uop; |
| |
| STRCT_UOP_t [`NUM_DP_UOP-1:0] strct_uop; |
| ARCH_HAZARD_t arch_hazard; |
| |
| // uop_operand : final operand data per uop (output of the bypass block) |
| // vrf_byp : operand data candidates read from the VRF |
| // rob_byp : operand data candidates taken from the ROB entries |
| UOP_OPN_t [`NUM_DP_UOP-1:0] uop_operand; |
| UOP_OPN_t [`NUM_DP_UOP-1:0] vrf_byp; |
| ROB_BYP_t [`ROB_DEPTH-1:0] rob_byp; |
| |
| UOP_CTRL_t [`NUM_DP_UOP-1:0] uop_ctrl; |
| |
| UOP_INFO_t [`NUM_DP_UOP-1:0] uop_info; |
| UOP_OPN_BYTE_TYPE_t [`NUM_DP_UOP-1:0] uop_operand_byte_type; |
| |
| // ---code start------------------------------------------------------ |
| // Extract the source-operand fields of every incoming uop for the RAW checkers. |
| genvar i; |
| generate |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_suc_uop |
| assign suc_uop[i].vs1_index = uop_uop2dp[i].vs1; |
| assign suc_uop[i].vs1_valid = uop_uop2dp[i].vs1_index_valid; |
| assign suc_uop[i].vs2_index = uop_uop2dp[i].vs2_index; |
| assign suc_uop[i].vs2_valid = uop_uop2dp[i].vs2_valid; |
| assign suc_uop[i].vd_index = uop_uop2dp[i].vd_index; |
| assign suc_uop[i].vs3_valid = uop_uop2dp[i].vs3_valid; |
| assign suc_uop[i].vm = uop_uop2dp[i].vm; |
| end |
| endgenerate |
| // RAW data hazard check between uop[*] and ROB |
| // Each uop is compared against the write index / status of all ROB entries. |
| generate |
| for (i=0; i<`ROB_DEPTH; i++) begin : gen_pre_uop_rob |
| assign pre_uop_rob[i].w_index = rob_entry[i].w_index; |
| assign pre_uop_rob[i].w_valid = rob_entry[i].w_valid; |
| assign pre_uop_rob[i].valid = rob_entry[i].valid; |
| end |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_raw_uop_rob |
| rvv_backend_dispatch_raw_uop_rob #( |
| ) u_raw_uop_rob ( |
| .raw_uop_rob (raw_uop_rob[i]), |
| .suc_uop (suc_uop[i]), |
| .pre_uop (pre_uop_rob) |
| ); |
| end |
| endgenerate |
| |
| // RAW data hazard check between uop(s) |
| // uop[i] (i >= 1) is compared against all older uops [i-1:0] of the same cycle. |
| generate |
| for (i=0; i<`NUM_DP_UOP-1; i++) begin : gen_pre_uop_uop |
| assign pre_uop_uop[i].w_index = uop_uop2dp[i].vd_index; |
| // NOTE(review): w_valid is tied low, presumably because an older uop of the |
| // same cycle cannot have produced its result yet - confirm. |
| assign pre_uop_uop[i].w_valid = 1'b0; |
| // The older uop only counts as a producer if it is valid and writes vd. |
| assign pre_uop_uop[i].valid = uop_uop2dp[i].vd_valid & uop_valid_uop2dp[i]; |
| end |
| for (i=1; i<`NUM_DP_UOP; i++) begin : gen_raw_uop_uop |
| rvv_backend_dispatch_raw_uop_uop #( |
| .PREUOP_NUM (i) |
| ) u_raw_uop_uop ( |
| .raw_uop_uop (raw_uop_uop[i]), |
| .suc_uop (suc_uop[i]), |
| .pre_uop (pre_uop_uop[i-1:0]) |
| ); |
| end |
| endgenerate |
| |
| // Structure hazard check and set read index for VRF |
| generate |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_strct_uop |
| assign strct_uop[i].vs1_index = uop_uop2dp[i].vs1; |
| assign strct_uop[i].vs2_index = uop_uop2dp[i].vs2_index; |
| assign strct_uop[i].vd_index = uop_uop2dp[i].vd_index; |
| assign strct_uop[i].uop_exe_unit = uop_uop2dp[i].uop_exe_unit; |
| assign strct_uop[i].uop_class = uop_uop2dp[i].uop_class; |
| end |
| endgenerate |
| |
| rvv_backend_dispatch_structure_hazard #( |
| ) u_structure_hazard ( |
| .rd_index (rd_index_dp2vrf), |
| .arch_hazard (arch_hazard), |
| .strct_uop (strct_uop) |
| ); |
| |
| // Bypass data for source operand of uop(s) |
| generate |
| for (i=0; i<`ROB_DEPTH; i++) begin : gen_rob_byp |
| assign rob_byp[i].w_data = rob_entry[i].w_data; |
| assign rob_byp[i].byte_type = rob_entry[i].byte_type; |
| // With AGNOSTIC_ONE defined the tail/inactive flags follow the vta/vma bits |
| // stored with the ROB entry; otherwise they are tied to 0. |
| `ifdef AGNOSTIC_ONE |
| assign rob_byp[i].tail_one = rob_entry[i].vector_csr.vtype.vta; |
| assign rob_byp[i].inactive_one = rob_entry[i].vector_csr.vtype.vma; |
| `else |
| assign rob_byp[i].tail_one = 1'b0; |
| assign rob_byp[i].inactive_one = 1'b0; |
| `endif |
| end |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_bypass |
| // VRF read-port to operand mapping. |
| // NOTE(review): this mapping must agree with the rd_index order generated by |
| // u_structure_hazard, which is not visible in this file - confirm. |
| assign vrf_byp[i].vs1 = rd_data_vrf2dp[2*i+1]; |
| assign vrf_byp[i].vs2 = rd_data_vrf2dp[2*i]; |
| // NOTE(review): the fixed ports 3 and 1 assume NUM_DP_UOP == 2 and at least |
| // 4 VRF read ports - confirm before changing either macro. |
| if (i == 0) assign vrf_byp[i].vd = rd_data_vrf2dp[3]; // i == 0 |
| else assign vrf_byp[i].vd = rd_data_vrf2dp[1]; // i == 1 |
| assign vrf_byp[i].v0 = v0_mask_vrf2dp; |
| |
| rvv_backend_dispatch_bypass #( |
| ) u_bypass ( |
| .uop_operand (uop_operand[i]), |
| .rob_byp (rob_byp), |
| .vrf_byp (vrf_byp[i]), |
| .raw_uop_rob (raw_uop_rob[i]) |
| ); |
| end |
| endgenerate |
| |
| // Control handshake mechanism for uop_queue <-> dispatch, dispatch <-> rs and dispatch <-> rob |
| generate |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_uop_ctrl |
| assign uop_ctrl[i] = uop_uop2dp[i].uop_exe_unit; |
| end |
| endgenerate |
| |
| rvv_backend_dispatch_ctrl #( |
| ) u_ctrl ( |
| // ctrl input signal |
| .raw_uop_rob (raw_uop_rob), |
| .raw_uop_uop (raw_uop_uop), |
| .arch_hazard (arch_hazard), |
| .uop_ctrl (uop_ctrl), |
| // handshake signals |
| .uop_valid_uop2dp (uop_valid_uop2dp), |
| .uop_ready_dp2uop (uop_ready_dp2uop), |
| .rs_valid_dp2alu (rs_valid_dp2alu), |
| .rs_ready_alu2dp (rs_ready_alu2dp), |
| .rs_valid_dp2pmtrdt (rs_valid_dp2pmtrdt), |
| .rs_ready_pmtrdt2dp (rs_ready_pmtrdt2dp), |
| .rs_valid_dp2mul (rs_valid_dp2mul), |
| .rs_ready_mul2dp (rs_ready_mul2dp), |
| .rs_valid_dp2div (rs_valid_dp2div), |
| .rs_ready_div2dp (rs_ready_div2dp), |
| .rs_valid_dp2lsu (rs_valid_dp2lsu), |
| .rs_ready_lsu2dp (rs_ready_lsu2dp), |
| .uop_valid_dp2rob (uop_valid_dp2rob), |
| .uop_ready_rob2dp (uop_ready_rob2dp) |
| ); |
| |
| // determine the type for each byte in uop's vector operands |
| generate |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_opr_bype_type |
| assign uop_info[i].uop_index = uop_uop2dp[i].uop_index; |
| assign uop_info[i].vd_eew = uop_uop2dp[i].vd_eew; |
| assign uop_info[i].vs1_eew = uop_uop2dp[i].vs1_eew; |
| assign uop_info[i].vs2_eew = uop_uop2dp[i].vs2_eew; |
| assign uop_info[i].vstart = uop_uop2dp[i].vector_csr.vstart; |
| assign uop_info[i].vl = uop_uop2dp[i].vector_csr.vl; |
| assign uop_info[i].vm = uop_uop2dp[i].vm; |
| |
| // The v0 operand after bypass is used as the mask enable input. |
| rvv_backend_dispatch_opr_byte_type #( |
| ) u_opr_byte_type ( |
| .operand_byte_type (uop_operand_byte_type[i]), |
| .uop_info (uop_info[i]), |
| .v0_enable (uop_operand[i].v0) |
| ); |
| end |
| endgenerate |
| |
| // output signals for RS+ROB |
| // The same payload is driven to every reservation station; the per-RS valid |
| // signals come from u_ctrl. Each uop is tagged with ROB index uop_index_rob2dp + i. |
| generate |
| for (i=0; i<`NUM_DP_UOP; i++) begin : gen_output_sig |
| // ALU RS |
| assign rs_dp2alu[i].rob_entry = uop_index_rob2dp + i; |
| assign rs_dp2alu[i].uop_funct6 = uop_uop2dp[i].uop_funct6; |
| assign rs_dp2alu[i].uop_funct3 = uop_uop2dp[i].uop_funct3; |
| assign rs_dp2alu[i].vstart = uop_uop2dp[i].vector_csr.vstart; |
| assign rs_dp2alu[i].vm = uop_uop2dp[i].vm; |
| assign rs_dp2alu[i].vxrm = uop_uop2dp[i].vector_csr.xrm; |
| assign rs_dp2alu[i].v0_data = uop_operand[i].v0; |
| assign rs_dp2alu[i].v0_data_valid = uop_uop2dp[i].v0_valid; |
| assign rs_dp2alu[i].vd_data = uop_operand[i].vd; |
| assign rs_dp2alu[i].vd_data_valid = uop_uop2dp[i].vs3_valid; |
| assign rs_dp2alu[i].vd_eew = uop_uop2dp[i].vd_eew; |
| assign rs_dp2alu[i].vs1 = uop_uop2dp[i].vs1; |
| assign rs_dp2alu[i].vs1_data = uop_operand[i].vs1; |
| assign rs_dp2alu[i].vs1_data_valid= uop_uop2dp[i].vs1_index_valid; |
| assign rs_dp2alu[i].vs2_data = uop_operand[i].vs2; |
| assign rs_dp2alu[i].vs2_data_valid= uop_uop2dp[i].vs2_valid; |
| assign rs_dp2alu[i].vs2_eew = uop_uop2dp[i].vs2_eew; |
| assign rs_dp2alu[i].rs1_data = uop_uop2dp[i].rs1_data; |
| assign rs_dp2alu[i].rs1_data_valid= uop_uop2dp[i].rs1_data_valid; |
| assign rs_dp2alu[i].uop_index = uop_uop2dp[i].uop_index; |
| |
| // PMTRDT RS |
| assign rs_dp2pmtrdt[i].rob_entry = uop_index_rob2dp + i; |
| assign rs_dp2pmtrdt[i].uop_funct6 = uop_uop2dp[i].uop_funct6; |
| assign rs_dp2pmtrdt[i].uop_funct3 = uop_uop2dp[i].uop_funct3; |
| assign rs_dp2pmtrdt[i].vm = uop_uop2dp[i].vm; |
| assign rs_dp2pmtrdt[i].vd_eew = uop_uop2dp[i].vd_eew; |
| assign rs_dp2pmtrdt[i].vs1 = uop_uop2dp[i].vs1; |
| assign rs_dp2pmtrdt[i].vs1_data = uop_operand[i].vs1; |
| assign rs_dp2pmtrdt[i].vs1_eew = uop_uop2dp[i].vs1_eew; |
| assign rs_dp2pmtrdt[i].vs1_data_valid= uop_uop2dp[i].vs1_index_valid; |
| assign rs_dp2pmtrdt[i].vs1_type = uop_operand_byte_type[i].vs1; |
| assign rs_dp2pmtrdt[i].vs2_data = uop_operand[i].vs2; |
| assign rs_dp2pmtrdt[i].vs2_eew = uop_uop2dp[i].vs2_eew; |
| assign rs_dp2pmtrdt[i].vs2_data_valid= uop_uop2dp[i].vs2_valid; |
| assign rs_dp2pmtrdt[i].vs2_type = uop_operand_byte_type[i].vs2; |
| assign rs_dp2pmtrdt[i].rs1_data = uop_uop2dp[i].rs1_data; |
| assign rs_dp2pmtrdt[i].rs1_data_valid= uop_uop2dp[i].rs1_data_valid; |
| assign rs_dp2pmtrdt[i].last_uop_valid= uop_uop2dp[i].last_uop_valid; |
| |
| // MUL/MAC RS |
| assign rs_dp2mul[i].rob_entry = uop_index_rob2dp + i; |
| assign rs_dp2mul[i].uop_funct6 = uop_uop2dp[i].uop_funct6; |
| assign rs_dp2mul[i].uop_funct3 = uop_uop2dp[i].uop_funct3; |
| assign rs_dp2mul[i].vxrm = uop_uop2dp[i].vector_csr.xrm; |
| assign rs_dp2mul[i].vd_eew = uop_uop2dp[i].vd_eew; |
| assign rs_dp2mul[i].vs1_data = uop_operand[i].vs1; |
| assign rs_dp2mul[i].vs_eew = uop_uop2dp[i].vs1_eew; |
| assign rs_dp2mul[i].vs1_data_valid= uop_uop2dp[i].vs1_index_valid; |
| assign rs_dp2mul[i].vs1_type = uop_operand_byte_type[i].vs1; |
| assign rs_dp2mul[i].vs2_data = uop_operand[i].vs2; |
| assign rs_dp2mul[i].vs2_data_valid= uop_uop2dp[i].vs2_valid; |
| assign rs_dp2mul[i].vs2_type = uop_operand_byte_type[i].vs2; |
| assign rs_dp2mul[i].vs3_data = uop_operand[i].vd; |
| assign rs_dp2mul[i].vs3_eew = uop_uop2dp[i].vd_eew; |
| assign rs_dp2mul[i].vs3_data_valid = uop_uop2dp[i].vs3_valid; |
| assign rs_dp2mul[i].vs3_type = uop_operand_byte_type[i].vd; |
| assign rs_dp2mul[i].rs1_data = uop_uop2dp[i].rs1_data; |
| assign rs_dp2mul[i].rs1_data_valid= uop_uop2dp[i].rs1_data_valid; |
| |
| // DIV RS |
| assign rs_dp2div[i].rob_entry = uop_index_rob2dp + i; |
| assign rs_dp2div[i].uop_funct6 = uop_uop2dp[i].uop_funct6; |
| assign rs_dp2div[i].uop_funct3 = uop_uop2dp[i].uop_funct3; |
| assign rs_dp2div[i].vd_eew = uop_uop2dp[i].vd_eew; |
| assign rs_dp2div[i].vs1_data = uop_operand[i].vs1; |
| assign rs_dp2div[i].vs1_eew = uop_uop2dp[i].vs1_eew; |
| assign rs_dp2div[i].vs1_data_valid= uop_uop2dp[i].vs1_index_valid; |
| assign rs_dp2div[i].vs1_type = uop_operand_byte_type[i].vs1; |
| assign rs_dp2div[i].vs2_data = uop_operand[i].vs2; |
| assign rs_dp2div[i].vs2_eew = uop_uop2dp[i].vs2_eew; |
| assign rs_dp2div[i].vs2_data_valid= uop_uop2dp[i].vs2_valid; |
| assign rs_dp2div[i].vs2_type = uop_operand_byte_type[i].vs2; |
| assign rs_dp2div[i].rs1_data = uop_uop2dp[i].rs1_data; |
| assign rs_dp2div[i].rs1_data_valid= uop_uop2dp[i].rs1_data_valid; |
| |
| // LSU RS |
| assign rs_dp2lsu[i].uop_pc = uop_uop2dp[i].uop_pc; |
| assign rs_dp2lsu[i].uop_id = uop_index_rob2dp + i; |
| assign rs_dp2lsu[i].uop_funct6 = uop_uop2dp[i].uop_funct6; |
| assign rs_dp2lsu[i].vidx_valid = uop_uop2dp[i].vs2_valid; |
| assign rs_dp2lsu[i].vidx_addr = uop_uop2dp[i].vs2_index; |
| assign rs_dp2lsu[i].vidx_data = uop_operand[i].vs2; |
| assign rs_dp2lsu[i].vregfile_read_data = uop_operand[i].vd; |
| assign rs_dp2lsu[i].vregfile_read_addr = uop_uop2dp[i].vd_index; |
| assign rs_dp2lsu[i].vregfile_read_valid = uop_uop2dp[i].vs3_valid; |
| assign rs_dp2lsu[i].vs3_type = uop_operand_byte_type[i].vd; |
| |
| // ROB |
| // w_index is the destination register index: the ROB hands it back through |
| // rob_entry[].w_index, where it is compared as a register index by the RAW |
| // checker. It is therefore driven from vd_index, not from the vd operand data. |
| assign uop_dp2rob[i].w_index = uop_uop2dp[i].vd_index; |
| assign uop_dp2rob[i].byte_type = uop_operand_byte_type[i].vd; |
| assign uop_dp2rob[i].vector_csr = uop_uop2dp[i].vector_csr; |
| end |
| endgenerate |
| |
| endmodule |