blob: 428b252443a35aa593ec3b458026a2481fab620c [file] [log] [blame]
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A module that assembles RVVInstructions into RVVCmds before storing them into
// the RVVInstructionQueue. It is also responsible for handling architectural
// configuration state (i.e. LMUL, SEW). Inputs to this module may be unaligned
// (e.g. [invalid, valid, valid, invalid]) while outputs will always be aligned
// (e.g. [valid, valid, invalid, invalid]).
// Arguments from the scalar register file (for vx or configuration
// instructions) arrive one cycle after the instruction is dispatched, so this
// module introduces one cycle of latency before putting the command into the
// queue.
module RvvFrontEnd#(parameter N = 4,  // Number of instruction slots per cycle
                    parameter CAPACITYBITS=$clog2(2*N + 1))  // Width of the capacity counters
(
  input clk,
  input rstn,  // Asynchronous reset, active low

  // CSR values that are merged into the configuration state every cycle.
  input logic [`VSTART_WIDTH-1:0] vstart_i,
  input logic [`VCSR_VXRM_WIDTH-1:0] vxrm_i,
  input logic [`VCSR_VXSAT_WIDTH-1:0] vxsat_i,  // Currently unused in this module

  // Instruction input.
  input logic [N-1:0] inst_valid_i,
  input RVVInstruction [N-1:0] inst_data_i,
  output logic [N-1:0] inst_ready_o,

  // Register file input. Entry 2*i carries rs1 and entry 2*i+1 carries rs2 for
  // the instruction registered in slot i (i.e. data arrives one cycle after
  // the instruction was accepted).
  input logic [(2*N)-1:0] reg_read_valid_i,
  input logic [(2*N)-1:0][31:0] reg_read_data_i,

  // Scalar Regfile writeback for configuration functions.
  output logic [N-1:0] reg_write_valid_o,
  output logic [N-1:0][4:0] reg_write_addr_o,
  output logic [N-1:0][31:0] reg_write_data_o,

  // Command output.
  output logic [N-1:0] cmd_valid_o,
  output RVVCmd [N-1:0] cmd_data_o,
  input logic [CAPACITYBITS-1:0] queue_capacity_i,  // Number of elements that can be enqueued
  // queue_capacity_i minus the number of instructions currently held in this
  // stage.
  output logic [CAPACITYBITS-1:0] queue_capacity_o,

  // Config state
  output config_state_valid,
  output RVVConfigState config_state
);
  localparam COUNTBITS = $clog2(N + 1);
  typedef logic [COUNTBITS-1:0] count_t;

  // vtype architectural state
  logic vill;  // Declared but not driven or read in this module.
  RVVConfigState config_state_q;

  // Instructions to assemble into commands
  logic [N-1:0] valid_inst_q;     // If the instruction in this slot is valid
  count_t valid_inst_count_q;     // The sum of valid_inst_q
  RVVInstruction inst_q [N-1:0];  // The instruction in the slot

  // Backpressure: valid_in_psum[i] is the number of valid inputs in the slots
  // below i; valid_in_psum[N] is the total number of valid inputs.
  count_t valid_in_psum [N:0];
  always_comb begin
    valid_in_psum[0] = 0;
    for (int i = 0; i < N; i++) begin
      valid_in_psum[i+1] = valid_in_psum[i] + inst_valid_i[i];
    end
  end

  // No configuration-state forwarding for now (for timing): the exported
  // state is only flagged valid when no instruction is held in this stage.
  assign config_state_valid = !(|valid_inst_q);
  assign config_state = config_state_q;

  // Capacity left once the instructions already held in this stage are
  // accounted for.
  logic [CAPACITYBITS-1:0] queue_capacity;
  assign queue_capacity = queue_capacity_i - valid_inst_count_q;
  assign queue_capacity_o = queue_capacity;

  // A valid input is accepted when the number of valid inputs in the slots
  // below it is still smaller than the remaining capacity.
  logic inst_accepted [N-1:0];
  count_t valid_inst_count_d;
  always_comb begin
    for (int i = 0; i < N; i++) begin
      inst_accepted[i] = (valid_in_psum[i] < queue_capacity) && inst_valid_i[i];
      inst_ready_o[i] = inst_accepted[i];
    end
    // Number of accepted instructions: min(total valid inputs, capacity).
    valid_inst_count_d = (valid_in_psum[N] < queue_capacity) ?
        valid_in_psum[N] : queue_capacity;
  end

  // Register the accepted instructions. inst_q is intentionally not reset; it
  // is qualified by valid_inst_q.
  always_ff @(posedge clk or negedge rstn) begin
    if (!rstn) begin
      valid_inst_q <= '0;
      valid_inst_count_q <= '0;
    end else begin
      valid_inst_count_q <= valid_inst_count_d;
      for (int i = 0; i < N; i++) begin
        valid_inst_q[i] <= inst_accepted[i];
        inst_q[i] <= inst_data_i[i];
      end
    end
  end

  // Update configuration architectural state.
  // inst_config_state[i] is the state seen by the instruction in slot i (the
  // state before it executes); inst_config_state[i+1] is the state after it.
  RVVConfigState inst_config_state [N:0];
  logic is_setvl [N-1:0];
  always_comb begin
    inst_config_state[0] = config_state_q;
    inst_config_state[0].vstart = vstart_i;
    inst_config_state[0].xrm = RVVXRM'(vxrm_i);
    //inst_config_state[0].xsat = vxsat_i;
    for (int i = 0; i < N; i++) begin
      inst_config_state[i+1] = inst_config_state[i];
      is_setvl[i] = 0;
      // bits[7:5] == 3'b111 selects the configuration-setting instructions.
      if (valid_inst_q[i] &&
          (inst_q[i].opcode == RVV) &&
          (inst_q[i].bits[7:5] == 3'b111)) begin
        if (inst_q[i].bits[24] == 0) begin  // vsetvli
          // vl from rs1, vtype fields from the immediate.
          inst_config_state[i+1].vl = reg_read_data_i[2*i];
          inst_config_state[i+1].lmul = RVVLMUL'(inst_q[i].bits[15:13]);
          inst_config_state[i+1].sew = RVVSEW'(inst_q[i].bits[18:16]);
          inst_config_state[i+1].ta = inst_q[i].bits[19];
          inst_config_state[i+1].ma = inst_q[i].bits[20];
          is_setvl[i] = 1;
        end else if (inst_q[i].bits[24:23] == 2'b11) begin  // vsetivli
          // vl from the zero-extended 5-bit immediate, vtype fields from the
          // immediate.
          inst_config_state[i+1].vl =
              {{(`VL_WIDTH - 5){1'b0}}, inst_q[i].bits[12:8]};
          inst_config_state[i+1].lmul = RVVLMUL'(inst_q[i].bits[15:13]);
          inst_config_state[i+1].sew = RVVSEW'(inst_q[i].bits[18:16]);
          inst_config_state[i+1].ta = inst_q[i].bits[19];
          inst_config_state[i+1].ma = inst_q[i].bits[20];
          is_setvl[i] = 1;
        end else if (inst_q[i].bits[24:23] == 2'b10) begin  // vsetvl
          // vl from rs1, vtype fields from rs2.
          inst_config_state[i+1].vl = reg_read_data_i[2*i];
          inst_config_state[i+1].lmul =
              RVVLMUL'(reg_read_data_i[(2*i) + 1][2:0]);
          inst_config_state[i+1].sew =
              RVVSEW'(reg_read_data_i[(2*i) + 1][5:3]);
          inst_config_state[i+1].ta = reg_read_data_i[(2*i) + 1][6];
          inst_config_state[i+1].ma = reg_read_data_i[(2*i) + 1][7];
          is_setvl[i] = 1;
        end
      end
    end
  end

  always_ff @(posedge clk or negedge rstn) begin
    if (!rstn) begin
      // TODO(derekjchow): check if RVV spec specifies arch state on reset.
      // NOTE(review): vstart and xrm are not reset here; they are overwritten
      // from vstart_i/vxrm_i above and captured on the first clock edge after
      // reset is released.
      config_state_q.ma <= 0;
      config_state_q.ta <= 0;
      config_state_q.sew <= SEW8;
      config_state_q.lmul <= LMUL1;
      config_state_q.vl <= 16;
    end else begin
      // Update config state next cycle
      config_state_q <= inst_config_state[N];
    end
  end

  // Propagate outputs
  logic [N-1:0] unaligned_cmd_valid;
  RVVCmd [N-1:0] unaligned_cmd_data;
  always_comb begin
    for (int i = 0; i < N; i++) begin
      // Configuration instructions are consumed here and never enqueued.
      unaligned_cmd_valid[i] = valid_inst_q[i] && !is_setvl[i];

      // Combine instruction + arch state into command
`ifdef TB_SUPPORT
      unaligned_cmd_data[i].insts_pc = inst_q[i].pc;
`endif
      unaligned_cmd_data[i].opcode = inst_q[i].opcode;
      unaligned_cmd_data[i].bits = inst_q[i].bits;
      unaligned_cmd_data[i].arch_state = inst_config_state[i];
      // TODO: Handle rs propagation for loads/stores
      unaligned_cmd_data[i].rs1 =
          inst_q[i].bits[7] ? reg_read_data_i[2*i] : 0;

      // Write new value of vl into rd for configuration function. The value
      // produced by the instruction in slot i lives in inst_config_state[i+1];
      // inst_config_state[i] still holds the vl from before this instruction.
      reg_write_valid_o[i] = is_setvl[i];
      reg_write_addr_o[i] = inst_q[i].bits[4:0];
      reg_write_data_o[i] =
          {{(`XLEN-`VL_WIDTH){1'b0}}, inst_config_state[i+1].vl};
    end
  end

  // Align outputs: compact the valid commands towards slot 0.
  Aligner#(.T(RVVCmd), .N(N)) cmd_aligner(
      .valid_in(unaligned_cmd_valid),
      .data_in(unaligned_cmd_data),
      .valid_out(cmd_valid_o),
      .data_out(cmd_data_o)
  );

  // Assertions: every registered instruction that needs a scalar operand must
  // see the matching register-file read flagged as valid.
`ifndef SYNTHESIS
  logic [N-1:0] lsu_requires_rs1_read;
  logic [N-1:0] non_lsu_requires_rs1_read;
  logic [N-1:0] requires_rs1_read;
  logic [N-1:0] lsu_requires_rs2_read;
  logic [N-1:0] non_lsu_requires_rs2_read;
  logic [N-1:0] requires_rs2_read;
  always_comb begin
    for (int i = 0; i < N; i++) begin
      // All LSU instructions read from rs1
      lsu_requires_rs1_read[i] = (inst_q[i].opcode != RVV);
      // Non LSU rs1 check
      non_lsu_requires_rs1_read[i] = (inst_q[i].opcode == RVV) &&
          (inst_q[i].bits[7] && inst_q[i].bits[6:5] != 2'b11);
      requires_rs1_read[i] =
          lsu_requires_rs1_read[i] || non_lsu_requires_rs1_read[i];

      // Only strided loads/stores (mop=0b10) read rs2
      lsu_requires_rs2_read[i] = (inst_q[i].opcode != RVV) &&
          (inst_q[i].bits[20:19] == 2'b10);
      // vsetvl is only non LSU instruction that reads rs2
      non_lsu_requires_rs2_read[i] = (inst_q[i].opcode == RVV) &&
          (inst_q[i].bits[7:5] == 3'b111) &&
          (inst_q[i].bits[24:18] == 7'b1000000);
      requires_rs2_read[i] =
          lsu_requires_rs2_read[i] || non_lsu_requires_rs2_read[i];
    end
  end

  always @(posedge clk) begin
    for (int i = 0; i < N; i++) begin
      assert(!valid_inst_q[i] || !requires_rs1_read[i] ||
             reg_read_valid_i[2*i]);
      assert(!valid_inst_q[i] || !requires_rs2_read[i] ||
             reg_read_valid_i[(2*i) + 1]);
    end
  end
`endif  // not def SYNTHESIS
endmodule