[otbn] Add support for base load and store
Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/dv/smoke/smoke_expected.txt b/hw/ip/otbn/dv/smoke/smoke_expected.txt
index f1c24f3..cb07e49 100644
--- a/hw/ip/otbn/dv/smoke/smoke_expected.txt
+++ b/hw/ip/otbn/dv/smoke/smoke_expected.txt
@@ -17,8 +17,8 @@
14 | 0x4c000000
15 | 0x00000034
16 | 0xfffffff4
-17 | 0x00000000
-18 | 0x00000000
+17 | 0xfacefeed
+18 | 0xd0beb533
19 | 0x00000000
20 | 0x00000000
21 | 0x00000000
diff --git a/hw/ip/otbn/dv/smoke/smoke_test.S b/hw/ip/otbn/dv/smoke/smoke_test.S
index 994f382..f1db4f3 100644
--- a/hw/ip/otbn/dv/smoke/smoke_test.S
+++ b/hw/ip/otbn/dv/smoke/smoke_test.S
@@ -5,6 +5,8 @@
# OTBN Smoke test, runs various instructions which are expected to produce the
# final register state see in smoke_expected.txt
+.section .text
+
# x1 = 0xd0beb513
lui x1, 0xd0beb
add x1, x1, 0x513
@@ -54,4 +56,20 @@
# x16 = x1 >>> x2[4:0] = 0xfffffff4
sra x16, x1, x2
+# x17 = dmem[16] = 0xfacefeed
+lw x17, 16(x0)
+
+# dmem[4] = x8 = 0xd0beb533
+sw x8, 4(x0)
+
+# x18 = dmem[4] = 0xd0beb533
+lw x18, 4(x0)
+
ecall
+
+.section .data
+.word 0x1234abcd
+.word 0xbaadf00d
+.word 0xcafed00d
+.word 0xdeadbeef
+.word 0xfacefeed
diff --git a/hw/ip/otbn/lint/otbn.vlt b/hw/ip/otbn/lint/otbn.vlt
index 4965640..dabd103 100644
--- a/hw/ip/otbn/lint/otbn.vlt
+++ b/hw/ip/otbn/lint/otbn.vlt
@@ -8,3 +8,6 @@
// complete
lint_off -rule UNUSED -file "*/rtl/otbn_*"
+// Operator EQ expects 32 bits on the LHS, but LHS's VARREF 'addr' generates 3
+// bits.
+lint_off -rule WIDTH -file "*/rtl/otbn_lsu.sv" -match "*'addr' generates 3 bits*"
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index 0d42984..d9fd588 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -52,40 +52,90 @@
output alu_base_operation_t alu_base_operation_o,
output alu_base_comparison_t alu_base_comparison_o,
input logic [31:0] alu_base_operation_result_i,
- input logic alu_base_comparison_result_i
+ input logic alu_base_comparison_result_i,
+
+ output logic lsu_load_req_o,
+ output logic lsu_store_req_o,
+ output insn_subset_e lsu_req_subset_o,
+ output logic [DmemAddrWidth-1:0] lsu_addr_o,
+
+ output logic [31:0] lsu_base_wdata_o,
+ output logic [WLEN-1:0] lsu_bignum_wdata_o,
+
+ input logic [31:0] lsu_base_rdata_i,
+ input logic [WLEN-1:0] lsu_bignum_rdata_i,
+ input logic [1:0] lsu_rdata_err_i // Bit1: Uncorrectable, Bit0: Correctable
);
+ typedef enum logic [1:0] {
+ OtbnStateHalt,
+ OtbnStateRun,
+ OtbnStateStall
+ } otbn_state_e;
- logic running_q, running_d;
+ otbn_state_e state_q, state_d;
+ logic stall;
+ logic mem_stall;
+
+ // Stall a cycle on loads to allow load data writeback to happen the following cycle. Stall not
+ // required on stores as there is no response to deal with.
+ // TODO: Possibility of error response on store? Probably still don't need to stall in that case
+ // just ensure incoming store error stops anything else happening.
+ assign mem_stall = lsu_load_req_o;
+
+ assign stall = mem_stall;
assign done_o = insn_valid_i && insn_dec_ctrl_i.ecall_insn;
always_comb begin
- running_d = running_q;
+ state_d = state_q;
insn_fetch_req_valid_o = 1'b0;
insn_fetch_req_addr_o = start_addr_i;
- if (start_i) begin
- running_d = 1'b1;
- insn_fetch_req_addr_o = start_addr_i;
- insn_fetch_req_valid_o = 1'b01;
- end else if (running_q) begin
- insn_fetch_req_valid_o = 1'b1;
- insn_fetch_req_addr_o = insn_addr_i + 'd4;
- end
-
- if (done_o) begin
- running_d = 1'b0;
- insn_fetch_req_valid_o = 1'b0;
- end
+ // TODO: Harden state machine
// TODO: Jumps/branches
+ unique case (state_q)
+ OtbnStateHalt: begin
+ if (start_i) begin
+ state_d = OtbnStateRun;
+ insn_fetch_req_addr_o = start_addr_i;
+ insn_fetch_req_valid_o = 1'b01;
+ end
+ end
+ OtbnStateRun: begin
+ insn_fetch_req_valid_o = 1'b1;
+
+ if (done_o) begin
+ state_d = OtbnStateHalt;
+ insn_fetch_req_valid_o = 1'b0;
+ end else begin
+ // When stalling refetch the same instruction to keep decode inputs constant
+ if (stall) begin
+ state_d = OtbnStateStall;
+ insn_fetch_req_addr_o = insn_addr_i;
+ end else begin
+ insn_fetch_req_addr_o = insn_addr_i + 'd4;
+ end
+ end
+ end
+ OtbnStateStall: begin
+ // Only ever stall for a single cycle
+ // TODO: Any more than one cycle stall cases?
+ insn_fetch_req_valid_o = 1'b1;
+ insn_fetch_req_addr_o = insn_addr_i + 'd4;
+ state_d = OtbnStateRun;
+ end
+ default: ;
+ endcase
end
+ `ASSERT(ControllerStateValid, state_q inside {OtbnStateHalt, OtbnStateRun, OtbnStateStall});
+
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
- running_q <= 1'b0;
+ state_q <= OtbnStateHalt;
end else begin
- running_q <= running_d;
+ state_q <= state_d;
end
end
@@ -123,8 +173,28 @@
// Register file write MUX
// TODO: Switch between CSR/ALU/LSU writeback.
- assign rf_base_wr_en_o = insn_dec_ctrl_i.rf_we;
- assign rf_base_wr_addr_o = insn_dec_base_i.d;
- assign rf_base_wr_data_o = alu_base_operation_result_i;
+ assign rf_base_wr_en_o =
+ insn_dec_ctrl_i.rf_we | (insn_dec_ctrl_i.ld_insn & (state_q == OtbnStateStall));
+ assign rf_base_wr_addr_o = insn_dec_base_i.d;
+
+ always_comb begin
+ rf_base_wr_data_o = alu_base_operation_result_i;
+
+ unique case (1'b1)
+ insn_dec_ctrl_i.ld_insn: rf_base_wr_data_o = lsu_base_rdata_i;
+ default: ;
+ endcase
+ end
+
+ // TODO: Add error on unaligned/out of bounds
+ assign lsu_load_req_o = insn_valid_i & insn_dec_ctrl_i.ld_insn & (state_q == OtbnStateRun);
+ assign lsu_store_req_o = insn_valid_i & insn_dec_ctrl_i.st_insn & (state_q == OtbnStateRun);
+ assign lsu_req_subset_o = insn_dec_ctrl_i.subset;
+ // TODO: Switch between address from base/bignum
+ assign lsu_addr_o = alu_base_operation_result_i[DmemAddrWidth-1:0];
+ assign lsu_base_wdata_o = rf_base_rd_data_b_i;
+
+ // TODO: Bignum load/store
+ assign lsu_bignum_wdata_o = '0;
endmodule
diff --git a/hw/ip/otbn/rtl/otbn_core.sv b/hw/ip/otbn/rtl/otbn_core.sv
index b44ba6a..2641858 100644
--- a/hw/ip/otbn/rtl/otbn_core.sv
+++ b/hw/ip/otbn/rtl/otbn_core.sv
@@ -83,6 +83,18 @@
logic [31:0] alu_base_operation_result;
logic alu_base_comparison_result;
+ logic lsu_load_req;
+ logic lsu_store_req;
+ insn_subset_e lsu_req_subset;
+ logic [DmemAddrWidth-1:0] lsu_addr;
+
+ logic [31:0] lsu_base_wdata;
+ logic [WLEN-1:0] lsu_bignum_wdata;
+
+ logic [31:0] lsu_base_rdata;
+ logic [WLEN-1:0] lsu_bignum_rdata;
+ logic [1:0] lsu_rdata_err; // Bit1: Uncorrectable, Bit0: Correctable
+
// Depending on its usage, the instruction address (program counter) is qualified by two valid
// signals: insn_fetch_resp_valid (together with the undecoded instruction data), and insn_valid
// for valid decoded (i.e. legal) instructions. Duplicate the signal in the source code for
@@ -170,7 +182,19 @@
.alu_base_operation_o (alu_base_operation),
.alu_base_comparison_o (alu_base_comparison),
.alu_base_operation_result_i (alu_base_operation_result),
- .alu_base_comparison_result_i (alu_base_comparison_result)
+ .alu_base_comparison_result_i (alu_base_comparison_result),
+
+ .lsu_load_req_o (lsu_load_req),
+ .lsu_store_req_o (lsu_store_req),
+ .lsu_req_subset_o (lsu_req_subset),
+ .lsu_addr_o (lsu_addr),
+
+ .lsu_base_wdata_o (lsu_base_wdata),
+ .lsu_bignum_wdata_o (lsu_bignum_wdata),
+
+ .lsu_base_rdata_i (lsu_base_rdata),
+ .lsu_bignum_rdata_i (lsu_bignum_rdata),
+ .lsu_rdata_err_i (lsu_rdata_err)
);
// Load store unit: read and write data from data memory
@@ -186,10 +210,19 @@
.dmem_wmask_o,
.dmem_rdata_i,
.dmem_rvalid_i,
- .dmem_rerror_i
+ .dmem_rerror_i,
- // Data from base and bn execute blocks.
- // TODO: Add signals to controller
+ .lsu_load_req_i (lsu_load_req),
+ .lsu_store_req_i (lsu_store_req),
+ .lsu_req_subset_i (lsu_req_subset),
+ .lsu_addr_i (lsu_addr),
+
+ .lsu_base_wdata_i (lsu_base_wdata),
+ .lsu_bignum_wdata_i (lsu_bignum_wdata),
+
+ .lsu_base_rdata_o (lsu_base_rdata),
+ .lsu_bignum_rdata_o (lsu_bignum_rdata),
+ .lsu_rdata_err_o (lsu_rdata_err)
);
// Control and Status registers
diff --git a/hw/ip/otbn/rtl/otbn_decoder.sv b/hw/ip/otbn/rtl/otbn_decoder.sv
index 8cd4814..9956cd1 100644
--- a/hw/ip/otbn/rtl/otbn_decoder.sv
+++ b/hw/ip/otbn/rtl/otbn_decoder.sv
@@ -87,6 +87,8 @@
rf_wd_sel_e rf_wdata_sel;
logic ecall_insn;
+ logic ld_insn;
+ logic st_insn;
// Reduced main ALU immediate MUX for Operand B
logic [31:0] imm_b;
@@ -116,7 +118,9 @@
alu_op: alu_operator,
rf_we: rf_we,
rf_wdata_sel: rf_wdata_sel,
- ecall_insn: ecall_insn
+ ecall_insn: ecall_insn,
+ ld_insn: ld_insn,
+ st_insn: st_insn
};
/////////////
@@ -131,6 +135,8 @@
illegal_insn = 1'b0;
ecall_insn = 1'b0;
+ ld_insn = 1'b0;
+ st_insn = 1'b0;
opcode = insn_opcode_e'(insn[6:0]);
@@ -204,6 +210,34 @@
end
end
+ //////////////////
+ // Loads/Stores //
+ //////////////////
+
+ InsnOpcodeBaseLoad: begin
+ insn_subset = InsnSubsetBase;
+ rf_ren_a = 1'b1;
+ ld_insn = 1'b1;
+
+ if (insn[14:12] != 3'b010) begin
+ illegal_insn = 1'b1;
+ end
+ end
+
+ InsnOpcodeBaseStore: begin
+ insn_subset = InsnSubsetBase;
+ rf_ren_a = 1'b1;
+ rf_ren_b = 1'b1;
+ st_insn = 1'b1;
+
+ if (insn[14:12] != 3'b010) begin
+ illegal_insn = 1'b1;
+ end
+ end
+
+ /////////////
+ // Special //
+ /////////////
InsnOpcodeBaseSystem: begin
insn_subset = InsnSubsetBase;
@@ -319,6 +353,24 @@
end
end
+ //////////////////
+ // Loads/Stores //
+ //////////////////
+
+ InsnOpcodeBaseLoad: begin
+ alu_op_a_mux_sel = OpASelRegister;
+ alu_op_b_mux_sel = OpBSelImmediate;
+ alu_operator = AluOpAdd;
+ imm_b_mux_sel = ImmBI;
+ end
+
+ InsnOpcodeBaseStore: begin
+ alu_op_a_mux_sel = OpASelRegister;
+ alu_op_b_mux_sel = OpBSelImmediate;
+ alu_operator = AluOpAdd;
+ imm_b_mux_sel = ImmBS;
+ end
+
/////////////
// Special //
/////////////
diff --git a/hw/ip/otbn/rtl/otbn_lsu.sv b/hw/ip/otbn/rtl/otbn_lsu.sv
index d00fcd9..e7d2f81 100644
--- a/hw/ip/otbn/rtl/otbn_lsu.sv
+++ b/hw/ip/otbn/rtl/otbn_lsu.sv
@@ -32,15 +32,73 @@
output logic [WLEN-1:0] dmem_wmask_o,
input logic [WLEN-1:0] dmem_rdata_i,
input logic dmem_rvalid_i,
- input logic [1:0] dmem_rerror_i // Bit1: Uncorrectable, Bit0: Correctable
+ input logic [1:0] dmem_rerror_i, // Bit1: Uncorrectable, Bit0: Correctable
+ input logic lsu_load_req_i,
+ input logic lsu_store_req_i,
+ input insn_subset_e lsu_req_subset_i,
+ input logic [DmemAddrWidth-1:0] lsu_addr_i,
+
+ input logic [31:0] lsu_base_wdata_i,
+ input logic [WLEN-1:0] lsu_bignum_wdata_i,
+
+ output logic [31:0] lsu_base_rdata_o,
+ output logic [WLEN-1:0] lsu_bignum_rdata_o,
+ output logic [1:0] lsu_rdata_err_o // Bit1: Uncorrectable, Bit0: Correctable
);
+ localparam int BaseWordsPerWLen = WLEN / 32;
+ localparam int BaseWordAddrW = prim_util_pkg::vbits(WLEN/8);
- // tie-off to 0 to prevent warnings until implementation is done
- assign dmem_req_o = 1'b0;
- assign dmem_write_o = 1'b0;
- assign dmem_addr_o = '0;
- assign dmem_wdata_o = '0;
- assign dmem_wmask_o = '0;
+ // Produce a WLEN bit mask for 32-bit writes given the 32-bit word write address. This doesn't
+ // propagate X so a seperate assertion must be used to check the input isn't X when a valid output
+ // is desired.
+ function automatic logic [WLEN-1:0] mask_from_word_addr(logic [BaseWordAddrW-1:2] addr);
+ logic [WLEN-1:0] mask;
+
+ mask = '0;
+
+ // Use of logic == int comparison in this loop works as BaseWordsPerWLen is a constant, so the
+ // loop can be unrolled. Due to the use of '==' any X or Z in addr will result in an X result
+ // for the comparison (so mask will remain 0).
+ for (int i = 0; i < BaseWordsPerWLen; i++) begin
+ if (addr == i) begin
+ mask[i*32+:32] = 32'hFFFFFFFF;
+ end
+ end
+
+ return mask;
+ endfunction
+
+ assign dmem_req_o = lsu_load_req_i | lsu_store_req_i;
+ assign dmem_write_o = lsu_store_req_i;
+ assign dmem_addr_o = lsu_addr_i;
+
+ // For base 32-bit writes replicate write data across dmem_wdata. dmem_wmask will be set
+ // appropriately so only the target word is written.
+ assign dmem_wdata_o = lsu_req_subset_i == InsnSubsetBase ?
+ {BaseWordsPerWLen{lsu_base_wdata_i}} : lsu_bignum_wdata_i;
+
+ assign dmem_wmask_o = lsu_req_subset_i == InsnSubsetBase ?
+ mask_from_word_addr(lsu_addr_i[BaseWordAddrW-1:2]) : {WLEN{1'b1}};
+
+ // From the WLEN word read from DMem select out a 32-bit word for base instructions.
+ for (genvar i_bit = 0; i_bit < 32; i_bit++) begin : g_base_rdata
+ logic [BaseWordsPerWLen-1:0] bit_mux;
+
+ for (genvar j_word = 0; j_word < BaseWordsPerWLen; j_word++) begin : g_bit_mux
+ assign bit_mux[j_word] =
+ (lsu_addr_i[BaseWordAddrW-1:2] == j_word) & dmem_rdata_i[i_bit + j_word * 32];
+ end
+
+ assign lsu_base_rdata_o[i_bit] = |bit_mux;
+ end
+
+ // Data appears the cycle following the request, LSU assume lsu_addr_i is kept stable by the
+ // controller to mux out the required 32-bit word.
+ `ASSERT(LsuLoadAddrStable, lsu_load_req_i |=> $stable(lsu_addr_i));
+ `ASSERT_KNOWN_IF(LsuAddrKnown, lsu_addr_i, lsu_load_req_i | lsu_store_req_i);
+
+ assign lsu_bignum_rdata_o = dmem_rdata_i;
+ assign lsu_rdata_err_o = dmem_rerror_i;
endmodule
diff --git a/hw/ip/otbn/rtl/otbn_pkg.sv b/hw/ip/otbn/rtl/otbn_pkg.sv
index 96bded4..288b476 100644
--- a/hw/ip/otbn/rtl/otbn_pkg.sv
+++ b/hw/ip/otbn/rtl/otbn_pkg.sv
@@ -163,6 +163,8 @@
logic rf_we;
rf_wd_sel_e rf_wdata_sel;
logic ecall_insn;
+ logic ld_insn;
+ logic st_insn;
} insn_dec_ctrl_t;
typedef struct packed {