[otbn] Add bignum RF & ALU along with CSR & WSR
Adds all bignum side arithemetic and logic instructions (no MULQACC,
this will be a seperate unit) as well as CSR and WSR access (included as
they live in the bignum ALU).
Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/rtl/otbn_alu_bignum.sv b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
new file mode 100644
index 0000000..c38a19aa
--- /dev/null
+++ b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
@@ -0,0 +1,392 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`include "prim_assert.sv"
+
+/**
+ * OTBN alu block for the bignum instruction subset
+ *
+ * This ALU supports all of the 'plain' arithmetic and logic bignum instructions, BN.MULQACC is
+ * implemented in a seperate block.
+ *
+ * One barrel shifter and two adders (X and Y) are implemented along with the logic operators
+ * (AND,OR,XOR,NOT).
+ *
+ * The adders have 256-bit operands with a carry_in and optional invert on the second operand. This
+ * can be used to implement subtraction (a - b == a + ~b + 1). BN.SUBB/BN.ADDC are implemented by
+ * feeding in the carry flag as carry in rather than a fixed 0 or 1.
+ *
+ * The shifter takes a 512-bit input (to implement BN.RSHI, concatenate and right shift) and shifts
+ * right by up to 256-bits. The lower (256-bit) half of the input and output can be reversed to
+ * allow left shift implementation. There is no concatenate and left shift instruction so reversing
+ * isn't required over the full width.
+ *
+ * The dataflow between the adders and shifter is in the diagram below. This arrangement allows the
+ * implementation of the pseudo-mod (BN.ADDM/BN.SUBM) instructions in a single cycle whilst
+ * minimising the critical path. The pseudo-mod instructions do not have a shifted input so X can
+ * compute the initial add/sub and Y computes the pseudo-mod result. For all other add/sub
+ * operations Y computes the operation with one of the inputs supplied by the shifter and the other
+ * from operand_a.
+ *
+ * Both adder X and the shifter get supplied with operand_a and operand_b from the operation_i
+ * input. In addition the shifter gets a shift amount (shift_amt) and can use 0 instead of
+ * operand_a. The shifter concatenates operand_a (or 0) and operand_b together before shifting with
+ * operand_a in the upper (256-bit) half {operand_a/0, operand_b}. This allows the shifter to pass
+ * through operand_b simply by not performing a shift.
+ *
+ * A 0
+ * | |
+ * \-----/
+ * \---/
+ * A B | B shift_amt
+ * | | | | |
+ * +-----------+ +-----------+
+ * | Adder X | | Shifter |
+ * +-----------+ +-----------+
+ * | |
+ * |----+ +----|
+ * | | | |
+ * X result | | Shifter result
+ * | |
+ * | | +-----------+
+ * A | | +---| MOD WSR |
+ * | | | | +-----------+
+ * \-----/ \-----/
+ * \---/ \---/
+ * | |
+ * | |
+ * +-----------+
+ * | Adder Y |
+ * +-----------+
+ * |
+ * Y result
+ */
+
+
+module otbn_alu_bignum
+ import otbn_pkg::*;
+(
+ input logic clk_i,
+ input logic rst_ni,
+
+ input alu_bignum_operation_t operation_i,
+ output logic [WLEN-1:0] operation_result_o,
+
+ input ispr_e ispr_addr_i,
+ input logic [31:0] ispr_base_wdata_i,
+ input logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en_i,
+ input logic [WLEN-1:0] ispr_bignum_wdata_i,
+ input logic ispr_bignum_wr_en_i,
+ output logic [WLEN-1:0] ispr_rdata_o,
+
+ input logic [WLEN-1:0] rnd_i
+);
+ ///////////
+ // ISPRs //
+ ///////////
+
+ flags_t flags_q [NFlagGroups];
+ flags_t flags_d [NFlagGroups];
+ logic [NFlagGroups*FlagsWidth-1:0] flags_flattened;
+ logic [NFlagGroups-1:0] flags_en;
+ logic [NFlagGroups-1:0] is_operation_flag_group;
+ flags_t selected_flags;
+ flags_t update_flags;
+ logic update_flags_en;
+
+ for (genvar i_fg = 0; i_fg < NFlagGroups; i_fg++) begin : g_flag_groups
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ flags_q[i_fg] <= '{Z : 1'b0, M : 1'b0, L : 1'b0, C : 1'b0};
+ end else if (flags_en[i_fg]) begin
+ flags_q[i_fg] <= flags_d[i_fg];
+ end
+ end
+
+ assign is_operation_flag_group[i_fg] = operation_i.flag_group == i_fg;
+
+ assign flags_d[i_fg] = update_flags_en & is_operation_flag_group[i_fg] ?
+ update_flags : ispr_base_wdata_i[i_fg * FlagsWidth +: FlagsWidth];
+
+ assign flags_en[i_fg] = (update_flags_en & is_operation_flag_group[i_fg]) |
+ (ispr_base_wr_en_i[0] & (ispr_addr_i == IsprFlags));
+
+ assign flags_flattened[i_fg * FlagsWidth +: FlagsWidth] = flags_q[i_fg];
+ end
+
+ assign selected_flags = flags_q[operation_i.flag_group];
+
+ logic [WLEN-1:0] mod_q;
+ logic [WLEN-1:0] mod_d;
+ logic [BaseWordsPerWLEN-1:0] mod_wr_en;
+
+ for (genvar i_word = 0; i_word < BaseWordsPerWLEN; i_word++) begin : g_mod_words
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ mod_q[i_word*32+:32] <= '0;
+ end else if (mod_wr_en[i_word]) begin
+ mod_q[i_word*32+:32] <= mod_d[i_word*32+:32];
+ end
+ end
+
+ assign mod_d[i_word*32+:32] = ispr_base_wr_en_i[i_word] ? ispr_base_wdata_i :
+ ispr_bignum_wdata_i[i_word*32+:32];
+
+ assign mod_wr_en[i_word] = (ispr_addr_i == IsprMod) & (ispr_base_wr_en_i[i_word] |
+ ispr_bignum_wr_en_i);
+ end
+
+ always_comb begin
+ ispr_rdata_o = mod_q;
+
+ unique case (ispr_addr_i)
+ IsprMod: ispr_rdata_o = mod_q;
+ IsprRnd: ispr_rdata_o = rnd_i;
+ IsprAcc: ispr_rdata_o = 256'h0;
+ IsprFlags: ispr_rdata_o = {{(WLEN - (NFlagGroups * FlagsWidth)){1'b0}}, flags_flattened};
+ default: ;
+ endcase
+ end
+
+ /////////////
+ // Shifter //
+ /////////////
+
+ logic shift_right;
+ logic [WLEN-1:0] shifter_in_upper, shifter_in_lower, shifter_in_lower_reverse;
+ logic [WLEN*2-1:0] shifter_in;
+ logic [WLEN*2-1:0] shifter_out;
+ logic [WLEN-1:0] shifter_out_lower_reverse, shifter_res;
+
+ assign shifter_in_upper = operation_i.op == AluOpBignumRshi ? operation_i.operand_a : '0;
+ assign shifter_in_lower = operation_i.operand_b;
+
+ for (genvar i = 0; i < WLEN; i++) begin : g_shifter_in_lower_reverse
+ assign shifter_in_lower_reverse[i] = shifter_in_lower[WLEN - i - 1];
+ end
+
+ assign shifter_in = {shifter_in_upper, shift_right ? shifter_in_lower :
+ shifter_in_lower_reverse};
+
+ assign shifter_out = shifter_in >> operation_i.shift_amt;
+
+ for (genvar i = 0; i < WLEN; i++) begin : g_shifter_out_lower_reverse
+ assign shifter_out_lower_reverse[i] = shifter_out[WLEN - i - 1];
+ end
+
+ assign shifter_res = shift_right ? shifter_out[WLEN-1:0] : shifter_out_lower_reverse;
+
+ //////////////////
+ // Adders X & Y //
+ //////////////////
+
+ logic [WLEN:0] adder_x_op_a, adder_x_op_b;
+ logic adder_x_carry_in;
+ logic adder_x_op_b_invert;
+ logic [WLEN+1:0] adder_x_res;
+
+ logic [WLEN:0] adder_y_op_a, adder_y_op_b;
+ logic adder_y_carry_in;
+ logic adder_y_op_b_invert;
+ logic [WLEN+1:0] adder_y_res;
+
+ logic shift_mod_sel;
+ logic [WLEN-1:0] shift_mod_mux_out;
+ logic x_res_operand_a_sel;
+ logic [WLEN-1:0] x_res_operand_a_mux_out;
+
+ assign adder_x_op_a = {operation_i.operand_a, 1'b1};
+ assign adder_x_op_b = {adder_x_op_b_invert ? ~operation_i.operand_b : operation_i.operand_b,
+ adder_x_carry_in};
+
+ assign adder_x_res = adder_x_op_a + adder_x_op_b;
+
+ assign x_res_operand_a_mux_out = x_res_operand_a_sel ? adder_x_res[WLEN:1] : operation_i.operand_a;
+ assign shift_mod_mux_out = shift_mod_sel ? shifter_res : mod_q;
+
+ assign adder_y_op_a = {x_res_operand_a_mux_out, 1'b1};
+ assign adder_y_op_b = {adder_y_op_b_invert ? ~shift_mod_mux_out : shift_mod_mux_out,
+ adder_y_carry_in};
+
+ assign adder_y_res = adder_y_op_a + adder_y_op_b;
+
+ assign update_flags.C = (operation_i.op == AluOpBignumAdd ||
+ operation_i.op == AluOpBignumAddc) ? adder_y_res[WLEN+1] :
+ ~adder_y_res[WLEN+1];
+ assign update_flags.M = adder_y_res[WLEN];
+ assign update_flags.L = adder_y_res[1];
+ assign update_flags.Z = ~|adder_y_res[WLEN:1];
+
+ //////////////////////////////
+ // Shifter & Adders control //
+ //////////////////////////////
+
+ always_comb begin
+ shift_right = 1'b0;
+ adder_x_carry_in = 1'b0;
+ adder_x_op_b_invert = 1'b0;
+ x_res_operand_a_sel = 1'b0;
+ shift_mod_sel = 1'b0;
+ adder_y_carry_in = 1'b0;
+ adder_y_op_b_invert = 1'b0;
+ update_flags_en = 1'b0;
+
+ unique case (operation_i.op)
+ AluOpBignumAdd: begin
+ // Shifter computes B [>>|<<] shift_amt
+ // Y computes A + shifter_res
+ // X ignored
+ shift_right = operation_i.shift_right;
+ x_res_operand_a_sel = 1'b0;
+ shift_mod_sel = 1'b1;
+ adder_y_carry_in = 1'b0;
+ adder_y_op_b_invert = 1'b0;
+ update_flags_en = 1'b1;
+ end
+ AluOpBignumAddc: begin
+ // Shifter computes B [>>|<<] shift_amt
+ // Y computes A + shifter_res + flags.C
+ // X ignored
+ shift_right = operation_i.shift_right;
+ x_res_operand_a_sel = 1'b0;
+ shift_mod_sel = 1'b1;
+ adder_y_carry_in = selected_flags.C;
+ adder_y_op_b_invert = 1'b0;
+ update_flags_en = 1'b1;
+ end
+ AluOpBignumAddm: begin
+ // X computes A + B
+ // Y computes adder_x_res - mod = adder_x_res + ~mod + 1
+ // Shifter ignored
+ // Output mux chooses result based on top bit of X result (whether mod subtraction in
+ // Y should be applied or not)
+ adder_x_carry_in = 1'b0;
+ adder_x_op_b_invert = 1'b0;
+ x_res_operand_a_sel = 1'b1;
+ shift_mod_sel = 1'b0;
+ adder_y_carry_in = 1'b1;
+ adder_y_op_b_invert = 1'b1;
+ end
+ AluOpBignumSub: begin
+ // Shifter computes B [>>|<<] shift_amt
+ // Y computes A - shifter_res = A + ~shifter_res + 1
+ // X ignored
+ shift_right = operation_i.shift_right;
+ x_res_operand_a_sel = 1'b0;
+ shift_mod_sel = 1'b1;
+ adder_y_carry_in = 1'b1;
+ adder_y_op_b_invert = 1'b1;
+ update_flags_en = 1'b1;
+ end
+ AluOpBignumSubb: begin
+ // Shifter computes B [>>|<<] shift_amt
+ // Y computes A - shifter_res + ~flags.C = A + ~shifter_res + flags.C
+ // X ignored
+ shift_right = operation_i.shift_right;
+ x_res_operand_a_sel = 1'b0;
+ shift_mod_sel = 1'b1;
+ adder_y_carry_in = ~selected_flags.C;
+ adder_y_op_b_invert = 1'b1;
+ update_flags_en = 1'b1;
+ end
+ AluOpBignumSubm: begin
+ // X computes A - B = A + ~B + 1
+ // Y computes adder_x_res + mod
+ // Shifter ignored
+ // Output mux chooses result based on top bit of X result (whether subtraction in Y should
+ // be applied or not)
+ adder_x_carry_in = 1'b1;
+ adder_x_op_b_invert = 1'b1;
+ x_res_operand_a_sel = 1'b1;
+ shift_mod_sel = 1'b0;
+ adder_y_carry_in = 1'b0;
+ adder_y_op_b_invert = 1'b0;
+ end
+ AluOpBignumRshi: begin
+ // Shifter computes {A, B} >> shift_amt
+ // X, Y ignored
+ shift_right = 1'b1;
+ end
+ AluOpBignumXor,
+ AluOpBignumOr,
+ AluOpBignumAnd,
+ AluOpBignumNot: begin
+ // Shift computes one operand for the logical operation
+ // X & Y ignored
+ shift_right = operation_i.shift_right;
+ end
+ default: ;
+ endcase
+ end
+
+ ////////////////////////
+ // Logical operations //
+ ////////////////////////
+
+ logic [WLEN-1:0] logical_res;
+
+ always_comb begin
+ logical_res = ~operation_i.operand_a;
+
+ unique case (operation_i.op)
+ AluOpBignumXor: logical_res = operation_i.operand_a ^ shifter_res;
+ AluOpBignumOr: logical_res = operation_i.operand_a | shifter_res;
+ AluOpBignumAnd: logical_res = operation_i.operand_a & shifter_res;
+ AluOpBignumNot: logical_res = ~shifter_res;
+ default:;
+ endcase
+ end
+
+ ////////////////////////
+ // Output multiplexer //
+ ////////////////////////
+
+ always_comb begin
+ operation_result_o = adder_y_res[WLEN:1];
+
+ unique case(operation_i.op)
+ AluOpBignumAdd,
+ AluOpBignumAddc,
+ AluOpBignumSub,
+ AluOpBignumSubb: operation_result_o = adder_y_res[WLEN:1];
+
+ // For pseudo-mod operations the result depends upon initial a + b / a - b result that is
+ // computed in X. Operation to add/subtract mod (X + mod / X - mod) is computed in Y.
+ // Subtraction is computed using in the X & Y adders as a - b == a + ~b + 1. Note that for
+ // a - b the top bit of the result will be set if a - b >= 0 and otherwise clear.
+
+ // BN.ADDM - X = a + b, Y = X - mod, subtract mod if a + b >= mod
+ // * If X generates carry a + b > mod (as mod is 256-bit) - Select Y result
+ // * If Y generates carry X - mod == (a + b) - mod >= 0 hence a + b >= mod, note this is only valid if
+ // X does not generate carry - Select Y result
+ // * If neither happen a + b < mod - Select X result
+ AluOpBignumAddm: begin
+ if (adder_x_res[WLEN+1] || adder_y_res[WLEN+1]) begin
+ operation_result_o = adder_y_res[WLEN:1];
+ end else begin
+ operation_result_o = adder_x_res[WLEN:1];
+ end
+ end
+
+ // BN.SUBM - X = a - b, Y = X + mod, add mod if a - b < 0
+ // * If X generates carry a - b >= 0 - Select X result
+ // * Otherwise select Y result
+ AluOpBignumSubm: begin
+ if (adder_x_res[WLEN+1]) begin
+ operation_result_o = adder_x_res[WLEN:1];
+ end else begin
+ operation_result_o = adder_y_res[WLEN:1];
+ end
+ end
+
+ AluOpBignumRshi: operation_result_o = shifter_res[WLEN-1:0];
+
+ AluOpBignumXor,
+ AluOpBignumOr,
+ AluOpBignumAnd,
+ AluOpBignumNot: operation_result_o = logical_res;
+ default: ;
+ endcase
+ end
+endmodule