// blob: 9819970654b413734ee3a08122575d59fb1acebf
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
`include "prim_assert.sv"
/**
* OTBN alu block for the bignum instruction subset
*
* This ALU supports all of the 'plain' arithmetic and logic bignum instructions, BN.MULQACC is
* implemented in a separate block.
*
* One barrel shifter and two adders (X and Y) are implemented along with the logic operators
* (AND,OR,XOR,NOT).
*
* The adders have 256-bit operands with a carry_in and optional invert on the second operand. This
* can be used to implement subtraction (a - b == a + ~b + 1). BN.SUBB/BN.ADDC are implemented by
* feeding in the carry flag as carry in rather than a fixed 0 or 1.
*
* The shifter takes a 512-bit input (to implement BN.RSHI, concatenate and right shift) and shifts
* right by up to 256-bits. The lower (256-bit) half of the input and output can be reversed to
* allow left shift implementation. There is no concatenate and left shift instruction so reversing
* isn't required over the full width.
*
* The dataflow between the adders and shifter is in the diagram below. This arrangement allows the
* implementation of the pseudo-mod (BN.ADDM/BN.SUBM) instructions in a single cycle whilst
* minimising the critical path. The pseudo-mod instructions do not have a shifted input so X can
* compute the initial add/sub and Y computes the pseudo-mod result. For all other add/sub
* operations Y computes the operation with one of the inputs supplied by the shifter and the other
* from operand_a.
*
* Both adder X and the shifter get supplied with operand_a and operand_b from the operation_i
* input. In addition the shifter gets a shift amount (shift_amt) and can use 0 instead of
* operand_a. The shifter concatenates operand_a (or 0) and operand_b together before shifting with
* operand_a in the upper (256-bit) half {operand_a/0, operand_b}. This allows the shifter to pass
* through operand_b simply by not performing a shift.
*
*                    A   0
*                    |   |
*                   \-----/
*                    \---/
*      A     B         |   B  shift_amt
*      |     |         |   |   |
*   +-----------+   +-----------+
*   |  Adder X  |   |  Shifter  |
*   +-----------+   +-----------+
*         |               |
*         |----+     +----|
*         |    |     |    |
*     X result |     | Shifter result
*              |     |
*              |     |       +-----------+
*          A   |     |   +---|  MOD WSR  |
*          |   |     |   |   +-----------+
*         \-----/   \-----/
*          \---/     \---/
*            |         |
*            |         |
*           +-----------+
*           |  Adder Y  |
*           +-----------+
*                 |
*              Y result
*/
module otbn_alu_bignum
import otbn_pkg::*;
(
// Clock and active-low asynchronous reset for the flag and MOD registers.
input logic clk_i,
input logic rst_ni,
// Operation to execute. This module reads its op, operand_a, operand_b, shift_right, shift_amt,
// flag_group, sel_flag, alu_flag_en and mac_flag_en fields.
input alu_bignum_operation_t operation_i,
// Result chosen by the output multiplexer according to operation_i.op.
output logic [WLEN-1:0] operation_result_o,
// Flag picked by operation_i.sel_flag out of the flag group picked by operation_i.flag_group.
output logic selection_flag_o,
// ISPR address used both for the read mux and to qualify writes to MOD, ACC and FLAGS.
input ispr_e ispr_addr_i,
// 32-bit write data and per-32-bit-word write enables. Bit 0 of the enable also qualifies writes
// to the FLAGS ISPR.
input logic [31:0] ispr_base_wdata_i,
input logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en_i,
// Full-width write data and write enable (used for MOD and ACC).
input logic [WLEN-1:0] ispr_bignum_wdata_i,
input logic ispr_bignum_wr_en_i,
// When set, the flags and MOD are written with zero and a zero write to ACC is requested.
input logic ispr_init_i,
// Read data of the ISPR addressed by ispr_addr_i.
output logic [WLEN-1:0] ispr_rdata_o,
// Current ACC value (returned for IsprAcc reads) and the ACC write request generated here; no ACC
// register is instantiated in this module.
input logic [WLEN-1:0] ispr_acc_i,
output logic [WLEN-1:0] ispr_acc_wr_data_o,
output logic ispr_acc_wr_en_o,
// Wipe controls: sec_wipe_mod_urnd_i loads MOD from urnd_data_i, sec_wipe_zero_i writes zero to
// MOD and to every flag group.
input logic sec_wipe_mod_urnd_i,
input logic sec_wipe_zero_i,
// Flag values and per-flag enable mask that are merged into the selected flag group when
// operation_i.mac_flag_en is set.
input flags_t mac_operation_flags_i,
input flags_t mac_operation_flags_en_i,
// Data returned for IsprRnd / IsprUrnd reads; urnd_data_i is also the MOD wipe value.
input logic [WLEN-1:0] rnd_data_i,
input logic [WLEN-1:0] urnd_data_i,
// Two key shares, each readable as a low 256-bit part and a zero-extended high part through the
// IsprKeyS0L/H and IsprKeyS1L/H addresses.
input logic [1:0][SideloadKeyWidth-1:0] sideload_key_shares_i
);
///////////
// ISPRs //
///////////

// Flag registers: one flags_t per flag group together with its next-state value and write enable.
flags_t                            flags_q [NFlagGroups];
flags_t                            flags_d [NFlagGroups];
logic [NFlagGroups-1:0]            flags_en;
// All flag groups packed side by side (group 0 in the least significant bits) for FLAGS reads.
logic [NFlagGroups*FlagsWidth-1:0] flags_flattened;
// One bit per flag group, set for the group addressed by operation_i.flag_group.
logic [NFlagGroups-1:0]            is_operation_flag_group;
// Current value of the flag group addressed by operation_i.flag_group.
flags_t                            selected_flags;

// Candidate flag values and their enables, one set per update source.
flags_t adder_update_flags;
logic   adder_update_flags_en, adder_update_flags_en_raw;
flags_t logic_update_flags;
logic   logic_update_flags_en, logic_update_flags_en_raw;
flags_t mac_update_flags;
logic   mac_update_flags_en;
logic   ispr_update_flags_en;

assign selected_flags = flags_q[operation_i.flag_group];

// The raw enables are produced by the opcode decode; they only take effect when the operation
// requests an ALU flag update.
assign adder_update_flags_en = adder_update_flags_en_raw & operation_i.alu_flag_en;
assign logic_update_flags_en = logic_update_flags_en_raw & operation_i.alu_flag_en;
assign mac_update_flags_en   = operation_i.mac_flag_en;
// A base (32-bit) write to word 0 while the FLAGS ISPR is addressed.
assign ispr_update_flags_en  = (ispr_addr_i == IsprFlags) & ispr_base_wr_en_i[0];

// MAC flag update: flags whose enable bit is set take the MAC value, all others keep the value
// currently held in the selected flag group.
assign mac_update_flags = (mac_operation_flags_en_i & mac_operation_flags_i) |
                          (~mac_operation_flags_en_i & selected_flags);

`ASSERT(UpdateFlagsOnehot, $onehot0({ispr_init_i, adder_update_flags_en, logic_update_flags_en,
                                     mac_update_flags_en, ispr_update_flags_en}))

for (genvar i_fg = 0; i_fg < NFlagGroups; i_fg++) begin : g_flag_groups
  assign is_operation_flag_group[i_fg] = (operation_i.flag_group == i_fg);

  assign flags_flattened[i_fg*FlagsWidth+:FlagsWidth] = flags_q[i_fg];

  // Init, wipe and FLAGS ISPR writes touch every group; adder, logic and MAC updates only touch
  // the group addressed by the operation.
  assign flags_en[i_fg] =
      ispr_init_i | sec_wipe_zero_i | ispr_update_flags_en |
      (is_operation_flag_group[i_fg] &
       (adder_update_flags_en | logic_update_flags_en | mac_update_flags_en));

  // Next-state value. Flag updates can come from the Y adder result, the logical operation
  // result, the MAC, an ISPR write, or be forced to zero by init / wipe. When no source is
  // active the adder flags are driven (flags_en is low then, so nothing is stored).
  always_comb begin
    unique case (1'b1)
      ispr_init_i:           flags_d[i_fg] = '0;
      adder_update_flags_en: flags_d[i_fg] = adder_update_flags;
      logic_update_flags_en: flags_d[i_fg] = logic_update_flags;
      mac_update_flags_en:   flags_d[i_fg] = mac_update_flags;
      ispr_update_flags_en:  flags_d[i_fg] = ispr_base_wdata_i[i_fg*FlagsWidth+:FlagsWidth];
      sec_wipe_zero_i:       flags_d[i_fg] = '0;
      default:               flags_d[i_fg] = adder_update_flags;
    endcase
  end

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      flags_q[i_fg] <= '{Z : 1'b0, L : 1'b0, M : 1'b0, C : 1'b0};
    end else if (flags_en[i_fg]) begin
      flags_q[i_fg] <= flags_d[i_fg];
    end
  end
end
// MOD WSR: a WLEN-bit register that can be written as a whole (bignum write), one 32-bit word at
// a time (base write), cleared by ispr_init_i, or overwritten by the wipe controls.
logic [WLEN-1:0]             mod_q;
logic [WLEN-1:0]             mod_d;
logic [BaseWordsPerWLEN-1:0] mod_wr_en;

for (genvar i_word = 0; i_word < BaseWordsPerWLEN; i_word++) begin : g_mod_words
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      mod_q[i_word*32+:32] <= '0;
    end else if (mod_wr_en[i_word]) begin
      mod_q[i_word*32+:32] <= mod_d[i_word*32+:32];
    end
  end

  always_comb begin
    // First stage: wipe value, or the matching word of the bignum write data when no wipe is
    // requested.
    unique case (1'b1)
      sec_wipe_mod_urnd_i: mod_d[i_word*32+:32] = urnd_data_i[i_word*32+:32];
      sec_wipe_zero_i:     mod_d[i_word*32+:32] = 32'd0;
      default:             mod_d[i_word*32+:32] = ispr_bignum_wdata_i[i_word*32+:32];
    endcase

    // Second stage: init and base word writes override the value chosen above.
    unique case (1'b1)
      ispr_init_i:               mod_d[i_word*32+:32] = '0;
      ispr_base_wr_en_i[i_word]: mod_d[i_word*32+:32] = ispr_base_wdata_i;
      default: ;
    endcase
  end

  // Both selector checks live at generate scope rather than inside the always_comb body: they
  // are clocked checks on the selector signals and do not depend on the procedural flow.
  `ASSERT(ModSecWipeSelOneHot, $onehot0({sec_wipe_mod_urnd_i, sec_wipe_zero_i}))
  `ASSERT(ModWrSelOneHot, $onehot0({ispr_init_i, ispr_base_wr_en_i[i_word]}))

  // A word is written on init, on a base or bignum write addressed to MOD, or on either wipe.
  assign mod_wr_en[i_word] = ispr_init_i |
                             ((ispr_addr_i == IsprMod) & (ispr_base_wr_en_i[i_word] |
                                                          ispr_bignum_wr_en_i)) |
                             sec_wipe_mod_urnd_i | sec_wipe_zero_i;
end
// ACC write request: a bignum write addressed to ACC, or a forced zero write on init.
assign ispr_acc_wr_en_o   = ispr_init_i | (ispr_bignum_wr_en_i & (ispr_addr_i == IsprAcc));
assign ispr_acc_wr_data_o = ispr_init_i ? '0 : ispr_bignum_wdata_i;

// ISPR read multiplexer. Values narrower than WLEN are zero-extended; any address without its own
// entry reads back MOD.
always_comb begin
  unique case (ispr_addr_i)
    IsprMod:    ispr_rdata_o = mod_q;
    IsprRnd:    ispr_rdata_o = rnd_data_i;
    IsprUrnd:   ispr_rdata_o = urnd_data_i;
    IsprAcc:    ispr_rdata_o = ispr_acc_i;
    IsprFlags:  ispr_rdata_o = WLEN'(flags_flattened);
    // Each key share is exposed as a low 256-bit part and a zero-extended high part.
    IsprKeyS0L: ispr_rdata_o = sideload_key_shares_i[0][255:0];
    IsprKeyS0H: ispr_rdata_o = WLEN'(sideload_key_shares_i[0][SideloadKeyWidth-1:256]);
    IsprKeyS1L: ispr_rdata_o = sideload_key_shares_i[1][255:0];
    IsprKeyS1H: ispr_rdata_o = WLEN'(sideload_key_shares_i[1][SideloadKeyWidth-1:256]);
    default:    ispr_rdata_o = mod_q;
  endcase
end
/////////////
// Shifter //
/////////////

// A single right shifter over {upper, lower}. Left shifts are obtained by bit-reversing the lower
// half before the shift and bit-reversing the lower half of the result afterwards.
logic              shift_right;
logic [WLEN-1:0]   shifter_in_upper, shifter_in_lower, shifter_in_lower_reverse;
logic [WLEN*2-1:0] shifter_in;
logic [WLEN*2-1:0] shifter_out;
logic [WLEN-1:0]   shifter_out_lower_reverse, shifter_res, unused_shifter_out_upper;

// Lower half is always operand_b; the upper half is operand_a for BN.RSHI and zero otherwise.
assign shifter_in_lower = operation_i.operand_b;
assign shifter_in_upper = (operation_i.op == AluOpBignumRshi) ? operation_i.operand_a : '0;

// Bit-reversed copy of the lower input half.
always_comb begin
  for (int i = 0; i < WLEN; i++) begin
    shifter_in_lower_reverse[i] = shifter_in_lower[WLEN-1-i];
  end
end

assign shifter_in  = {shifter_in_upper,
                      shift_right ? shifter_in_lower : shifter_in_lower_reverse};
assign shifter_out = shifter_in >> operation_i.shift_amt;

// Bit-reversed copy of the lower output half.
always_comb begin
  for (int i = 0; i < WLEN; i++) begin
    shifter_out_lower_reverse[i] = shifter_out[WLEN-1-i];
  end
end

assign shifter_res = shift_right ? shifter_out[WLEN-1:0] : shifter_out_lower_reverse;

// Only the lower WLEN bits of the shift result are returned.
assign unused_shifter_out_upper = shifter_out[WLEN*2-1:WLEN];
//////////////////
// Adders X & Y //
//////////////////

// Each adder operand carries one extra bit below the data: operand A gets a constant 1 there and
// operand B gets the carry-in, so bit 0 of the sum carries into the data bits exactly when the
// carry-in is set. The data result is bits [WLEN:1] and the carry-out is bit [WLEN+1].
logic [WLEN:0]   adder_x_op_a, adder_x_op_b;
logic            adder_x_carry_in;
logic            adder_x_op_b_invert;
logic [WLEN+1:0] adder_x_res;

logic [WLEN:0]   adder_y_op_a, adder_y_op_b;
logic            adder_y_carry_in;
logic            adder_y_op_b_invert;
logic [WLEN+1:0] adder_y_res;

logic            shift_mod_sel;
logic [WLEN-1:0] shift_mod_mux_out;
logic            x_res_operand_a_sel;
logic [WLEN-1:0] x_res_operand_a_mux_out;

// Adder X: operand_a plus (optionally inverted) operand_b.
assign adder_x_op_a = {operation_i.operand_a, 1'b1};
assign adder_x_op_b = {operation_i.operand_b ^ {WLEN{adder_x_op_b_invert}}, adder_x_carry_in};
assign adder_x_res  = adder_x_op_a + adder_x_op_b;

// Adder Y input selection: first operand is the X result or operand_a, second operand is the
// shifter result or MOD.
assign x_res_operand_a_mux_out = x_res_operand_a_sel ? adder_x_res[WLEN:1] :
                                                       operation_i.operand_a;
assign shift_mod_mux_out       = shift_mod_sel ? shifter_res : mod_q;

// Adder Y: selected first operand plus (optionally inverted) selected second operand.
assign adder_y_op_a = {x_res_operand_a_mux_out, 1'b1};
assign adder_y_op_b = {shift_mod_mux_out ^ {WLEN{adder_y_op_b_invert}}, adder_y_carry_in};
assign adder_y_res  = adder_y_op_a + adder_y_op_b;

// Flags derived from the Y result. C is the carry-out for BN.ADD / BN.ADDC and the inverted
// carry-out for every other operation.
assign adder_update_flags.C = (operation_i.op inside {AluOpBignumAdd, AluOpBignumAddc}) ?
                              adder_y_res[WLEN+1] : ~adder_y_res[WLEN+1];
assign adder_update_flags.M = adder_y_res[WLEN];
assign adder_update_flags.L = adder_y_res[1];
assign adder_update_flags.Z = ~|adder_y_res[WLEN:1];

// The LSb of the adder results are unused.
logic unused_adder_x_res_lsb, unused_adder_y_res_lsb;
assign unused_adder_x_res_lsb = adder_x_res[0];
assign unused_adder_y_res_lsb = adder_y_res[0];
//////////////////////////////
// Shifter & Adders control //
//////////////////////////////

// Decodes operation_i.op into the shifter direction, the adder operand selects, carry-ins and
// inverts, and the raw flag-update enables. Every control defaults to 0, so each opcode below only
// lists the controls it changes:
//   shift_mod_sel       = 0 feeds MOD into adder Y, 1 feeds the shifter result
//   x_res_operand_a_sel = 0 feeds operand_a into adder Y, 1 feeds the adder X result
always_comb begin
  shift_right               = 1'b0;
  shift_mod_sel             = 1'b0;
  x_res_operand_a_sel       = 1'b0;
  adder_x_carry_in          = 1'b0;
  adder_x_op_b_invert       = 1'b0;
  adder_y_carry_in          = 1'b0;
  adder_y_op_b_invert       = 1'b0;
  adder_update_flags_en_raw = 1'b0;
  logic_update_flags_en_raw = 1'b0;

  unique case (operation_i.op)
    AluOpBignumAdd: begin
      // Shifter: B [>>|<<] shift_amt
      // Y:       A + shifter_res
      // X:       unused
      shift_right               = operation_i.shift_right;
      shift_mod_sel             = 1'b1;
      adder_update_flags_en_raw = 1'b1;
    end
    AluOpBignumAddc: begin
      // Shifter: B [>>|<<] shift_amt
      // Y:       A + shifter_res + flags.C
      // X:       unused
      shift_right               = operation_i.shift_right;
      shift_mod_sel             = 1'b1;
      adder_y_carry_in          = selected_flags.C;
      adder_update_flags_en_raw = 1'b1;
    end
    AluOpBignumAddm: begin
      // X:       A + B
      // Y:       adder_x_res - mod = adder_x_res + ~mod + 1
      // Shifter: unused
      // The output mux picks X or Y depending on the carry-outs of both adders.
      x_res_operand_a_sel = 1'b1;
      adder_y_carry_in    = 1'b1;
      adder_y_op_b_invert = 1'b1;
    end
    AluOpBignumSub: begin
      // Shifter: B [>>|<<] shift_amt
      // Y:       A - shifter_res = A + ~shifter_res + 1
      // X:       unused
      shift_right               = operation_i.shift_right;
      shift_mod_sel             = 1'b1;
      adder_y_carry_in          = 1'b1;
      adder_y_op_b_invert       = 1'b1;
      adder_update_flags_en_raw = 1'b1;
    end
    AluOpBignumSubb: begin
      // Shifter: B [>>|<<] shift_amt
      // Y:       A - shifter_res - flags.C = A + ~shifter_res + ~flags.C
      // X:       unused
      shift_right               = operation_i.shift_right;
      shift_mod_sel             = 1'b1;
      adder_y_carry_in          = ~selected_flags.C;
      adder_y_op_b_invert       = 1'b1;
      adder_update_flags_en_raw = 1'b1;
    end
    AluOpBignumSubm: begin
      // X:       A - B = A + ~B + 1
      // Y:       adder_x_res + mod
      // Shifter: unused
      // The output mux picks X or Y depending on the carry-out of adder X.
      adder_x_carry_in    = 1'b1;
      adder_x_op_b_invert = 1'b1;
      x_res_operand_a_sel = 1'b1;
    end
    AluOpBignumRshi: begin
      // Shifter: {A, B} >> shift_amt
      // X, Y:    unused
      shift_right = 1'b1;
    end
    AluOpBignumXor,
    AluOpBignumOr,
    AluOpBignumAnd,
    AluOpBignumNot: begin
      // Shifter: provides the shifted operand of the logical operation
      // X, Y:    unused
      shift_right               = operation_i.shift_right;
      logic_update_flags_en_raw = 1'b1;
    end
    default: ;
  endcase
end
////////////////////////
// Logical operations //
////////////////////////

logic [WLEN-1:0] logical_res;

// Bitwise operation between operand_a and the shifter result (BN.NOT only uses the shifter
// result). For any non-logical opcode the value is the inverse of operand_a.
always_comb begin
  unique case (operation_i.op)
    AluOpBignumXor: logical_res = operation_i.operand_a ^ shifter_res;
    AluOpBignumOr:  logical_res = operation_i.operand_a | shifter_res;
    AluOpBignumAnd: logical_res = operation_i.operand_a & shifter_res;
    AluOpBignumNot: logical_res = ~shifter_res;
    default:        logical_res = ~operation_i.operand_a;
  endcase
end

// Logical operations only update M, L and Z; C must remain at its old value.
assign logic_update_flags.Z = ~|logical_res;
assign logic_update_flags.L = logical_res[0];
assign logic_update_flags.M = logical_res[WLEN-1];
assign logic_update_flags.C = selected_flags.C;
/////////////////////////////////
// Conditional Select Flag Mux //
/////////////////////////////////

// Picks one flag of the selected flag group according to operation_i.sel_flag. Z is the
// fall-through choice (FlagZ and any other encoding).
always_comb begin
  selection_flag_o = selected_flags.Z;

  unique case (operation_i.sel_flag)
    FlagC:   selection_flag_o = selected_flags.C;
    FlagM:   selection_flag_o = selected_flags.M;
    FlagL:   selection_flag_o = selected_flags.L;
    default: ;
  endcase
end
////////////////////////
// Output multiplexer //
////////////////////////

// Selects the block that provides the result for the current opcode. Any opcode without its own
// entry returns the adder Y result.
always_comb begin
  unique case (operation_i.op)
    // Pseudo-mod operations: X computes the plain a + b / a - b and Y applies the modulus
    // correction to the X result (X - mod / X + mod). Subtractions are performed as
    // a + ~b + 1, so the carry-out of a subtracting adder is set when a - b >= 0.
    //
    // BN.ADDM - X = a + b, Y = X - mod; the corrected value is needed when a + b >= mod:
    //  * X carry-out set: a + b does not fit in WLEN bits, so it exceeds mod - take Y.
    //  * Y carry-out set: X - mod >= 0, so a + b >= mod (only meaningful when X produced no
    //    carry-out) - take Y.
    //  * Neither set: a + b < mod - take X.
    AluOpBignumAddm: begin
      if (adder_x_res[WLEN+1] || adder_y_res[WLEN+1]) begin
        operation_result_o = adder_y_res[WLEN:1];
      end else begin
        operation_result_o = adder_x_res[WLEN:1];
      end
    end

    // BN.SUBM - X = a - b, Y = X + mod; the corrected value is needed when a - b < 0:
    //  * X carry-out set: a - b >= 0 - take X.
    //  * Otherwise take Y.
    AluOpBignumSubm: begin
      if (adder_x_res[WLEN+1]) begin
        operation_result_o = adder_x_res[WLEN:1];
      end else begin
        operation_result_o = adder_y_res[WLEN:1];
      end
    end

    AluOpBignumRshi: begin
      operation_result_o = shifter_res;
    end

    AluOpBignumXor,
    AluOpBignumOr,
    AluOpBignumAnd,
    AluOpBignumNot: begin
      operation_result_o = logical_res;
    end

    // Plain additions and subtractions are computed entirely in Y.
    AluOpBignumAdd,
    AluOpBignumAddc,
    AluOpBignumSub,
    AluOpBignumSubb: begin
      operation_result_o = adder_y_res[WLEN:1];
    end

    default: begin
      operation_result_o = adder_y_res[WLEN:1];
    end
  endcase
end
endmodule