[otbn] Allow mulqacc to set flags
The bn.mulqacc.wo and bn.mulqacc.so instructions now set M, L and Z
flags in a specified flag group. We needed a little bit of shuffling
in the encoding to make this fit.
The changes are as follows:
- There's now a flag group field at bit 31 (the same as other
instructions that take a flag group)
- The "wb" field, which chooses between the three writeback modes,
has moved from bits 31-30 to bits 30-29 and has been split into two
parts: "so" and "wb0". "so" is set only for bn.mulqacc.so (as the top
bit of "wb" was). If "so" is clear, "wb0" selects between bn.mulqacc
(0) and bn.mulqacc.wo (1). If "so" is set, "wb0" selects the
destination half-word (0 = lower, 1 = upper).
This commit includes all ISS, RTL and specification changes required to
implement the above.
Fixes #2979
Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/data/bignum-insns.yml b/hw/ip/otbn/data/bignum-insns.yml
index 3cb5841..a855473 100644
--- a/hw/ip/otbn/data/bignum-insns.yml
+++ b/hw/ip/otbn/data/bignum-insns.yml
@@ -208,11 +208,10 @@
[<zero_acc>] <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
glued-ops: true
doc: |
- Multiplies two `WLEN/4` WDR values, shifts the product by `acc_shift_imm` bit, and adds the result to the accumulator.
+ Multiplies two `WLEN/4` WDR values, shifts the product by `acc_shift_imm` bits, and adds the result to the accumulator.
For versions of the instruction with writeback, see `BN.MULQACC.WO` and `BN.MULQACC.SO`.
decode: |
- writeback_variant = None
zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
d = None
@@ -222,7 +221,7 @@
d_hwsel = None
a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
- operation: &mulqacc-operation |
+ operation: |
a_qw = GetQuarterWord(a, a_qwsel)
b_qw = GetQuarterWord(b, b_qwsel)
@@ -232,26 +231,17 @@
ACC = 0
ACC = ACC + (mul_res << acc_shift_imm)
-
- if writeback_variant == 'shiftout':
- if d_hwsel == 'L':
- WDR[d][WLEN/2-1:0] = ACC[WLEN/2-1:0]
- elif d_hwsel == 'U':
- WDR[d][WLEN-1:WLEN/2] = ACC[WLEN/2-1:0]
- ACC = ACC >> (WLEN/2)
-
- elif writeback_variant == 'writeout':
- WDR[d] = ACC
encoding:
scheme: bnaq
mapping:
- wb: b00
- dh: bx
+ fg: bx
+ so: b0
+ wb0: b0
qs2: wrs2_qwsel
qs1: wrs1_qwsel
wrs2: wrs2
wrs1: wrs1
- acc: acc_shift_imm
+ shift: acc_shift_imm
z: zero_acc
wrd: bxxxxx
@@ -267,14 +257,15 @@
- *mulqacc-wrs2
- *mulqacc-wrs2-qwsel
- *mulqacc-acc-shift-imm
+ - *bn-flag-group-operand
syntax: |
- [<zero_acc>] <wrd>, <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
+ [<zero_acc>] <wrd>, <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>[, FG<flag_group>]
glued-ops: true
doc: |
- Multiplies two `WLEN/4` WDR values, shifts the product by `acc_shift_imm` bit, and adds the result to the accumulator.
+ Multiplies two `WLEN/4` WDR values, shifts the product by `acc_shift_imm` bits, and adds the result to the accumulator.
Writes the resulting accumulator to `wrd`.
+ Updates the M, L and Z flags of `flag_group`.
decode: |
- writeback_variant = 'writeout'
zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
d = UInt(wrd)
@@ -284,17 +275,34 @@
d_hwsel = None
a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
- operation: *mulqacc-operation
+
+ fg = DecodeFlagGroup(flag_group)
+ operation: |
+ a_qw = GetQuarterWord(a, a_qwsel)
+ b_qw = GetQuarterWord(b, b_qwsel)
+
+ mul_res = a_qw * b_qw
+
+ if zero_accumulator:
+ ACC = 0
+
+ ACC = ACC + (mul_res << acc_shift_imm)
+
+ WDR[d] = ACC
+ FLAGS[fg].M = ACC[WLEN-1]
+ FLAGS[fg].L = ACC[0]
+ FLAGS[fg].Z = (ACC == 0)
encoding:
scheme: bnaq
mapping:
- wb: b01
- dh: bx
+ fg: flag_group
+ so: b0
+ wb0: b1
qs2: wrs2_qwsel
qs1: wrs1_qwsel
wrs2: wrs2
wrs1: wrs1
- acc: acc_shift_imm
+ shift: acc_shift_imm
z: zero_acc
wrd: wrd
@@ -313,17 +321,25 @@
- *mulqacc-wrs2
- *mulqacc-wrs2-qwsel
- *mulqacc-acc-shift-imm
+ - *bn-flag-group-operand
syntax: |
[<zero_acc>] <wrd>.<wrd_hwsel>,
- <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>
+ <wrs1>.<wrs1_qwsel>, <wrs2>.<wrs2_qwsel>, <acc_shift_imm>[, FG<flag_group>]
glued-ops: true
doc: |
- Multiplies two `WLEN/4` WDR values, shifts the product by `<acc_shift_imm>` and adds the result to the accumulator.
- Next, shifts the resulting accumulator right by half a word.
- The bits that are shifted out are written to a half-word of `<wrd>`, selected with `<wrd_hwsel>`.
+ Multiplies two `WLEN/4` WDR values, shifts the product by `acc_shift_imm` bits and adds the result to the accumulator.
+ Next, shifts the resulting accumulator right by half a word (128 bits).
+ The bits that are shifted out are written to a half-word of `wrd`, selected with `wrd_hwsel`.
+ This instruction never changes the `C` flag.
+ If `wrd_hwsel` is zero (so the instruction is updating the lower half-word of `wrd`), it updates the `L` and `Z` flags and leaves `M` unchanged.
+ The `L` flag is set iff the bottom bit of the shifted-out result is set.
+ The `Z` flag is set iff the shifted-out result is zero.
+
+ If `wrd_hwsel` is one (so the instruction is updating the upper half-word of `wrd`), it updates the `M` and `Z` flags and leaves `L` unchanged.
+ The `M` flag is set iff the top bit of the shifted-out result is set.
+ The `Z` flag is left unchanged if the shifted-out result is zero and cleared if not.
decode: |
- writeback_variant = 'shiftout'
zero_accumulator = DecodeMulqaccZeroacc(zero_acc)
d = UInt(wrd)
@@ -333,17 +349,42 @@
d_hwsel = DecodeHalfWordSelect(wrd_hwsel)
a_qwsel = DecodeQuarterWordSelect(wrs1_qwsel)
b_qwsel = DecodeQuarterWordSelect(wrs2_qwsel)
- operation: *mulqacc-operation
+
+ fg = DecodeFlagGroup(flag_group)
+ operation: |
+ a_qw = GetQuarterWord(a, a_qwsel)
+ b_qw = GetQuarterWord(b, b_qwsel)
+
+ mul_res = a_qw * b_qw
+
+ if zero_accumulator:
+ ACC = 0
+
+ ACC = ACC + (mul_res << acc_shift_imm)
+
+ shifted = ACC[WLEN/2-1:0]
+ ACC = ACC >> (WLEN/2)
+
+ if d_hwsel == 'L':
+ WDR[d][WLEN/2-1:0] = shifted
+ FLAGS[fg].L = shifted[0]
+ FLAGS[fg].Z = (shifted == 0)
+ elif d_hwsel == 'U':
+ WDR[d][WLEN-1:WLEN/2] = shifted
+ FLAGS[fg].M = shifted[WLEN/2-1]
+ if (shifted != 0):
+ FLAGS[fg].Z = 0
encoding:
scheme: bnaq
mapping:
- wb: b1x
- dh: wrd_hwsel
+ fg: flag_group
+ so: b1
+ wb0: wrd_hwsel
qs2: wrs2_qwsel
qs1: wrs1_qwsel
wrs2: wrs2
wrs1: wrs1
- acc: acc_shift_imm
+ shift: acc_shift_imm
z: zero_acc
wrd: wrd
diff --git a/hw/ip/otbn/data/enc-schemes.yml b/hw/ip/otbn/data/enc-schemes.yml
index 34f8ae3..0df2a01 100644
--- a/hw/ip/otbn/data/enc-schemes.yml
+++ b/hw/ip/otbn/data/enc-schemes.yml
@@ -269,12 +269,13 @@
parents:
- custom2
- wdr3
+ - fg
fields:
- wb: 31-30
- dh: 29
+ so: 30
+ wb0: 29
qs2: 28-27
qs1: 26-25
- acc: 14-13
+ shift: 14-13
z: 12
# Unusual scheme used for bn.rshi (the immediate bleeds into the usual funct3
diff --git a/hw/ip/otbn/dv/otbnsim/sim/flags.py b/hw/ip/otbn/dv/otbnsim/sim/flags.py
index e07dd33..c195e23 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/flags.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/flags.py
@@ -22,8 +22,8 @@
def __init__(self, C: bool, M: bool, L: bool, Z: bool):
self.C = C
- self.L = L
self.M = M
+ self.L = L
self.Z = Z
self._new_val = None # type: Optional['FlagReg']
diff --git a/hw/ip/otbn/dv/otbnsim/sim/insn.py b/hw/ip/otbn/dv/otbnsim/sim/insn.py
index c36791d..836bd4b 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/insn.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/insn.py
@@ -4,9 +4,10 @@
from typing import Dict
-from .state import OTBNState
+from .flags import FlagReg
from .isa import (OTBNInsn, RV32RegReg, RV32RegImm, RV32ImmShift,
insn_for_mnemonic, logical_byte_shift)
+from .state import OTBNState
class ADD(RV32RegReg):
@@ -452,7 +453,7 @@
class BNMULQACCWO(OTBNInsn):
- insn = insn_for_mnemonic('bn.mulqacc.wo', 7)
+ insn = insn_for_mnemonic('bn.mulqacc.wo', 8)
def __init__(self, op_vals: Dict[str, int]):
super().__init__(op_vals)
@@ -463,6 +464,7 @@
self.wrs2 = op_vals['wrs2']
self.wrs2_qwsel = op_vals['wrs2_qwsel']
self.acc_shift_imm = op_vals['acc_shift_imm']
+ self.flag_group = op_vals['flag_group']
def execute(self, state: OTBNState) -> None:
a_qw = state.get_quarter_word_unsigned(self.wrs1, self.wrs1_qwsel)
@@ -479,10 +481,11 @@
truncated = acc & ((1 << 256) - 1)
state.wdrs.get_reg(self.wrd).write_unsigned(truncated)
state.wsrs.ACC.write_unsigned(truncated)
+ state.set_mlz_flags(self.flag_group, truncated)
class BNMULQACCSO(OTBNInsn):
- insn = insn_for_mnemonic('bn.mulqacc.so', 8)
+ insn = insn_for_mnemonic('bn.mulqacc.so', 9)
def __init__(self, op_vals: Dict[str, int]):
super().__init__(op_vals)
@@ -494,6 +497,7 @@
self.wrs2 = op_vals['wrs2']
self.wrs2_qwsel = op_vals['wrs2_qwsel']
self.acc_shift_imm = op_vals['acc_shift_imm']
+ self.flag_group = op_vals['flag_group']
def execute(self, state: OTBNState) -> None:
a_qw = state.get_quarter_word_unsigned(self.wrs1, self.wrs1_qwsel)
@@ -513,6 +517,19 @@
state.set_half_word_unsigned(self.wrd, self.wrd_hwsel, lo_part)
state.wsrs.ACC.write_unsigned(hi_part)
+ old_flags = state.csrs.flags[self.flag_group]
+ if self.wrd_hwsel:
+ new_flags = FlagReg(C=old_flags.C,
+ M=bool((lo_part >> 127) & 1),
+ L=old_flags.L,
+ Z=old_flags.Z and lo_part == 0)
+ else:
+ new_flags = FlagReg(C=old_flags.C,
+ M=old_flags.M,
+ L=bool(lo_part & 1),
+ Z=lo_part == 0)
+ state.csrs.flags[self.flag_group] = new_flags
+
class BNSUB(OTBNInsn):
insn = insn_for_mnemonic('bn.sub', 6)
diff --git a/hw/ip/otbn/dv/tracer/rtl/otbn_trace_intf.sv b/hw/ip/otbn/dv/tracer/rtl/otbn_trace_intf.sv
index 63a28c0..8004dee 100644
--- a/hw/ip/otbn/dv/tracer/rtl/otbn_trace_intf.sv
+++ b/hw/ip/otbn/dv/tracer/rtl/otbn_trace_intf.sv
@@ -196,14 +196,21 @@
flags_t flags_write_data [NFlagGroups];
logic [NFlagGroups-1:0] flags_read;
flags_t flags_read_data [NFlagGroups];
+ logic flag_group_read_op;
+
+ // Determine if current instruction reads a flag group specified in the instruction.
+ assign flag_group_read_op =
+ alu_bignum_operation.mac_flag_en |
+ (alu_bignum_operation.op inside {AluOpBignumAddc, AluOpBignumSubb, AluOpBignumSel,
+ AluOpBignumXor, AluOpBignumOr, AluOpBignumAnd,
+ AluOpBignumNot});
for (genvar i_fg = 0; i_fg < NFlagGroups; i_fg++) begin : g_flag_group_acceses
assign flags_write[i_fg] = u_otbn_alu_bignum.flags_en[i_fg];
assign flags_write_data[i_fg] = u_otbn_alu_bignum.flags_d[i_fg];
assign flags_read[i_fg] = (any_ispr_read & (ispr_addr == IsprFlags)) |
- ((alu_bignum_operation.op inside {AluOpBignumAddc, AluOpBignumSubb, AluOpBignumSel}) &
- (alu_bignum_operation.flag_group == i_fg) & insn_fetch_resp_valid);
+ (flag_group_read_op & (alu_bignum_operation.flag_group == i_fg) & insn_fetch_resp_valid);
assign flags_read_data[i_fg] = u_otbn_alu_bignum.flags_q[i_fg];
end
diff --git a/hw/ip/otbn/rtl/otbn_alu_bignum.sv b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
index 3522aae..2227a5d 100644
--- a/hw/ip/otbn/rtl/otbn_alu_bignum.sv
+++ b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
@@ -84,6 +84,9 @@
output logic [WLEN-1:0] ispr_acc_wr_data_o,
output logic ispr_acc_wr_en_o,
+ input flags_t mac_operation_flags_i,
+ input flags_t mac_operation_flags_en_i,
+
input logic [WLEN-1:0] rnd_i
);
///////////
@@ -100,16 +103,25 @@
logic adder_update_flags_en, adder_update_flags_en_raw;
flags_t logic_update_flags;
logic logic_update_flags_en, logic_update_flags_en_raw;
+ flags_t mac_update_flags;
+ logic mac_update_flags_en;
logic ispr_update_flags_en;
- assign adder_update_flags_en = operation_i.flag_en & adder_update_flags_en_raw;
- assign logic_update_flags_en = operation_i.flag_en & logic_update_flags_en_raw;
+ assign adder_update_flags_en = operation_i.alu_flag_en & adder_update_flags_en_raw;
+ assign logic_update_flags_en = operation_i.alu_flag_en & logic_update_flags_en_raw;
+ assign mac_update_flags_en = operation_i.mac_flag_en;
assign ispr_update_flags_en = (ispr_base_wr_en_i[0] & (ispr_addr_i == IsprFlags));
`ASSERT(UpdateFlagsOnehot,
- $onehot0({adder_update_flags_en, logic_update_flags_en, ispr_update_flags_en}))
+ $onehot0({adder_update_flags_en, logic_update_flags_en, mac_update_flags_en,
+ ispr_update_flags_en}))
+
+ assign selected_flags = flags_q[operation_i.flag_group];
+
+ assign mac_update_flags = (selected_flags & ~mac_operation_flags_en_i) |
+ (mac_operation_flags_i & mac_operation_flags_en_i);
for (genvar i_fg = 0; i_fg < NFlagGroups; i_fg++) begin : g_flag_groups
always_ff @(posedge clk_i or negedge rst_ni) begin
@@ -132,6 +144,7 @@
unique case (1'b1)
adder_update_flags_en: flags_d[i_fg] = adder_update_flags;
logic_update_flags_en: flags_d[i_fg] = logic_update_flags;
+ mac_update_flags_en: flags_d[i_fg] = mac_update_flags;
ispr_update_flags_en: flags_d[i_fg] = ispr_base_wdata_i[i_fg * FlagsWidth +: FlagsWidth];
default: ;
endcase
@@ -139,10 +152,10 @@
assign flags_en[i_fg] = ispr_update_flags_en |
(adder_update_flags_en & is_operation_flag_group[i_fg]) |
- (logic_update_flags_en & is_operation_flag_group[i_fg]);
+ (logic_update_flags_en & is_operation_flag_group[i_fg]) |
+ (mac_update_flags_en & is_operation_flag_group[i_fg]);
end
- assign selected_flags = flags_q[operation_i.flag_group];
logic [WLEN-1:0] mod_q;
logic [WLEN-1:0] mod_d;
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index e93e3cd..b0c5e16 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -384,12 +384,14 @@
assign alu_bignum_operation_o.shift_amt = insn_dec_bignum_i.alu_shift_amt;
assign alu_bignum_operation_o.flag_group = insn_dec_bignum_i.alu_flag_group;
assign alu_bignum_operation_o.sel_flag = insn_dec_bignum_i.alu_sel_flag;
- assign alu_bignum_operation_o.flag_en = insn_dec_bignum_i.alu_flag_en;
+ assign alu_bignum_operation_o.alu_flag_en = insn_dec_bignum_i.alu_flag_en;
+ assign alu_bignum_operation_o.mac_flag_en = insn_dec_bignum_i.mac_flag_en;
assign mac_bignum_operation_o.operand_a = rf_bignum_rd_data_a_i;
assign mac_bignum_operation_o.operand_b = rf_bignum_rd_data_b_i;
assign mac_bignum_operation_o.operand_a_qw_sel = insn_dec_bignum_i.mac_op_a_qw_sel;
assign mac_bignum_operation_o.operand_b_qw_sel = insn_dec_bignum_i.mac_op_b_qw_sel;
+ assign mac_bignum_operation_o.wr_hw_sel_upper = insn_dec_bignum_i.mac_wr_hw_sel_upper;
assign mac_bignum_operation_o.pre_acc_shift_imm = insn_dec_bignum_i.mac_pre_acc_shift;
assign mac_bignum_operation_o.zero_acc = insn_dec_bignum_i.mac_zero_acc;
assign mac_bignum_operation_o.shift_acc = insn_dec_bignum_i.mac_shift_out;
@@ -407,8 +409,8 @@
if (insn_valid_i && insn_dec_bignum_i.rf_we) begin
if (insn_dec_bignum_i.mac_en && insn_dec_bignum_i.mac_shift_out) begin
// Special handling for BN.MULQACC.SO, only enable upper or lower half depending on
- // mac_wr_hw_sel.
- rf_bignum_wr_en_o = insn_dec_bignum_i.mac_wr_hw_sel ? 2'b10 : 2'b01;
+ // mac_wr_hw_sel_upper.
+ rf_bignum_wr_en_o = insn_dec_bignum_i.mac_wr_hw_sel_upper ? 2'b10 : 2'b01;
end else if (insn_dec_shared_i.ld_insn) begin
// Special handling for BN.LID. Load data is requested in the first cycle of the instruction
// (where state_q == OtbnStateRun) and is available in the second cycle following the
@@ -428,15 +430,15 @@
insn_dec_bignum_i.d;
// For the shift-out variant of BN.MULQACC the bottom half of the MAC result is written to one
- // half of a desintation register specified by the instruction (mac_wr_hw_sel). The bottom half of
+ // half of a destination register specified by the instruction (mac_wr_hw_sel_upper). The bottom half of
// the MAC result must be placed in the appropriate half of the write data (the RF only accepts
// write data for the top half in the top half of the write data input). Otherwise (shift-out to
// bottom half and all other BN.MULQACC instructions) simply pass the MAC result through unchanged
// as write data.
assign mac_bignum_rf_wr_data[WLEN-1:WLEN/2] =
- insn_dec_bignum_i.mac_wr_hw_sel &&
- insn_dec_bignum_i.mac_shift_out ? mac_bignum_operation_result_i[WLEN/2-1:0] :
- mac_bignum_operation_result_i[WLEN-1:WLEN/2];
+ insn_dec_bignum_i.mac_wr_hw_sel_upper &&
+ insn_dec_bignum_i.mac_shift_out ? mac_bignum_operation_result_i[WLEN/2-1:0] :
+ mac_bignum_operation_result_i[WLEN-1:WLEN/2];
assign mac_bignum_rf_wr_data[WLEN/2-1:0] = mac_bignum_operation_result_i[WLEN/2-1:0];
diff --git a/hw/ip/otbn/rtl/otbn_core.sv b/hw/ip/otbn/rtl/otbn_core.sv
index 2542c83..ff75ef7 100644
--- a/hw/ip/otbn/rtl/otbn_core.sv
+++ b/hw/ip/otbn/rtl/otbn_core.sv
@@ -117,6 +117,8 @@
mac_bignum_operation_t mac_bignum_operation;
logic [WLEN-1:0] mac_bignum_operation_result;
+ flags_t mac_bignum_operation_flags;
+ flags_t mac_bignum_operation_flags_en;
logic mac_bignum_en;
ispr_e ispr_addr;
@@ -356,29 +358,34 @@
.clk_i,
.rst_ni,
- .operation_i (alu_bignum_operation),
- .operation_result_o (alu_bignum_operation_result),
+ .operation_i (alu_bignum_operation),
+ .operation_result_o (alu_bignum_operation_result),
- .ispr_addr_i (ispr_addr),
- .ispr_base_wdata_i (ispr_base_wdata),
- .ispr_base_wr_en_i (ispr_base_wr_en),
- .ispr_bignum_wdata_i (ispr_bignum_wdata),
- .ispr_bignum_wr_en_i (ispr_bignum_wr_en),
- .ispr_rdata_o (ispr_rdata),
+ .ispr_addr_i (ispr_addr),
+ .ispr_base_wdata_i (ispr_base_wdata),
+ .ispr_base_wr_en_i (ispr_base_wr_en),
+ .ispr_bignum_wdata_i (ispr_bignum_wdata),
+ .ispr_bignum_wr_en_i (ispr_bignum_wr_en),
+ .ispr_rdata_o (ispr_rdata),
- .ispr_acc_i (ispr_acc),
- .ispr_acc_wr_data_o (ispr_acc_wr_data),
- .ispr_acc_wr_en_o (ispr_acc_wr_en),
+ .ispr_acc_i (ispr_acc),
+ .ispr_acc_wr_data_o (ispr_acc_wr_data),
+ .ispr_acc_wr_en_o (ispr_acc_wr_en),
- .rnd_i (rnd)
+ .mac_operation_flags_i (mac_bignum_operation_flags),
+ .mac_operation_flags_en_i (mac_bignum_operation_flags_en),
+
+ .rnd_i (rnd)
);
otbn_mac_bignum u_otbn_mac_bignum (
.clk_i,
.rst_ni,
- .operation_i (mac_bignum_operation),
- .operation_result_o (mac_bignum_operation_result),
+ .operation_i (mac_bignum_operation),
+ .operation_result_o (mac_bignum_operation_result),
+ .operation_flags_o (mac_bignum_operation_flags),
+ .operation_flags_en_o (mac_bignum_operation_flags_en),
.mac_en_i (mac_bignum_en),
diff --git a/hw/ip/otbn/rtl/otbn_decoder.sv b/hw/ip/otbn/rtl/otbn_decoder.sv
index 55d042b..ed3a336 100644
--- a/hw/ip/otbn/rtl/otbn_decoder.sv
+++ b/hw/ip/otbn/rtl/otbn_decoder.sv
@@ -79,7 +79,7 @@
logic [1:0] mac_op_a_qw_sel_bignum;
logic [1:0] mac_op_b_qw_sel_bignum;
- logic mac_wr_hw_sel_bignum;
+ logic mac_wr_hw_sel_upper_bignum;
logic [1:0] mac_pre_acc_shift_bignum;
logic mac_zero_acc_bignum;
logic mac_shift_out_bignum;
@@ -131,6 +131,7 @@
assign alu_sel_flag_bignum = flag_e'(insn[26:25]);
logic alu_flag_en_bignum;
+ logic mac_flag_en_bignum;
// source registers
assign insn_rs1 = insn[19:15];
@@ -149,12 +150,12 @@
assign loop_bodysize_base = insn[31:20];
assign loop_immediate_base = insn[12];
- assign mac_op_a_qw_sel_bignum = insn[26:25];
- assign mac_op_b_qw_sel_bignum = insn[28:27];
- assign mac_wr_hw_sel_bignum = insn[29];
- assign mac_pre_acc_shift_bignum = insn[14:13];
- assign mac_zero_acc_bignum = insn[12];
- assign mac_shift_out_bignum = insn[31];
+ assign mac_op_a_qw_sel_bignum = insn[26:25];
+ assign mac_op_b_qw_sel_bignum = insn[28:27];
+ assign mac_wr_hw_sel_upper_bignum = insn[29];
+ assign mac_pre_acc_shift_bignum = insn[14:13];
+ assign mac_zero_acc_bignum = insn[12];
+ assign mac_shift_out_bignum = insn[30];
logic d_inc_bignum;
logic a_inc_bignum;
@@ -216,35 +217,36 @@
};
assign insn_dec_bignum_o = '{
- a: insn_rs1,
- b: insn_rs2,
- d: insn_rd,
- i: imm_i_type_bignum,
- rf_a_indirect: rf_a_indirect_bignum,
- rf_b_indirect: rf_b_indirect_bignum,
- rf_d_indirect: rf_d_indirect_bignum,
- d_inc: d_inc_bignum,
- a_inc: a_inc_bignum,
- a_wlen_word_inc: a_wlen_word_inc_bignum,
- b_inc: b_inc_bignum,
- alu_shift_amt: alu_shift_amt_bignum,
- alu_shift_right: alu_shift_right_bignum,
- alu_flag_group: alu_flag_group_bignum,
- alu_sel_flag: alu_sel_flag_bignum,
- alu_flag_en: alu_flag_en_bignum,
- alu_op: alu_operator_bignum,
- alu_op_b_sel: alu_op_b_mux_sel_bignum,
- mac_op_a_qw_sel: mac_op_a_qw_sel_bignum,
- mac_op_b_qw_sel: mac_op_b_qw_sel_bignum,
- mac_wr_hw_sel: mac_wr_hw_sel_bignum,
- mac_pre_acc_shift: mac_pre_acc_shift_bignum,
- mac_zero_acc: mac_zero_acc_bignum,
- mac_shift_out: mac_shift_out_bignum,
- mac_en: mac_en_bignum,
- rf_we: rf_we_bignum,
- rf_wdata_sel: rf_wdata_sel_bignum,
- rf_ren_a: rf_ren_a_bignum,
- rf_ren_b: rf_ren_b_bignum
+ a: insn_rs1,
+ b: insn_rs2,
+ d: insn_rd,
+ i: imm_i_type_bignum,
+ rf_a_indirect: rf_a_indirect_bignum,
+ rf_b_indirect: rf_b_indirect_bignum,
+ rf_d_indirect: rf_d_indirect_bignum,
+ d_inc: d_inc_bignum,
+ a_inc: a_inc_bignum,
+ a_wlen_word_inc: a_wlen_word_inc_bignum,
+ b_inc: b_inc_bignum,
+ alu_shift_amt: alu_shift_amt_bignum,
+ alu_shift_right: alu_shift_right_bignum,
+ alu_flag_group: alu_flag_group_bignum,
+ alu_sel_flag: alu_sel_flag_bignum,
+ alu_flag_en: alu_flag_en_bignum,
+ mac_flag_en: mac_flag_en_bignum,
+ alu_op: alu_operator_bignum,
+ alu_op_b_sel: alu_op_b_mux_sel_bignum,
+ mac_op_a_qw_sel: mac_op_a_qw_sel_bignum,
+ mac_op_b_qw_sel: mac_op_b_qw_sel_bignum,
+ mac_wr_hw_sel_upper: mac_wr_hw_sel_upper_bignum,
+ mac_pre_acc_shift: mac_pre_acc_shift_bignum,
+ mac_zero_acc: mac_zero_acc_bignum,
+ mac_shift_out: mac_shift_out_bignum,
+ mac_en: mac_en_bignum,
+ rf_we: rf_we_bignum,
+ rf_wdata_sel: rf_wdata_sel_bignum,
+ rf_ren_a: rf_ren_a_bignum,
+ rf_ren_b: rf_ren_b_bignum
};
assign insn_dec_shared_o = '{
@@ -633,7 +635,7 @@
rf_wdata_sel_bignum = RfWdSelMac;
mac_en_bignum = 1'b1;
- if (insn[31:30] != 2'b00) begin // BN.MULQACC.WO/BN.MULQACC.SO
+ if (insn[30] == 1'b1 || insn[29] == 1'b1) begin // BN.MULQACC.WO/BN.MULQACC.SO
rf_we_bignum = 1'b1;
end
end
@@ -672,6 +674,7 @@
opcode_alu = insn_opcode_e'(insn_alu[6:0]);
alu_flag_en_bignum = 1'b0;
+ mac_flag_en_bignum = 1'b0;
unique case (opcode_alu)
//////////////
@@ -907,6 +910,16 @@
default: ;
endcase
end
+
+ ////////////////////////////////////////////
+ // BN.MULQACC/BN.MULQACC.WO/BN.MULQACC.SO //
+ ////////////////////////////////////////////
+
+ InsnOpcodeBignumMulqacc: begin
+ if (insn[30] == 1'b1 || insn[29] == 1'b1) begin // BN.MULQACC.WO/BN.MULQACC.SO
+ mac_flag_en_bignum = 1'b1;
+ end
+ end
endcase
end
diff --git a/hw/ip/otbn/rtl/otbn_mac_bignum.sv b/hw/ip/otbn/rtl/otbn_mac_bignum.sv
index 4de8e1b..ed667c0 100644
--- a/hw/ip/otbn/rtl/otbn_mac_bignum.sv
+++ b/hw/ip/otbn/rtl/otbn_mac_bignum.sv
@@ -14,6 +14,8 @@
input logic mac_en_i,
output logic [WLEN-1:0] operation_result_o,
+ output flags_t operation_flags_o,
+ output flags_t operation_flags_en_o,
output logic [WLEN-1:0] ispr_acc_o,
input logic [WLEN-1:0] ispr_acc_wr_data_i,
@@ -25,6 +27,7 @@
logic [WLEN-1:0] adder_op_a;
logic [WLEN-1:0] adder_op_b;
logic [WLEN-1:0] adder_result;
+ logic [1:0] adder_result_hw_is_zero;
logic [QWLEN-1:0] mul_op_a;
logic [QWLEN-1:0] mul_op_b;
@@ -91,6 +94,39 @@
assign adder_result = adder_op_a + adder_op_b;
+ // Split zero check between the two halves of the result. This is used for flag setting (see
+ // below).
+ assign adder_result_hw_is_zero[0] = adder_result[WLEN/2-1:0] == 'h0;
+ assign adder_result_hw_is_zero[1] = adder_result[WLEN/2+:WLEN/2] == 'h0;
+
+ assign operation_flags_o.L = adder_result[0];
+ // L is always updated for .WO, and for .SO when writing to the lower half-word
+ assign operation_flags_en_o.L = operation_i.shift_acc ? ~operation_i.wr_hw_sel_upper :
+ 1'b1;
+
+ // For .SO M is taken from the top-bit of shifted out half-word, otherwise it is taken from the top-bit
+ // of the full result.
+ assign operation_flags_o.M = operation_i.shift_acc ? adder_result[WLEN/2-1] :
+ adder_result[WLEN-1];
+ // M is always updated for .WO, and for .SO when writing to the upper half-word.
+ assign operation_flags_en_o.M = operation_i.shift_acc ? operation_i.wr_hw_sel_upper :
+ 1'b1;
+
+ // For .SO Z is calculated from the shifted out half-word, otherwise it is calculated on the full result.
+ assign operation_flags_o.Z = operation_i.shift_acc ? adder_result_hw_is_zero[0] :
+ &adder_result_hw_is_zero;
+
+ // Z is updated for .WO. For .SO updates are based upon result and half-word:
+ // - When writing to lower half-word always update Z.
+ // - When writing to upper half-word clear Z if result is non-zero otherwise leave it alone.
+ assign operation_flags_en_o.Z =
+ operation_i.shift_acc & operation_i.wr_hw_sel_upper ? ~adder_result_hw_is_zero[0] :
+ 1'b1;
+
+ // MAC never sets the carry flag
+ assign operation_flags_o.C = 1'b0;
+ assign operation_flags_en_o.C = 1'b0;
+
// If performing an ACC ISPR write the next accumulator value is taken from the ISPR write data,
// otherwise it is drawn from the adder result. The new accumulator can be optionally shifted
// right by one half-word (shift_acc).
diff --git a/hw/ip/otbn/rtl/otbn_pkg.sv b/hw/ip/otbn/rtl/otbn_pkg.sv
index cb9713d..03d0123 100644
--- a/hw/ip/otbn/rtl/otbn_pkg.sv
+++ b/hw/ip/otbn/rtl/otbn_pkg.sv
@@ -282,12 +282,13 @@
flag_group_t alu_flag_group;
flag_e alu_sel_flag;
logic alu_flag_en;
+ logic mac_flag_en;
alu_op_bignum_e alu_op;
op_b_sel_e alu_op_b_sel;
logic [1:0] mac_op_a_qw_sel;
logic [1:0] mac_op_b_qw_sel;
- logic mac_wr_hw_sel;
+ logic mac_wr_hw_sel_upper;
logic [1:0] mac_pre_acc_shift;
logic mac_zero_acc;
logic mac_shift_out;
@@ -319,7 +320,8 @@
logic [$clog2(WLEN)-1:0] shift_amt;
flag_group_t flag_group;
flag_e sel_flag;
- logic flag_en;
+ logic alu_flag_en;
+ logic mac_flag_en;
} alu_bignum_operation_t;
typedef struct packed {
@@ -327,6 +329,7 @@
logic [WLEN-1:0] operand_b;
logic [1:0] operand_a_qw_sel;
logic [1:0] operand_b_qw_sel;
+ logic wr_hw_sel_upper;
logic [1:0] pre_acc_shift_imm;
logic zero_acc;
logic shift_acc;