[otbn] Implement BN.MULQACC[.SO|.WO]

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/dv/smoke/smoke_expected.txt b/hw/ip/otbn/dv/smoke/smoke_expected.txt
index 7dcbb9f..d85f618 100644
--- a/hw/ip/otbn/dv/smoke/smoke_expected.txt
+++ b/hw/ip/otbn/dv/smoke/smoke_expected.txt
@@ -41,7 +41,7 @@
 Final Bignum Register Values:
 Reg | Value
 -------------------------------------------------------------------------------
-w0  | 0x00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000
+w0  | 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
 w1  | 0x78fccc06_2228e9d6_89c9b54f_887cf14e_c79af825_69be586e_9866bb3b_53769ada
 w2  | 0x99999999_99999999_99999999_99999999_99999999_99999999_99999999_99999999
 w3  | 0x1296659f_bbc28370_23634ee9_22168ae8_613491bf_0357f208_320054d4_ed103473
@@ -70,6 +70,6 @@
 w26 | 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
 w27 | 0x18988800_00088990_89899109_88189108_81989801_09981808_98009919_11109898
 w28 | 0xe165559f_bbb1704f_10502cd6_11e568d7_5e0361bc_f027c1f7_01ff22a2_caef0343
-w29 | 0x00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000
-w30 | 0x00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000
-w31 | 0x00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000
+w29 | 0x41575c5d_24cf5526_1a1d070d_673963ce_e80fed2a_13c1b84d_b1fddf94_eb0953a3
+w30 | 0x15a7cbef_a5f473e1_860c1110_6bcc33ed_1583aef1_8130f3df_1a806984_c4f3507e
+w31 | 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
diff --git a/hw/ip/otbn/dv/smoke/smoke_test.s b/hw/ip/otbn/dv/smoke/smoke_test.s
index 243fab8..98b53cf 100644
--- a/hw/ip/otbn/dv/smoke/smoke_test.s
+++ b/hw/ip/otbn/dv/smoke/smoke_test.s
@@ -229,6 +229,38 @@
 bn.cmpb w4, w3
 bn.sel w28, w7, w8, FG0.L
 
+# acc = w26 = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
+bn.wsrrw w0, 2, w26  # WSR 2 is ACC (it is read back into w0 at the end of this section)
+
+# {w30, w29} = (w28 * w27 + acc) =
+# 0x15a7cbef_a5f473e1_860c1110_6bcc33ed_1583aef1_8130f3df_1a806984_c4f3507e
+#   41575c5d_24cf5526_1a1d070d_673963ce_e80fed2a_13c1b84d_b1fddf94_eb0953a3
+bn.mulqacc           w27.0, w28.0, 0  # acc += (w27 quarter-word 0 * w28 quarter-word 0) << 0
+bn.mulqacc           w27.1, w28.0, 64  # acc += (w27 quarter-word 1 * w28 quarter-word 0) << 64
+bn.mulqacc.so w29.L, w27.0, w28.1, 64  # w29[127:0] = sum[127:0]; acc = sum >> 128
+bn.mulqacc           w27.2, w28.0, 0
+bn.mulqacc           w27.1, w28.1, 0
+bn.mulqacc           w27.0, w28.2, 0
+bn.mulqacc           w27.3, w28.0, 64
+bn.mulqacc           w27.2, w28.1, 64
+bn.mulqacc           w27.1, w28.2, 64
+bn.mulqacc.so w29.U, w27.0, w28.3, 64  # w29[255:128] = sum[127:0]; acc = sum >> 128
+bn.mulqacc           w27.3, w28.1, 0
+bn.mulqacc           w27.2, w28.2, 0
+bn.mulqacc           w27.1, w28.3, 0
+bn.mulqacc           w27.3, w28.2, 64
+bn.mulqacc.so w30.L, w27.2, w28.3, 64  # w30[127:0] = sum[127:0]; acc = sum >> 128
+bn.mulqacc.so w30.U, w27.3, w28.3, 0  # w30[255:128] = sum[127:0]; acc = sum >> 128
+
+# w31 = w28[127:0] * w27[127:0] = 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
+bn.mulqacc.Z       w27.0, w28.0, 0  # .Z: add the product to 0 instead of the old acc value
+bn.mulqacc         w27.0, w28.1, 64
+bn.mulqacc         w27.1, w28.0, 64
+bn.mulqacc.wo w31, w27.1, w28.1, 128  # w31 = full 256-bit sum; acc holds the same value
+
+# w0 = acc = 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
+bn.wsrrs w0, 2, w0
+
 # Nested loop testing, inner adds repeated a total of 3 * 5 = 15 times
 # x28 = 4, x29 = 3
 li x28, 4
diff --git a/hw/ip/otbn/otbn.core b/hw/ip/otbn/otbn.core
index 2a57bff..1a78fdd 100644
--- a/hw/ip/otbn/otbn.core
+++ b/hw/ip/otbn/otbn.core
@@ -29,6 +29,7 @@
       - rtl/otbn_lsu.sv
       - rtl/otbn_alu_base.sv
       - rtl/otbn_alu_bignum.sv
+      - rtl/otbn_mac_bignum.sv
       - rtl/otbn_loop_controller.sv
       - rtl/otbn_stack.sv
       - rtl/otbn_core.sv
diff --git a/hw/ip/otbn/rtl/otbn_alu_bignum.sv b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
index 1671178..45a1328 100644
--- a/hw/ip/otbn/rtl/otbn_alu_bignum.sv
+++ b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
@@ -80,6 +80,10 @@
   input  logic                        ispr_bignum_wr_en_i,
   output logic [WLEN-1:0]             ispr_rdata_o,
 
+  input  logic [WLEN-1:0]             ispr_acc_i,
+  output logic [WLEN-1:0]             ispr_acc_wr_data_o,
+  output logic                        ispr_acc_wr_en_o,
+
   input  logic [WLEN-1:0]             rnd_i
 );
   ///////////
@@ -152,13 +156,16 @@
                                                            ispr_bignum_wr_en_i);
   end
 
+  assign ispr_acc_wr_en_o   = (ispr_addr_i == IsprAcc) & ispr_bignum_wr_en_i;
+  assign ispr_acc_wr_data_o = ispr_bignum_wdata_i;
+
   always_comb begin
     ispr_rdata_o = mod_q;
 
     unique case (ispr_addr_i)
       IsprMod:   ispr_rdata_o = mod_q;
       IsprRnd:   ispr_rdata_o = rnd_i;
-      IsprAcc:   ispr_rdata_o = 256'h0;
+      IsprAcc:   ispr_rdata_o = ispr_acc_i;
       IsprFlags: ispr_rdata_o = {{(WLEN - (NFlagGroups * FlagsWidth)){1'b0}}, flags_flattened};
       default: ;
     endcase
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index 8f0d949..3ea2bed 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -74,6 +74,11 @@
   output alu_bignum_operation_t alu_bignum_operation_o,
   input  logic [WLEN-1:0]       alu_bignum_operation_result_i,
 
+  // Bignum MAC
+  output mac_bignum_operation_t mac_bignum_operation_o,
+  input  logic [WLEN-1:0]       mac_bignum_operation_result_i,
+  output logic                  mac_bignum_en_o,
+
   // LSU
   output logic                     lsu_load_req_o,
   output logic                     lsu_store_req_o,
@@ -148,6 +153,8 @@
   logic                     loop_jump;
   logic [ImemAddrWidth-1:0] loop_jump_addr;
 
+  logic [WLEN-1:0] mac_bignum_rf_wr_data;
+
   // Stall a cycle on loads to allow load data writeback to happen the following cycle. Stall not
   // required on stores as there is no response to deal with.
   // TODO: Possibility of error response on store? Probably still don't need to stall in that case
@@ -369,7 +376,7 @@
 
   // Base ALU Operand B MUX
   always_comb begin
-    unique case (insn_dec_bignum_i.op_b_sel)
+    unique case (insn_dec_bignum_i.alu_op_b_sel)
       OpBSelRegister:  alu_bignum_operation_o.operand_b = rf_bignum_rd_data_b_i;
       OpBSelImmediate: alu_bignum_operation_o.operand_b = insn_dec_bignum_i.i;
       default:         alu_bignum_operation_o.operand_b = rf_bignum_rd_data_b_i;
@@ -377,25 +384,71 @@
   end
 
   assign alu_bignum_operation_o.op          = insn_dec_bignum_i.alu_op;
-  assign alu_bignum_operation_o.shift_right = insn_dec_bignum_i.shift_right;
-  assign alu_bignum_operation_o.shift_amt   = insn_dec_bignum_i.shift_amt;
-  assign alu_bignum_operation_o.flag_group  = insn_dec_bignum_i.flag_group;
-  assign alu_bignum_operation_o.sel_flag    = insn_dec_bignum_i.sel_flag;
+  assign alu_bignum_operation_o.shift_right = insn_dec_bignum_i.alu_shift_right;
+  assign alu_bignum_operation_o.shift_amt   = insn_dec_bignum_i.alu_shift_amt;
+  assign alu_bignum_operation_o.flag_group  = insn_dec_bignum_i.alu_flag_group;
+  assign alu_bignum_operation_o.sel_flag    = insn_dec_bignum_i.alu_sel_flag;
 
-  // Register file write MUX
-  // Suppress write for loads when controller isn't in stall state as load data for writeback is
-  // only available in the stall state.
-  assign rf_bignum_wr_en_o =
-    {2{insn_dec_bignum_i.rf_we & ~(insn_dec_shared_i.ld_insn & (state_q != OtbnStateStall))}};
+  assign mac_bignum_operation_o.operand_a         = rf_bignum_rd_data_a_i;
+  assign mac_bignum_operation_o.operand_b         = rf_bignum_rd_data_b_i;
+  assign mac_bignum_operation_o.operand_a_qw_sel  = insn_dec_bignum_i.mac_op_a_qw_sel;
+  assign mac_bignum_operation_o.operand_b_qw_sel  = insn_dec_bignum_i.mac_op_b_qw_sel;
+  assign mac_bignum_operation_o.pre_acc_shift_imm = insn_dec_bignum_i.mac_pre_acc_shift;
+  assign mac_bignum_operation_o.zero_acc          = insn_dec_bignum_i.mac_zero_acc;
+  assign mac_bignum_operation_o.shift_acc         = insn_dec_bignum_i.mac_shift_out;
+
+  assign mac_bignum_en_o = insn_dec_bignum_i.mac_en & insn_valid_i;
+
+
+  // Bignum Register file write control
+
+  always_comb begin
+    // By default write nothing
+    rf_bignum_wr_en_o = 2'b00;
+
+    // Only write if enabled
+    if (insn_dec_bignum_i.rf_we) begin
+      if (insn_dec_bignum_i.mac_en && insn_dec_bignum_i.mac_shift_out) begin
+        // Special handling for BN.MULQACC.SO, only enable upper or lower half depending on
+        // mac_wr_hw_sel.
+        rf_bignum_wr_en_o = insn_dec_bignum_i.mac_wr_hw_sel ? 2'b10 : 2'b01;
+      end else if (insn_dec_shared_i.ld_insn) begin
+        // Special handling for BN.LID. Load data is requested in the first cycle of the instruction
+        // (where state_q == OtbnStateRun) and is available in the second cycle following the
+        // request (where state_q == OtbnStateStall), so only enable writes for BN.LID when in
+        // OtbnStateStall.
+        if (state_q == OtbnStateStall) begin
+          rf_bignum_wr_en_o = 2'b11;
+        end
+      end else begin
+        // For everything else write both halves immediately.
+        rf_bignum_wr_en_o = 2'b11;
+      end
+    end
+  end
 
   assign rf_bignum_wr_addr_o = insn_dec_bignum_i.rf_d_indirect ? rf_base_rd_data_b_i[4:0] :
                                                                  insn_dec_bignum_i.d;
 
+  // For the shift-out variant of BN.MULQACC the bottom half of the MAC result is written to one
+  // half of a destination register specified by the instruction (mac_wr_hw_sel). The bottom half of
+  // the MAC result must be placed in the appropriate half of the write data (the RF only accepts
+  // write data for the top half in the top half of the write data input). Otherwise (shift-out to
+  // bottom half and all other BN.MULQACC instructions) simply pass the MAC result through unchanged
+  // as write data.
+  assign mac_bignum_rf_wr_data[WLEN-1:WLEN/2] =
+    insn_dec_bignum_i.mac_wr_hw_sel &&
+    insn_dec_bignum_i.mac_shift_out    ? mac_bignum_operation_result_i[WLEN/2-1:0] :
+                                         mac_bignum_operation_result_i[WLEN-1:WLEN/2];
+
+  assign mac_bignum_rf_wr_data[WLEN/2-1:0] = mac_bignum_operation_result_i[WLEN/2-1:0];
+
   always_comb begin
     unique case (insn_dec_bignum_i.rf_wdata_sel)
       RfWdSelEx:   rf_bignum_wr_data_o = alu_bignum_operation_result_i;
       RfWdSelLsu:  rf_bignum_wr_data_o = lsu_bignum_rdata_i;
       RfWdSelIspr: rf_bignum_wr_data_o = ispr_rdata_i;
+      RfWdSelMac:  rf_bignum_wr_data_o = mac_bignum_rf_wr_data;
       default:     rf_bignum_wr_data_o = alu_bignum_operation_result_i;
     endcase
   end
diff --git a/hw/ip/otbn/rtl/otbn_core.sv b/hw/ip/otbn/rtl/otbn_core.sv
index 6f9ade2..d7a560d 100644
--- a/hw/ip/otbn/rtl/otbn_core.sv
+++ b/hw/ip/otbn/rtl/otbn_core.sv
@@ -115,12 +115,19 @@
   alu_bignum_operation_t alu_bignum_operation;
   logic [WLEN-1:0]       alu_bignum_operation_result;
 
+  mac_bignum_operation_t mac_bignum_operation;
+  logic [WLEN-1:0]       mac_bignum_operation_result;
+  logic                  mac_bignum_en;
+
   ispr_e                       ispr_addr;
   logic [31:0]                 ispr_base_wdata;
   logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en;
   logic [WLEN-1:0]             ispr_bignum_wdata;
   logic                        ispr_bignum_wr_en;
   logic [WLEN-1:0]             ispr_rdata;
+  logic [WLEN-1:0]             ispr_acc;
+  logic [WLEN-1:0]             ispr_acc_wr_data;
+  logic                        ispr_acc_wr_en;
 
   // Depending on its usage, the instruction address (program counter) is qualified by two valid
   // signals: insn_fetch_resp_valid (together with the undecoded instruction data), and insn_valid
@@ -228,6 +235,11 @@
     .alu_bignum_operation_o         (alu_bignum_operation),
     .alu_bignum_operation_result_i  (alu_bignum_operation_result),
 
+    // To/from bignum MAC
+    .mac_bignum_operation_o        (mac_bignum_operation),
+    .mac_bignum_operation_result_i (mac_bignum_operation_result),
+    .mac_bignum_en_o               (mac_bignum_en),
+
     // To/from LSU (base and bignum)
     .lsu_load_req_o     (lsu_load_req),
     .lsu_store_req_o    (lsu_store_req),
@@ -354,7 +366,25 @@
     .ispr_bignum_wr_en_i (ispr_bignum_wr_en),
     .ispr_rdata_o        (ispr_rdata),
 
+    .ispr_acc_i          (ispr_acc),
+    .ispr_acc_wr_data_o  (ispr_acc_wr_data),
+    .ispr_acc_wr_en_o    (ispr_acc_wr_en),
+
     .rnd_i               (rnd)
   );
 
+  otbn_mac_bignum u_otbn_mac_bignum (
+    .clk_i,
+    .rst_ni,
+
+    .operation_i        (mac_bignum_operation),
+    .operation_result_o (mac_bignum_operation_result),
+
+    .mac_en_i           (mac_bignum_en),
+
+    .ispr_acc_o         (ispr_acc),
+    .ispr_acc_wr_data_i (ispr_acc_wr_data),
+    .ispr_acc_wr_en_i   (ispr_acc_wr_en)
+  );
+
 endmodule
diff --git a/hw/ip/otbn/rtl/otbn_decoder.sv b/hw/ip/otbn/rtl/otbn_decoder.sv
index 4e38e88..07c5790 100644
--- a/hw/ip/otbn/rtl/otbn_decoder.sv
+++ b/hw/ip/otbn/rtl/otbn_decoder.sv
@@ -76,6 +76,15 @@
 
   comparison_op_base_e comparison_operator_base;
 
+
+  logic [1:0] mac_op_a_qw_sel_bignum;
+  logic [1:0] mac_op_b_qw_sel_bignum;
+  logic       mac_wr_hw_sel_bignum;
+  logic [1:0] mac_pre_acc_shift_bignum;
+  logic       mac_zero_acc_bignum;
+  logic       mac_shift_out_bignum;
+  logic       mac_en_bignum;
+
   logic rf_ren_a_base;
   logic rf_ren_b_base;
 
@@ -109,17 +118,17 @@
   assign shift_amt_a_type_bignum = {insn[29:25], 3'b0};
   assign shift_amt_s_type_bignum = {insn[31:25], insn[14]};
 
-  logic shift_right_bignum;
+  logic alu_shift_right_bignum;
 
-  assign shift_right_bignum = insn[30];
+  assign alu_shift_right_bignum = insn[30];
 
-  flag_group_t flag_group_bignum;
+  flag_group_t alu_flag_group_bignum;
 
-  assign flag_group_bignum = insn[31];
+  assign alu_flag_group_bignum = insn[31];
 
-  flag_e sel_flag_bignum;
+  flag_e alu_sel_flag_bignum;
 
-  assign sel_flag_bignum = flag_e'(insn[26:25]);
+  assign alu_sel_flag_bignum = flag_e'(insn[26:25]);
 
   // source registers
   assign insn_rs1 = insn[19:15];
@@ -138,6 +147,13 @@
   assign loop_bodysize_base  = insn[31:20];
   assign loop_immediate_base = insn[12];
 
+  assign mac_op_a_qw_sel_bignum   = insn[26:25];
+  assign mac_op_b_qw_sel_bignum   = insn[28:27];
+  assign mac_wr_hw_sel_bignum     = insn[29];
+  assign mac_pre_acc_shift_bignum = insn[14:13];
+  assign mac_zero_acc_bignum      = insn[12];
+  assign mac_shift_out_bignum     = insn[31];
+
   logic d_inc_bignum;
   logic a_inc_bignum;
   logic a_wlen_word_inc_bignum;
@@ -167,13 +183,13 @@
     endcase
   end
 
-  logic [$clog2(WLEN)-1:0] shift_amt_bignum;
+  logic [$clog2(WLEN)-1:0] alu_shift_amt_bignum;
   always_comb begin
     unique case (shift_amt_mux_sel_bignum)
-      ShamtSelBignumA:    shift_amt_bignum = shift_amt_a_type_bignum;
-      ShamtSelBignumS:    shift_amt_bignum = shift_amt_s_type_bignum;
-      ShamtSelBignumZero: shift_amt_bignum = '0;
-      default:            shift_amt_bignum = shift_amt_a_type_bignum;
+      ShamtSelBignumA:    alu_shift_amt_bignum = shift_amt_a_type_bignum;
+      ShamtSelBignumS:    alu_shift_amt_bignum = shift_amt_s_type_bignum;
+      ShamtSelBignumZero: alu_shift_amt_bignum = '0;
+      default:            alu_shift_amt_bignum = shift_amt_a_type_bignum;
     endcase
   end
 
@@ -198,27 +214,34 @@
   };
 
   assign insn_dec_bignum_o = '{
-    a:               insn_rs1,
-    b:               insn_rs2,
-    d:               insn_rd,
-    i:               imm_i_type_bignum,
-    rf_a_indirect:   rf_a_indirect_bignum,
-    rf_b_indirect:   rf_b_indirect_bignum,
-    rf_d_indirect:   rf_d_indirect_bignum,
-    d_inc:           d_inc_bignum,
-    a_inc:           a_inc_bignum,
-    a_wlen_word_inc: a_wlen_word_inc_bignum,
-    b_inc:           b_inc_bignum,
-    shift_amt:       shift_amt_bignum,
-    shift_right:     shift_right_bignum,
-    flag_group:      flag_group_bignum,
-    sel_flag:        sel_flag_bignum,
-    alu_op:          alu_operator_bignum,
-    op_b_sel:        alu_op_b_mux_sel_bignum,
-    rf_we:           rf_we_bignum,
-    rf_wdata_sel:    rf_wdata_sel_bignum,
-    rf_ren_a:        rf_ren_a_bignum,
-    rf_ren_b:        rf_ren_b_bignum
+    a:                 insn_rs1,
+    b:                 insn_rs2,
+    d:                 insn_rd,
+    i:                 imm_i_type_bignum,
+    rf_a_indirect:     rf_a_indirect_bignum,
+    rf_b_indirect:     rf_b_indirect_bignum,
+    rf_d_indirect:     rf_d_indirect_bignum,
+    d_inc:             d_inc_bignum,
+    a_inc:             a_inc_bignum,
+    a_wlen_word_inc:   a_wlen_word_inc_bignum,
+    b_inc:             b_inc_bignum,
+    alu_shift_amt:     alu_shift_amt_bignum,
+    alu_shift_right:   alu_shift_right_bignum,
+    alu_flag_group:    alu_flag_group_bignum,
+    alu_sel_flag:      alu_sel_flag_bignum,
+    alu_op:            alu_operator_bignum,
+    alu_op_b_sel:      alu_op_b_mux_sel_bignum,
+    mac_op_a_qw_sel:   mac_op_a_qw_sel_bignum,
+    mac_op_b_qw_sel:   mac_op_b_qw_sel_bignum,
+    mac_wr_hw_sel:     mac_wr_hw_sel_bignum,
+    mac_pre_acc_shift: mac_pre_acc_shift_bignum,
+    mac_zero_acc:      mac_zero_acc_bignum,
+    mac_shift_out:     mac_shift_out_bignum,
+    mac_en:            mac_en_bignum,
+    rf_we:             rf_we_bignum,
+    rf_wdata_sel:      rf_wdata_sel_bignum,
+    rf_ren_a:          rf_ren_a_bignum,
+    rf_ren_b:          rf_ren_b_bignum
   };
 
   assign insn_dec_shared_o = '{
@@ -250,6 +273,7 @@
     rf_ren_b_base          = 1'b0;
     rf_ren_a_bignum        = 1'b0;
     rf_ren_b_bignum        = 1'b0;
+    mac_en_bignum          = 1'b0;
 
     rf_a_indirect_bignum   = 1'b0;
     rf_b_indirect_bignum   = 1'b0;
@@ -595,6 +619,22 @@
         endcase
       end
 
+      ////////////////////////////////////////////
+      // BN.MULQACC/BN.MULQACC.WO/BN.MULQACC.SO //
+      ////////////////////////////////////////////
+
+      InsnOpcodeBignumMulqacc: begin
+        insn_subset         = InsnSubsetBignum;
+        rf_ren_a_bignum     = 1'b1;
+        rf_ren_b_bignum     = 1'b1;
+        rf_wdata_sel_bignum = RfWdSelMac;
+        mac_en_bignum       = 1'b1;
+
+        if (insn[31:30] != 2'b00) begin // BN.MULQACC.WO/BN.MULQACC.SO
+          rf_we_bignum = 1'b1;
+        end
+      end
+
       default: illegal_insn = 1'b1;
     endcase
 
diff --git a/hw/ip/otbn/rtl/otbn_mac_bignum.sv b/hw/ip/otbn/rtl/otbn_mac_bignum.sv
new file mode 100644
index 0000000..4de8e1b
--- /dev/null
+++ b/hw/ip/otbn/rtl/otbn_mac_bignum.sv
@@ -0,0 +1,134 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`include "prim_assert.sv"
+
+/**
+ * OTBN bignum multiply-accumulate (MAC) unit
+ *
+ * Datapath for BN.MULQACC[.SO|.WO]: one quarter-word (WLEN/4 bits) is selected from each WLEN-bit
+ * operand, the two are multiplied, and the product is shifted left by a whole number of
+ * quarter-words and added to the accumulator (ACC).
+ *
+ * - operation_result_o is always the unshifted adder result.
+ * - When mac_en_i is set, ACC is updated with the adder result (shifted right by WLEN/2 when
+ *   operation_i.shift_acc is set).
+ * - ACC can be read (ispr_acc_o) and written (ispr_acc_wr_*_i) as an ISPR. An ISPR write takes
+ *   priority over a MAC update; the two must not be requested in the same cycle.
+ */
+module otbn_mac_bignum
+  import otbn_pkg::*;
+(
+  input logic clk_i,
+  input logic rst_ni,
+
+  input mac_bignum_operation_t operation_i,
+  input logic                  mac_en_i,
+
+  output logic [WLEN-1:0] operation_result_o,
+
+  output logic [WLEN-1:0] ispr_acc_o,
+  input  logic [WLEN-1:0] ispr_acc_wr_data_i,
+  input  logic            ispr_acc_wr_en_i
+);
+  // The MAC operates on quarter-words, QWLEN gives the number of bits in a quarter-word.
+  localparam int unsigned QWLEN = WLEN / 4;
+
+  logic [WLEN-1:0] adder_op_a;
+  logic [WLEN-1:0] adder_op_b;
+  logic [WLEN-1:0] adder_result;
+
+  logic [QWLEN-1:0]  mul_op_a;
+  logic [QWLEN-1:0]  mul_op_b;
+  logic [WLEN/2-1:0] mul_res;
+  logic [WLEN-1:0]   mul_res_shifted;
+
+  logic [WLEN-1:0] acc;
+  logic [WLEN-1:0] acc_d;
+  logic [WLEN-1:0] acc_q;
+  logic            acc_en;
+
+  // Extract QWLEN multiply operands from WLEN operand inputs based on chosen quarter word from the
+  // instruction (operand_[a|b]_qw_sel).
+  always_comb begin
+    mul_op_a = '0;
+    mul_op_b = '0;
+
+    unique case (operation_i.operand_a_qw_sel)
+      2'd0: mul_op_a = operation_i.operand_a[QWLEN*0+:QWLEN];
+      2'd1: mul_op_a = operation_i.operand_a[QWLEN*1+:QWLEN];
+      2'd2: mul_op_a = operation_i.operand_a[QWLEN*2+:QWLEN];
+      2'd3: mul_op_a = operation_i.operand_a[QWLEN*3+:QWLEN];
+      default: mul_op_a = '0;
+    endcase
+
+    unique case (operation_i.operand_b_qw_sel)
+      2'd0: mul_op_b = operation_i.operand_b[QWLEN*0+:QWLEN];
+      2'd1: mul_op_b = operation_i.operand_b[QWLEN*1+:QWLEN];
+      2'd2: mul_op_b = operation_i.operand_b[QWLEN*2+:QWLEN];
+      2'd3: mul_op_b = operation_i.operand_b[QWLEN*3+:QWLEN];
+      default: mul_op_b = '0;
+    endcase
+  end
+
+  `ASSERT_KNOWN_IF(OperandAQWSelKnown, operation_i.operand_a_qw_sel, mac_en_i)
+  `ASSERT_KNOWN_IF(OperandBQWSelKnown, operation_i.operand_b_qw_sel, mac_en_i)
+
+  assign mul_res = mul_op_a * mul_op_b;
+
+  // Shift the 2*QWLEN-bit multiply result into a WLEN word before accumulating, using the shift
+  // amount supplied in the instruction (pre_acc_shift_imm, in units of QWLEN bits). For a shift of
+  // 3 quarter-words only the bottom QWLEN bits of the product fit in the WLEN word; the upper bits
+  // are discarded.
+  always_comb begin
+    mul_res_shifted = '0;
+
+    unique case (operation_i.pre_acc_shift_imm)
+      2'd0: mul_res_shifted = {{QWLEN*2{1'b0}}, mul_res};
+      2'd1: mul_res_shifted = {{QWLEN{1'b0}}, mul_res, {QWLEN{1'b0}}};
+      2'd2: mul_res_shifted = {mul_res, {QWLEN*2{1'b0}}};
+      2'd3: mul_res_shifted = {mul_res[QWLEN-1:0], {QWLEN*3{1'b0}}};
+      default: mul_res_shifted = '0;
+    endcase
+  end
+
+  `ASSERT_KNOWN_IF(PreAccShiftImmKnown, operation_i.pre_acc_shift_imm, mac_en_i)
+
+  // Accumulator logic
+
+  // Accumulator reads as 0 if .Z set in MULQACC (zero_acc).
+  assign acc = operation_i.zero_acc ? '0 : acc_q;
+
+  // Add shifted multiplier result to current accumulator.
+  assign adder_op_a = mul_res_shifted;
+  assign adder_op_b = acc;
+
+  assign adder_result = adder_op_a + adder_op_b;
+
+  // If performing an ACC ISPR write the next accumulator value is taken from the ISPR write data,
+  // otherwise it is drawn from the adder result. The new accumulator can be optionally shifted
+  // right by one half-word (shift_acc).
+  assign acc_d = ispr_acc_wr_en_i      ? ispr_acc_wr_data_i                                :
+                 operation_i.shift_acc ? {{QWLEN*2{1'b0}}, adder_result[QWLEN*2+:QWLEN*2]} :
+                                         adder_result;
+
+  // Only write to accumulator if the MAC is enabled or an ACC ISPR write is occurring.
+  assign acc_en = mac_en_i | ispr_acc_wr_en_i;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      acc_q <= '0;
+    end else if (acc_en) begin
+      acc_q <= acc_d;
+    end
+  end
+
+  assign ispr_acc_o = acc_q;
+
+  // The operation result is taken directly from the adder, shift_acc only applies to the new value
+  // written to the accumulator.
+  assign operation_result_o = adder_result;
+
+  `ASSERT(NoISPRAccWrAndMacEn, ~(ispr_acc_wr_en_i & mac_en_i))
+endmodule
diff --git a/hw/ip/otbn/rtl/otbn_pkg.sv b/hw/ip/otbn/rtl/otbn_pkg.sv
index 8c295fe..b9b2aa8 100644
--- a/hw/ip/otbn/rtl/otbn_pkg.sv
+++ b/hw/ip/otbn/rtl/otbn_pkg.sv
@@ -152,7 +152,8 @@
     RfWdSelNextPc,
     RfWdSelLsu,
     RfWdSelIspr,
-    RfWdSelIncr
+    RfWdSelIncr,
+    RfWdSelMac
   } rf_wd_sel_e;
 
   // Control and Status Registers (CSRs)
@@ -272,13 +273,22 @@
                                               // file
 
     // Shifting only applies to a subset of ALU operations
-    logic [$clog2(WLEN)-1:0] shift_amt;   // Shift amount
-    logic                    shift_right; // Shift right if set otherwise left
+    logic [$clog2(WLEN)-1:0] alu_shift_amt;   // Shift amount
+    logic                    alu_shift_right; // Shift right if set otherwise left
 
-    flag_group_t             flag_group;
-    flag_e                   sel_flag;
+    flag_group_t             alu_flag_group;
+    flag_e                   alu_sel_flag;
     alu_op_bignum_e          alu_op;
-    op_b_sel_e               op_b_sel;
+    op_b_sel_e               alu_op_b_sel;
+
+    logic [1:0]              mac_op_a_qw_sel;
+    logic [1:0]              mac_op_b_qw_sel;
+    logic                    mac_wr_hw_sel;
+    logic [1:0]              mac_pre_acc_shift;
+    logic                    mac_zero_acc;
+    logic                    mac_shift_out;
+    logic                    mac_en;
+
     logic                    rf_we;
     rf_wd_sel_e              rf_wdata_sel;
     logic                    rf_ren_a;
@@ -307,5 +317,15 @@
     flag_e                   sel_flag;
   } alu_bignum_operation_t;
 
+  typedef struct packed {               // Operation input of otbn_mac_bignum (BN.MULQACC*)
+    logic [WLEN-1:0] operand_a;         // Source word a; one quarter-word of it is multiplied
+    logic [WLEN-1:0] operand_b;         // Source word b; one quarter-word of it is multiplied
+    logic [1:0]      operand_a_qw_sel;  // Quarter-word of operand_a to multiply (0 = lowest)
+    logic [1:0]      operand_b_qw_sel;  // Quarter-word of operand_b to multiply (0 = lowest)
+    logic [1:0]      pre_acc_shift_imm; // Left shift of the product, in quarter-words
+    logic            zero_acc;          // Use 0 instead of the stored accumulator in the add
+    logic            shift_acc;         // Store the adder result shifted right by WLEN/2 in ACC
+  } mac_bignum_operation_t;
+
 
 endpackage