[otbn] Add bignum RF & ALU along with CSR & WSR

Adds all bignum side arithmetic and logic instructions (no MULQACC,
this will be a separate unit) as well as CSR and WSR access (included as
they live in the bignum ALU).

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/otbn.core b/hw/ip/otbn/otbn.core
index b64cfb5..888afa5 100644
--- a/hw/ip/otbn/otbn.core
+++ b/hw/ip/otbn/otbn.core
@@ -22,9 +22,10 @@
       - rtl/otbn_decoder.sv
       - rtl/otbn_instruction_fetch.sv
       - rtl/otbn_rf_base.sv
-      - rtl/otbn_status_registers.sv
+      - rtl/otbn_rf_bignum.sv
       - rtl/otbn_lsu.sv
       - rtl/otbn_alu_base.sv
+      - rtl/otbn_alu_bignum.sv
       - rtl/otbn_core.sv
     file_type: systemVerilogSource
 
diff --git a/hw/ip/otbn/rtl/otbn_alu_bignum.sv b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
new file mode 100644
index 0000000..c38a19aa
--- /dev/null
+++ b/hw/ip/otbn/rtl/otbn_alu_bignum.sv
@@ -0,0 +1,392 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+`include "prim_assert.sv"
+
+/**
+ * OTBN alu block for the bignum instruction subset
+ *
+ * This ALU supports all of the 'plain' arithmetic and logic bignum instructions, BN.MULQACC is
+ * implemented in a separate block.
+ *
+ * One barrel shifter and two adders (X and Y) are implemented along with the logic operators
+ * (AND,OR,XOR,NOT).
+ *
+ * The adders have 256-bit operands with a carry_in and optional invert on the second operand. This
+ * can be used to implement subtraction (a - b == a + ~b + 1). BN.SUBB/BN.ADDC are implemented by
+ * feeding in the carry flag as carry in rather than a fixed 0 or 1.
+ *
+ * The shifter takes a 512-bit input (to implement BN.RSHI, concatenate and right shift) and shifts
+ * right by up to 256-bits. The lower (256-bit) half of the input and output can be reversed to
+ * allow left shift implementation.  There is no concatenate and left shift instruction so reversing
+ * isn't required over the full width.
+ *
+ * The dataflow between the adders and shifter is in the diagram below. This arrangement allows the
+ * implementation of the pseudo-mod (BN.ADDM/BN.SUBM) instructions in a single cycle whilst
+ * minimising the critical path. The pseudo-mod instructions do not have a shifted input so X can
+ * compute the initial add/sub and Y computes the pseudo-mod result. For all other add/sub
+ * operations Y computes the operation with one of the inputs supplied by the shifter and the other
+ * from operand_a.
+ *
+ * Both adder X and the shifter get supplied with operand_a and operand_b from the operation_i
+ * input. In addition the shifter gets a shift amount (shift_amt) and can use 0 instead of
+ * operand_a. The shifter concatenates operand_a (or 0) and operand_b together before shifting with
+ * operand_a in the upper (256-bit) half {operand_a/0, operand_b}. This allows the shifter to pass
+ * through operand_b simply by not performing a shift.
+ *
+ *                     A 0
+ *                     | |
+ *                   \-----/
+ *                    \---/
+ *      A       B       |   B   shift_amt
+ *      |       |       |   |   |
+ *    +-----------+   +-----------+
+ *    |  Adder X  |   |  Shifter  |
+ *    +-----------+   +-----------+
+ *          |               |
+ *          |----+     +----|
+ *          |    |     |    |
+ *      X result |     | Shifter result
+ *               |     |
+ *               |     |     +-----------+
+ *             A |     | +---|  MOD WSR  |
+ *             | |     | |   +-----------+
+ *           \-----/ \-----/
+ *            \---/   \---/
+ *              |       |
+ *              |       |
+ *            +-----------+
+ *            |  Adder Y  |
+ *            +-----------+
+ *                  |
+ *              Y result
+ */
+
+
+module otbn_alu_bignum
+  import otbn_pkg::*;
+(
+  input logic clk_i,
+  input logic rst_ni,
+
+  input  alu_bignum_operation_t operation_i,
+  output logic [WLEN-1:0]       operation_result_o,
+
+  input  ispr_e                       ispr_addr_i,
+  input  logic [31:0]                 ispr_base_wdata_i,
+  input  logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en_i,
+  input  logic [WLEN-1:0]             ispr_bignum_wdata_i,
+  input  logic                        ispr_bignum_wr_en_i,
+  output logic [WLEN-1:0]             ispr_rdata_o,
+
+  input  logic [WLEN-1:0]             rnd_i
+);
+  ///////////
+  // ISPRs //
+  ///////////
+
+  flags_t                              flags_q [NFlagGroups];
+  flags_t                              flags_d [NFlagGroups];
+  logic   [NFlagGroups*FlagsWidth-1:0] flags_flattened;
+  logic   [NFlagGroups-1:0]            flags_en;
+  logic   [NFlagGroups-1:0]            is_operation_flag_group;
+  flags_t                              selected_flags;
+  flags_t                              update_flags;
+  logic                                update_flags_en;
+
+  for (genvar i_fg = 0; i_fg < NFlagGroups; i_fg++) begin : g_flag_groups
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        flags_q[i_fg] <= '{Z : 1'b0, M : 1'b0, L : 1'b0, C : 1'b0};
+      end else if (flags_en[i_fg]) begin
+        flags_q[i_fg] <= flags_d[i_fg];
+      end
+    end
+
+    assign is_operation_flag_group[i_fg] = operation_i.flag_group == i_fg;
+
+    assign flags_d[i_fg] = update_flags_en & is_operation_flag_group[i_fg] ?
+      update_flags : ispr_base_wdata_i[i_fg * FlagsWidth +: FlagsWidth];
+
+    assign flags_en[i_fg] = (update_flags_en & is_operation_flag_group[i_fg]) |
+                            (ispr_base_wr_en_i[0] & (ispr_addr_i == IsprFlags));
+
+    assign flags_flattened[i_fg * FlagsWidth +: FlagsWidth] = flags_q[i_fg];
+  end
+
+  assign selected_flags = flags_q[operation_i.flag_group];
+
+  logic [WLEN-1:0]             mod_q;
+  logic [WLEN-1:0]             mod_d;
+  logic [BaseWordsPerWLEN-1:0] mod_wr_en;
+
+  for (genvar i_word = 0; i_word < BaseWordsPerWLEN; i_word++) begin : g_mod_words
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        mod_q[i_word*32+:32] <= '0;
+      end else if (mod_wr_en[i_word]) begin
+        mod_q[i_word*32+:32] <= mod_d[i_word*32+:32];
+      end
+    end
+
+    assign mod_d[i_word*32+:32] = ispr_base_wr_en_i[i_word] ? ispr_base_wdata_i :
+                                                              ispr_bignum_wdata_i[i_word*32+:32];
+
+    assign mod_wr_en[i_word] = (ispr_addr_i == IsprMod) & (ispr_base_wr_en_i[i_word] |
+                                                           ispr_bignum_wr_en_i);
+  end
+
+  always_comb begin
+    ispr_rdata_o = mod_q;
+
+    unique case (ispr_addr_i)
+      IsprMod:   ispr_rdata_o = mod_q;
+      IsprRnd:   ispr_rdata_o = rnd_i;
+      IsprAcc:   ispr_rdata_o = 256'h0;
+      IsprFlags: ispr_rdata_o = {{(WLEN - (NFlagGroups * FlagsWidth)){1'b0}}, flags_flattened};
+      default: ;
+    endcase
+  end
+
+  /////////////
+  // Shifter //
+  /////////////
+
+  logic              shift_right;
+  logic [WLEN-1:0]   shifter_in_upper, shifter_in_lower, shifter_in_lower_reverse;
+  logic [WLEN*2-1:0] shifter_in;
+  logic [WLEN*2-1:0] shifter_out;
+  logic [WLEN-1:0]   shifter_out_lower_reverse, shifter_res;
+
+  assign shifter_in_upper = operation_i.op == AluOpBignumRshi ? operation_i.operand_a : '0;
+  assign shifter_in_lower = operation_i.operand_b;
+
+  for (genvar i = 0; i < WLEN; i++) begin : g_shifter_in_lower_reverse
+    assign shifter_in_lower_reverse[i] = shifter_in_lower[WLEN - i - 1];
+  end
+
+  assign shifter_in = {shifter_in_upper, shift_right ? shifter_in_lower :
+                                                       shifter_in_lower_reverse};
+
+  assign shifter_out = shifter_in >> operation_i.shift_amt;
+
+  for (genvar i = 0; i < WLEN; i++) begin : g_shifter_out_lower_reverse
+    assign shifter_out_lower_reverse[i] = shifter_out[WLEN - i - 1];
+  end
+
+  assign shifter_res = shift_right ? shifter_out[WLEN-1:0] : shifter_out_lower_reverse;
+
+  //////////////////
+  // Adders X & Y //
+  //////////////////
+
+  logic [WLEN:0]   adder_x_op_a, adder_x_op_b;
+  logic            adder_x_carry_in;
+  logic            adder_x_op_b_invert;
+  logic [WLEN+1:0] adder_x_res;
+
+  logic [WLEN:0]   adder_y_op_a, adder_y_op_b;
+  logic            adder_y_carry_in;
+  logic            adder_y_op_b_invert;
+  logic [WLEN+1:0] adder_y_res;
+
+  logic            shift_mod_sel;
+  logic [WLEN-1:0] shift_mod_mux_out;
+  logic            x_res_operand_a_sel;
+  logic [WLEN-1:0] x_res_operand_a_mux_out;
+
+  assign adder_x_op_a = {operation_i.operand_a, 1'b1};
+  assign adder_x_op_b = {adder_x_op_b_invert ? ~operation_i.operand_b : operation_i.operand_b,
+                         adder_x_carry_in};
+
+  assign adder_x_res = adder_x_op_a + adder_x_op_b;
+
+  assign x_res_operand_a_mux_out = x_res_operand_a_sel ? adder_x_res[WLEN:1] : operation_i.operand_a;
+  assign shift_mod_mux_out = shift_mod_sel ? shifter_res : mod_q;
+
+  assign adder_y_op_a = {x_res_operand_a_mux_out, 1'b1};
+  assign adder_y_op_b = {adder_y_op_b_invert ? ~shift_mod_mux_out : shift_mod_mux_out,
+                         adder_y_carry_in};
+
+  assign adder_y_res = adder_y_op_a + adder_y_op_b;
+
+  assign update_flags.C = (operation_i.op == AluOpBignumAdd ||
+                           operation_i.op == AluOpBignumAddc)  ?  adder_y_res[WLEN+1] :
+                                                                 ~adder_y_res[WLEN+1];
+  assign update_flags.M = adder_y_res[WLEN];
+  assign update_flags.L = adder_y_res[1];
+  assign update_flags.Z = ~|adder_y_res[WLEN:1];
+
+  //////////////////////////////
+  // Shifter & Adders control //
+  //////////////////////////////
+
+  always_comb begin
+    shift_right          = 1'b0;
+    adder_x_carry_in     = 1'b0;
+    adder_x_op_b_invert  = 1'b0;
+    x_res_operand_a_sel  = 1'b0;
+    shift_mod_sel        = 1'b0;
+    adder_y_carry_in     = 1'b0;
+    adder_y_op_b_invert  = 1'b0;
+    update_flags_en      = 1'b0;
+
+    unique case (operation_i.op)
+      AluOpBignumAdd: begin
+        // Shifter computes B [>>|<<] shift_amt
+        // Y computes A + shifter_res
+        // X ignored
+        shift_right         = operation_i.shift_right;
+        x_res_operand_a_sel = 1'b0;
+        shift_mod_sel       = 1'b1;
+        adder_y_carry_in    = 1'b0;
+        adder_y_op_b_invert = 1'b0;
+        update_flags_en     = 1'b1;
+      end
+      AluOpBignumAddc: begin
+        // Shifter computes B [>>|<<] shift_amt
+        // Y computes A + shifter_res + flags.C
+        // X ignored
+        shift_right         = operation_i.shift_right;
+        x_res_operand_a_sel = 1'b0;
+        shift_mod_sel       = 1'b1;
+        adder_y_carry_in    = selected_flags.C;
+        adder_y_op_b_invert = 1'b0;
+        update_flags_en     = 1'b1;
+      end
+      AluOpBignumAddm: begin
+        // X computes A + B
+        // Y computes adder_x_res - mod = adder_x_res + ~mod + 1
+        // Shifter ignored
+        // Output mux chooses result based on top bit of X result (whether mod subtraction in
+        // Y should be applied or not)
+        adder_x_carry_in    = 1'b0;
+        adder_x_op_b_invert = 1'b0;
+        x_res_operand_a_sel = 1'b1;
+        shift_mod_sel       = 1'b0;
+        adder_y_carry_in    = 1'b1;
+        adder_y_op_b_invert = 1'b1;
+      end
+      AluOpBignumSub: begin
+        // Shifter computes B [>>|<<] shift_amt
+        // Y computes A - shifter_res = A + ~shifter_res + 1
+        // X ignored
+        shift_right         = operation_i.shift_right;
+        x_res_operand_a_sel = 1'b0;
+        shift_mod_sel       = 1'b1;
+        adder_y_carry_in    = 1'b1;
+        adder_y_op_b_invert = 1'b1;
+        update_flags_en     = 1'b1;
+      end
+      AluOpBignumSubb: begin
+        // Shifter computes B [>>|<<] shift_amt
+        // Y computes A - shifter_res - flags.C = A + ~shifter_res + ~flags.C
+        // X ignored
+        shift_right         = operation_i.shift_right;
+        x_res_operand_a_sel = 1'b0;
+        shift_mod_sel       = 1'b1;
+        adder_y_carry_in    = ~selected_flags.C;
+        adder_y_op_b_invert = 1'b1;
+        update_flags_en     = 1'b1;
+      end
+      AluOpBignumSubm: begin
+        // X computes A - B = A + ~B + 1
+        // Y computes adder_x_res + mod
+        // Shifter ignored
+        // Output mux chooses result based on top bit of X result (whether mod addition in Y
+        // should be applied or not)
+        adder_x_carry_in    = 1'b1;
+        adder_x_op_b_invert = 1'b1;
+        x_res_operand_a_sel = 1'b1;
+        shift_mod_sel       = 1'b0;
+        adder_y_carry_in    = 1'b0;
+        adder_y_op_b_invert = 1'b0;
+      end
+      AluOpBignumRshi: begin
+        // Shifter computes {A, B} >> shift_amt
+        // X, Y ignored
+        shift_right         = 1'b1;
+      end
+      AluOpBignumXor,
+      AluOpBignumOr,
+      AluOpBignumAnd,
+      AluOpBignumNot: begin
+        // Shifter computes one operand for the logical operation
+        // X & Y ignored
+        shift_right         = operation_i.shift_right;
+      end
+      default: ;
+    endcase
+  end
+
+  ////////////////////////
+  // Logical operations //
+  ////////////////////////
+
+  logic [WLEN-1:0] logical_res;
+
+  always_comb begin
+    logical_res = ~operation_i.operand_a;
+
+    unique case (operation_i.op)
+      AluOpBignumXor: logical_res = operation_i.operand_a ^ shifter_res;
+      AluOpBignumOr:  logical_res = operation_i.operand_a | shifter_res;
+      AluOpBignumAnd: logical_res = operation_i.operand_a & shifter_res;
+      AluOpBignumNot: logical_res = ~shifter_res;
+      default:;
+    endcase
+  end
+
+  ////////////////////////
+  // Output multiplexer //
+  ////////////////////////
+
+  always_comb begin
+    operation_result_o = adder_y_res[WLEN:1];
+
+    unique case(operation_i.op)
+      AluOpBignumAdd,
+      AluOpBignumAddc,
+      AluOpBignumSub,
+      AluOpBignumSubb: operation_result_o = adder_y_res[WLEN:1];
+
+      // For pseudo-mod operations the result depends upon initial a + b / a - b result that is
+      // computed in X. Operation to add/subtract mod (X + mod / X - mod) is computed in Y.
+      // Subtraction is computed in the X & Y adders as a - b == a + ~b + 1. Note that for
+      // a - b the top bit of the result will be set if a - b >= 0 and otherwise clear.
+
+      // BN.ADDM - X = a + b, Y = X - mod, subtract mod if a + b >= mod
+      // * If X generates carry a + b > mod (as mod is 256-bit) - Select Y result
+      // * If Y generates carry X - mod == (a + b) - mod >= 0 hence a + b >= mod, note this is only valid if
+      //   X does not generate carry - Select Y result
+      // * If neither happen a + b < mod - Select X result
+      AluOpBignumAddm: begin
+        if (adder_x_res[WLEN+1] || adder_y_res[WLEN+1]) begin
+          operation_result_o = adder_y_res[WLEN:1];
+        end else begin
+          operation_result_o = adder_x_res[WLEN:1];
+        end
+      end
+
+      // BN.SUBM - X = a - b, Y = X + mod, add mod if a - b < 0
+      // * If X generates carry a - b >= 0 - Select X result
+      // * Otherwise select Y result
+      AluOpBignumSubm: begin
+        if (adder_x_res[WLEN+1]) begin
+          operation_result_o = adder_x_res[WLEN:1];
+        end else begin
+          operation_result_o = adder_y_res[WLEN:1];
+        end
+      end
+
+      AluOpBignumRshi: operation_result_o = shifter_res[WLEN-1:0];
+
+      AluOpBignumXor,
+      AluOpBignumOr,
+      AluOpBignumAnd,
+      AluOpBignumNot:  operation_result_o = logical_res;
+      default: ;
+    endcase
+  end
+endmodule
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index ae00f29..7713032 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -35,7 +35,8 @@
 
   // Decoded instruction data, matching the "Decoding" section of the specification.
   input insn_dec_base_t       insn_dec_base_i,
-  input insn_dec_ctrl_t       insn_dec_ctrl_i,
+  input insn_dec_bignum_t     insn_dec_bignum_i,
+  input insn_dec_shared_t     insn_dec_shared_i,
 
   // Base register file
   output logic [4:0]   rf_base_wr_addr_o,
@@ -48,12 +49,30 @@
   output logic [4:0]   rf_base_rd_addr_b_o,
   input  logic [31:0]  rf_base_rd_data_b_i,
 
+  // Bignum register file (WDRs)
+  output logic [4:0]      rf_bignum_wr_addr_o,
+  output logic [1:0]      rf_bignum_wr_en_o,
+  output logic [WLEN-1:0] rf_bignum_wr_data_o,
+
+  output logic [4:0]      rf_bignum_rd_addr_a_o,
+  input  logic [WLEN-1:0] rf_bignum_rd_data_a_i,
+
+  output logic [4:0]      rf_bignum_rd_addr_b_o,
+  input  logic [WLEN-1:0] rf_bignum_rd_data_b_i,
+
   // Execution units
+
+  // Base ALU
   output alu_base_operation_t  alu_base_operation_o,
   output alu_base_comparison_t alu_base_comparison_o,
   input  logic [31:0]          alu_base_operation_result_i,
   input  logic                 alu_base_comparison_result_i,
 
+  // Bignum ALU
+  output alu_bignum_operation_t alu_bignum_operation_o,
+  input  logic [WLEN-1:0]       alu_bignum_operation_result_i,
+
+  // LSU
   output logic                     lsu_load_req_o,
   output logic                     lsu_store_req_o,
   output insn_subset_e             lsu_req_subset_o,
@@ -64,7 +83,15 @@
 
   input  logic [31:0]              lsu_base_rdata_i,
   input  logic [WLEN-1:0]          lsu_bignum_rdata_i,
-  input  logic [1:0]               lsu_rdata_err_i // Bit1: Uncorrectable, Bit0: Correctable
+  input  logic [1:0]               lsu_rdata_err_i, // Bit1: Uncorrectable, Bit0: Correctable
+
+  // Internal Special-Purpose Registers (ISPRs)
+  output ispr_e                       ispr_addr_o,
+  output logic [31:0]                 ispr_base_wdata_o,
+  output logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en_o,
+  output logic [WLEN-1:0]             ispr_bignum_wdata_o,
+  output logic                        ispr_bignum_wr_en_o,
+  input  logic [WLEN-1:0]             ispr_rdata_i
 );
 
   typedef enum logic [1:0] {
@@ -81,6 +108,22 @@
   logic [ImemAddrWidth-1:0] branch_target;
   logic [ImemAddrWidth-1:0] next_insn_addr;
 
+  csr_e                                csr_addr;
+  logic [31:0]                         csr_rdata;
+  logic [BaseWordsPerWLEN-1:0]         csr_rdata_mux [32];
+  logic [31:0]                         csr_wdata;
+
+  wsr_e                                wsr_addr;
+  logic [WLEN-1:0]                     wsr_wdata;
+
+  ispr_e                               ispr_addr_base;
+  logic [$clog2(BaseWordsPerWLEN)-1:0] ispr_word_addr_base;
+  logic [BaseWordsPerWLEN-1:0]         ispr_word_sel_base;
+
+  ispr_e                               ispr_addr_bignum;
+
+  logic                                ispr_wr_insn;
+
   // Stall a cycle on loads to allow load data writeback to happen the following cycle. Stall not
   // required on stores as there is no response to deal with.
   // TODO: Possibility of error response on store? Probably still don't need to stall in that case
@@ -88,12 +131,12 @@
   assign mem_stall = lsu_load_req_o;
 
   assign stall = mem_stall;
-  assign done_o = insn_valid_i && insn_dec_ctrl_i.ecall_insn;
+  assign done_o = insn_valid_i && insn_dec_shared_i.ecall_insn;
 
   // Branch taken when there is a valid branch instruction and comparison passes or a valid jump
   // instruction (which is always taken)
-  assign branch_taken = insn_valid_i & ((insn_dec_ctrl_i.branch_insn & alu_base_comparison_result_i) |
-                                        insn_dec_ctrl_i.jump_insn);
+  assign branch_taken = insn_valid_i & ((insn_dec_shared_i.branch_insn & alu_base_comparison_result_i) |
+                                        insn_dec_shared_i.jump_insn);
   // Branch target computed by base ALU (PC + imm)
   // TODO: Implement error on branch out of range
   assign branch_target = alu_base_operation_result_i[ImemAddrWidth-1:0];
@@ -149,7 +192,7 @@
   `ASSERT(ControllerStateValid, state_q inside {OtbnStateHalt, OtbnStateRun, OtbnStateStall});
   // Branch only takes effect in OtbnStateRun so must not go into stall state for branch
   // instructions.
-  `ASSERT(NoStallOnBranch, insn_valid_i & insn_dec_ctrl_i.branch_insn |-> state_q != OtbnStateStall);
+  `ASSERT(NoStallOnBranch, insn_valid_i & insn_dec_shared_i.branch_insn |-> state_q != OtbnStateStall);
 
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
@@ -164,7 +207,7 @@
 
   // Base ALU Operand A MUX
   always_comb begin
-    unique case (insn_dec_ctrl_i.op_a_sel)
+    unique case (insn_dec_shared_i.op_a_sel)
       OpASelRegister:
         alu_base_operation_o.operand_a = rf_base_rd_data_a_i;
       OpASelZero:
@@ -178,7 +221,7 @@
 
   // Base ALU Operand B MUX
   always_comb begin
-    unique case (insn_dec_ctrl_i.op_b_sel)
+    unique case (insn_dec_shared_i.op_b_sel)
       OpBSelRegister:
         alu_base_operation_o.operand_b = rf_base_rd_data_b_i;
       OpBSelImmediate:
@@ -198,27 +241,142 @@
   // Suppress write for loads when controller isn't in stall state as load data for writeback is
   // only available in the stall state.
   assign rf_base_wr_en_o =
-   insn_dec_ctrl_i.rf_we & ~(insn_dec_ctrl_i.ld_insn & (state_q != OtbnStateStall));
+   insn_dec_shared_i.rf_we                                    &
+   ~(insn_dec_shared_i.ld_insn & (state_q != OtbnStateStall)) &
+   (insn_dec_shared_i.subset == InsnSubsetBase);
 
   assign rf_base_wr_addr_o = insn_dec_base_i.d;
 
   always_comb begin
-    unique case (insn_dec_ctrl_i.rf_wdata_sel)
+    unique case (insn_dec_shared_i.rf_wdata_sel)
       RfWdSelEx:
         rf_base_wr_data_o = alu_base_operation_result_i;
       RfWdSelLsu:
         rf_base_wr_data_o = lsu_base_rdata_i;
       RfWdSelNextPc:
         rf_base_wr_data_o = {{(32-ImemAddrWidth){1'b0}}, next_insn_addr};
+      RfWdSelIspr:
+        rf_base_wr_data_o = csr_rdata;
       default:
         rf_base_wr_data_o = alu_base_operation_result_i;
     endcase
   end
 
+  assign rf_bignum_rd_addr_a_o = insn_dec_bignum_i.a;
+  assign rf_bignum_rd_addr_b_o = insn_dec_bignum_i.b;
+
+  assign alu_bignum_operation_o.operand_a = rf_bignum_rd_data_a_i;
+
+  // Bignum ALU Operand B MUX
+  always_comb begin
+    unique case (insn_dec_shared_i.op_b_sel)
+      OpBSelRegister:
+        alu_bignum_operation_o.operand_b = rf_bignum_rd_data_b_i;
+      OpBSelImmediate:
+        alu_bignum_operation_o.operand_b = insn_dec_bignum_i.i;
+      default:
+        alu_bignum_operation_o.operand_b = rf_bignum_rd_data_b_i;
+    endcase
+  end
+
+  assign alu_bignum_operation_o.op          = insn_dec_bignum_i.alu_op;
+  assign alu_bignum_operation_o.shift_right = insn_dec_bignum_i.shift_right;
+  assign alu_bignum_operation_o.shift_amt   = insn_dec_bignum_i.shift_amt;
+  assign alu_bignum_operation_o.flag_group  = insn_dec_bignum_i.flag_group;
+
+  // Register file write MUX
+  // Suppress write for loads when controller isn't in stall state as load data for writeback is
+  // only available in the stall state.
+  assign rf_bignum_wr_en_o =
+    {2{insn_dec_shared_i.rf_we                                    &
+       ~(insn_dec_shared_i.ld_insn & (state_q != OtbnStateStall)) &
+       (insn_dec_shared_i.subset == InsnSubsetBignum)
+    }};
+
+  assign rf_bignum_wr_addr_o = insn_dec_bignum_i.d;
+
+  always_comb begin
+    unique case (insn_dec_shared_i.rf_wdata_sel)
+      RfWdSelEx:
+        rf_bignum_wr_data_o = alu_bignum_operation_result_i;
+      RfWdSelLsu:
+        rf_bignum_wr_data_o = lsu_bignum_rdata_i;
+      RfWdSelIspr:
+        rf_bignum_wr_data_o = ispr_rdata_i;
+      default:
+        rf_bignum_wr_data_o = alu_bignum_operation_result_i;
+    endcase
+  end
+
+  // CSR/WSR/ISPR handling
+  // ISPRs (Internal Special Purpose Registers) are the internal registers. CSRs and WSRs are the
+  // ISA visible versions of those registers in the base and bignum ISAs respectively.
+
+  assign csr_addr = csr_e'(insn_dec_base_i.i[11:0]);
+
+  always_comb begin
+    ispr_addr_base      = IsprMod;
+    ispr_word_addr_base = '0;
+
+    unique case (csr_addr)
+      CsrFlags: begin
+        ispr_addr_base      = IsprFlags;
+        ispr_word_addr_base = '0;
+      end
+      CsrMod0,CsrMod1,CsrMod2,CsrMod3,CsrMod4,CsrMod5,CsrMod6,CsrMod7: begin
+        ispr_addr_base      = IsprMod;
+        ispr_word_addr_base = csr_addr[2:0];
+      end
+      CsrRnd: begin
+        ispr_addr_base      = IsprRnd;
+        ispr_word_addr_base = '0;
+      end
+      // TODO: Illegal addr handling
+      default: ;
+    endcase
+  end
+
+  for (genvar i_word = 0; i_word < BaseWordsPerWLEN; i_word++) begin : g_ispr_word_sel_base
+    assign ispr_word_sel_base[i_word] = ispr_word_addr_base == i_word;
+  end
+
+  for (genvar i_bit = 0; i_bit < 32; i_bit++) begin : g_csr_rdata_mux
+    for (genvar i_word = 0; i_word < BaseWordsPerWLEN; i_word++) begin : g_csr_rdata_mux_inner
+      assign csr_rdata_mux[i_bit][i_word] = ispr_rdata_i[i_word*32 + i_bit] & ispr_word_sel_base[i_word];
+    end
+
+    assign csr_rdata[i_bit] = |csr_rdata_mux[i_bit];
+  end
+
+  assign csr_wdata = insn_dec_shared_i.ispr_rs_insn ? csr_rdata | rf_base_rd_data_a_i : rf_base_rd_data_a_i;
+
+  assign wsr_addr = wsr_e'(insn_dec_bignum_i.i[WsrNumWidth-1:0]);
+
+  always_comb begin
+    ispr_addr_bignum = IsprMod;
+
+    unique case (wsr_addr)
+      WsrMod: ispr_addr_bignum = IsprMod;
+      WsrRnd: ispr_addr_bignum = IsprRnd;
+      WsrAcc: ispr_addr_bignum = IsprAcc;
+      default: ;
+    endcase
+  end
+
+  assign wsr_wdata = insn_dec_shared_i.ispr_rs_insn ? ispr_rdata_i | rf_bignum_rd_data_a_i : rf_bignum_rd_data_a_i;
+
+  assign ispr_wr_insn = insn_dec_shared_i.ispr_rw_insn | insn_dec_shared_i.ispr_rs_insn;
+
+  assign ispr_addr_o         = insn_dec_shared_i.subset == InsnSubsetBase ? ispr_addr_base : ispr_addr_bignum;
+  assign ispr_base_wdata_o   = csr_wdata;
+  assign ispr_base_wr_en_o   = {BaseWordsPerWLEN{(insn_dec_shared_i.subset == InsnSubsetBase) & ispr_wr_insn}} & ispr_word_sel_base;
+  assign ispr_bignum_wdata_o = wsr_wdata;
+  assign ispr_bignum_wr_en_o = (insn_dec_shared_i.subset == InsnSubsetBignum) & ispr_wr_insn;
+
   // TODO: Add error on unaligned/out of bounds
-  assign lsu_load_req_o   = insn_valid_i & insn_dec_ctrl_i.ld_insn & (state_q == OtbnStateRun);
-  assign lsu_store_req_o  = insn_valid_i & insn_dec_ctrl_i.st_insn & (state_q == OtbnStateRun);
-  assign lsu_req_subset_o = insn_dec_ctrl_i.subset;
+  assign lsu_load_req_o   = insn_valid_i & insn_dec_shared_i.ld_insn & (state_q == OtbnStateRun);
+  assign lsu_store_req_o  = insn_valid_i & insn_dec_shared_i.st_insn & (state_q == OtbnStateRun);
+  assign lsu_req_subset_o = insn_dec_shared_i.subset;
   // TODO: Switch between address from base/bignum
   assign lsu_addr_o       = alu_base_operation_result_i[DmemAddrWidth-1:0];
   assign lsu_base_wdata_o = rf_base_rd_data_b_i;
diff --git a/hw/ip/otbn/rtl/otbn_core.sv b/hw/ip/otbn/rtl/otbn_core.sv
index 2641858..a85808c 100644
--- a/hw/ip/otbn/rtl/otbn_core.sv
+++ b/hw/ip/otbn/rtl/otbn_core.sv
@@ -51,7 +51,11 @@
   // TODO: Decide what guarantees we make for random numbers on CSRs/WSRs, and how they might or
   // might not come from the same source.
   logic [WLEN-1:0] rnd;
-  assign rnd = 'd42;
+
+  // Constant for now until RNG is set up. This constant is the same in the model and must be
+  // altered there to match if altered here (the `_random_value` variable in the `RandWSR` class in
+  // dv/otbn/sim/wsr.py).
+  assign rnd = 256'h9999999999999999999999999999999999999999999999999999999999999999;
 
   // Fetch request (the next instruction)
   logic [ImemAddrWidth-1:0] insn_fetch_req_addr;
@@ -67,8 +71,8 @@
   logic                     insn_illegal;
   logic [ImemAddrWidth-1:0] insn_addr;
   insn_dec_base_t           insn_dec_base;
-
-  insn_dec_ctrl_t insn_dec_ctrl;
+  insn_dec_bignum_t         insn_dec_bignum;
+  insn_dec_shared_t         insn_dec_shared;
 
   logic [4:0]   rf_base_wr_addr;
   logic         rf_base_wr_en;
@@ -95,6 +99,24 @@
   logic [WLEN-1:0]          lsu_bignum_rdata;
   logic [1:0]               lsu_rdata_err; // Bit1: Uncorrectable, Bit0: Correctable
 
+  logic [WdrAw-1:0] rf_bignum_wr_addr;
+  logic [1:0]       rf_bignum_wr_en;
+  logic [WLEN-1:0]  rf_bignum_wr_data;
+  logic [WdrAw-1:0] rf_bignum_rd_addr_a;
+  logic [WLEN-1:0]  rf_bignum_rd_data_a;
+  logic [WdrAw-1:0] rf_bignum_rd_addr_b;
+  logic [WLEN-1:0]  rf_bignum_rd_data_b;
+
+  alu_bignum_operation_t alu_bignum_operation;
+  logic [WLEN-1:0]       alu_bignum_operation_result;
+
+  ispr_e                       ispr_addr;
+  logic [31:0]                 ispr_base_wdata;
+  logic [BaseWordsPerWLEN-1:0] ispr_base_wr_en;
+  logic [WLEN-1:0]             ispr_bignum_wdata;
+  logic                        ispr_bignum_wr_en;
+  logic [WLEN-1:0]             ispr_rdata;
+
   // Depending on its usage, the instruction address (program counter) is qualified by two valid
   // signals: insn_fetch_resp_valid (together with the undecoded instruction data), and insn_valid
   // for valid decoded (i.e. legal) instructions. Duplicate the signal in the source code for
@@ -138,10 +160,11 @@
     .insn_fetch_resp_valid_i (insn_fetch_resp_valid),
 
     // Decoded instruction
-    .insn_valid_o    (insn_valid),
-    .insn_illegal_o  (insn_illegal),
-    .insn_dec_base_o (insn_dec_base),
-    .insn_dec_ctrl_o (insn_dec_ctrl)
+    .insn_valid_o      (insn_valid),
+    .insn_illegal_o    (insn_illegal),
+    .insn_dec_base_o   (insn_dec_base),
+    .insn_dec_bignum_o (insn_dec_bignum),
+    .insn_dec_shared_o (insn_dec_shared)
   );
 
   // Controller: coordinate between functional units, prepare their inputs (e.g. by muxing between
@@ -166,8 +189,9 @@
     .insn_addr_i  (insn_addr),
 
     // Decoded instruction from decoder
-    .insn_dec_base_i (insn_dec_base),
-    .insn_dec_ctrl_i (insn_dec_ctrl),
+    .insn_dec_base_i   (insn_dec_base),
+    .insn_dec_bignum_i (insn_dec_bignum),
+    .insn_dec_shared_i (insn_dec_shared),
 
     // To/from base register file
     .rf_base_wr_addr_o   (rf_base_wr_addr),
@@ -178,12 +202,26 @@
     .rf_base_rd_addr_b_o (rf_base_rd_addr_b),
     .rf_base_rd_data_b_i (rf_base_rd_data_b),
 
+    // To/from bignum register file
+    .rf_bignum_wr_addr_o   (rf_bignum_wr_addr),
+    .rf_bignum_wr_en_o     (rf_bignum_wr_en),
+    .rf_bignum_wr_data_o   (rf_bignum_wr_data),
+    .rf_bignum_rd_addr_a_o (rf_bignum_rd_addr_a),
+    .rf_bignum_rd_data_a_i (rf_bignum_rd_data_a),
+    .rf_bignum_rd_addr_b_o (rf_bignum_rd_addr_b),
+    .rf_bignum_rd_data_b_i (rf_bignum_rd_data_b),
+
     // To/from base ALU
     .alu_base_operation_o         (alu_base_operation),
     .alu_base_comparison_o        (alu_base_comparison),
     .alu_base_operation_result_i  (alu_base_operation_result),
     .alu_base_comparison_result_i (alu_base_comparison_result),
 
+    // To/from bignum ALU
+    .alu_bignum_operation_o         (alu_bignum_operation),
+    .alu_bignum_operation_result_i  (alu_bignum_operation_result),
+
+    // To/from LSU (base and bignum)
     .lsu_load_req_o     (lsu_load_req),
     .lsu_store_req_o    (lsu_store_req),
     .lsu_req_subset_o   (lsu_req_subset),
@@ -194,7 +232,15 @@
 
     .lsu_base_rdata_i   (lsu_base_rdata),
     .lsu_bignum_rdata_i (lsu_bignum_rdata),
-    .lsu_rdata_err_i    (lsu_rdata_err)
+    .lsu_rdata_err_i    (lsu_rdata_err),
+
+    // ISPR read/write (base and bignum)
+    .ispr_addr_o         (ispr_addr),
+    .ispr_base_wdata_o   (ispr_base_wdata),
+    .ispr_base_wr_en_o   (ispr_base_wr_en),
+    .ispr_bignum_wdata_o (ispr_bignum_wdata),
+    .ispr_bignum_wr_en_o (ispr_bignum_wr_en),
+    .ispr_rdata_i        (ispr_rdata)
   );
 
   // Load store unit: read and write data from data memory
@@ -225,18 +271,6 @@
     .lsu_rdata_err_o    (lsu_rdata_err)
   );
 
-  // Control and Status registers
-  // 32b Control and Status Registers (CSRs), and WLEN Wide Special-Purpose Registers (WSRs)
-  otbn_status_registers u_otbn_status_registers (
-    .clk_i,
-    .rst_ni,
-    .rnd_i (rnd)
-
-    // TODO: Add CSR and WSR read/write ports to controller.
-
-    // TODO: Add potential side-channel signals.
-  );
-
   // Base Instruction Subset =======================================================================
 
   // General-Purpose Register File (GPRs): 32 32b registers
@@ -263,4 +297,36 @@
     .operation_result_o  (alu_base_operation_result),
     .comparison_result_o (alu_base_comparison_result)
   );
+
+  otbn_rf_bignum u_otbn_rf_bignum (
+    .clk_i,
+    .rst_ni,
+
+    .wr_addr_i (rf_bignum_wr_addr),
+    .wr_en_i   (rf_bignum_wr_en),
+    .wr_data_i (rf_bignum_wr_data),
+
+    .rd_addr_a_i (rf_bignum_rd_addr_a),
+    .rd_data_a_o (rf_bignum_rd_data_a),
+    .rd_addr_b_i (rf_bignum_rd_addr_b),
+    .rd_data_b_o (rf_bignum_rd_data_b)
+  );
+
+  otbn_alu_bignum u_otbn_alu_bignum (
+    .clk_i,
+    .rst_ni,
+
+    .operation_i         (alu_bignum_operation),
+    .operation_result_o  (alu_bignum_operation_result),
+
+    .ispr_addr_i         (ispr_addr),
+    .ispr_base_wdata_i   (ispr_base_wdata),
+    .ispr_base_wr_en_i   (ispr_base_wr_en),
+    .ispr_bignum_wdata_i (ispr_bignum_wdata),
+    .ispr_bignum_wr_en_i (ispr_bignum_wr_en),
+    .ispr_rdata_o        (ispr_rdata),
+
+    .rnd_i               (rnd)
+  );
+
 endmodule
diff --git a/hw/ip/otbn/rtl/otbn_decoder.sv b/hw/ip/otbn/rtl/otbn_decoder.sv
index a63bb66..69c1a61 100644
--- a/hw/ip/otbn/rtl/otbn_decoder.sv
+++ b/hw/ip/otbn/rtl/otbn_decoder.sv
@@ -22,11 +22,9 @@
   output logic                 insn_valid_o,
   output logic                 insn_illegal_o,
 
-  // Decoded instruction data, matching the "Decoding" section of the specification.
   output insn_dec_base_t       insn_dec_base_o,
-
-  // Additional control signals
-  output insn_dec_ctrl_t       insn_dec_ctrl_o
+  output insn_dec_bignum_t     insn_dec_bignum_o,
+  output insn_dec_shared_t     insn_dec_shared_o
 );
 
   logic        illegal_insn;
@@ -53,8 +51,9 @@
   //////////////////////////////////////
   // Register and immediate selection //
   //////////////////////////////////////
-  imm_a_sel_base_e  imm_a_mux_sel_base; // immediate selection for operand a in base ISA
-  imm_b_sel_base_e  imm_b_mux_sel_base; // immediate selection for operand b in base ISA
+  imm_a_sel_base_e   imm_a_mux_sel_base; // immediate selection for operand a in base ISA
+  imm_b_sel_base_e   imm_b_mux_sel_base; // immediate selection for operand b in base ISA
+  shamt_sel_bignum_e shift_amt_mux_sel_bignum; // shift amount selection in bignum ISA
 
   logic [31:0] imm_i_type_base;
   logic [31:0] imm_s_type_base;
@@ -62,9 +61,10 @@
   logic [31:0] imm_u_type_base;
   logic [31:0] imm_j_type_base;
 
-  alu_op_base_e alu_operator_base;   // ALU operation selection for base ISA
-  op_a_sel_e    alu_op_a_mux_sel;    // operand a selection: reg value, PC, immediate or zero
-  op_b_sel_e    alu_op_b_mux_sel;    // operand b selection: reg value or immediate
+  alu_op_base_e   alu_operator_base;   // ALU operation selection for base ISA
+  alu_op_bignum_e alu_operator_bignum; // ALU operation selection for bignum ISA
+  op_a_sel_e      alu_op_a_mux_sel;    // operand a selection: reg value, PC, immediate or zero
+  op_b_sel_e      alu_op_b_mux_sel;    // operand b selection: reg value or immediate
 
   comparison_op_base_e comparison_operator_base;
 
@@ -78,6 +78,26 @@
   assign imm_u_type_base = { insn[31:12], 12'b0 };
   assign imm_j_type_base = { {12{insn[31]}}, insn[19:12], insn[20], insn[30:21], 1'b0 };
 
+  logic [WLEN-1:0] imm_i_type_bignum;
+
+  assign imm_i_type_bignum = {{(WLEN-10){1'b0}}, insn[29:20]};
+
+  // Shift amount for ALU instructions other than BN.RSHI
+  logic [$clog2(WLEN)-1:0] shift_amt_a_type_bignum;
+  // Shift amount for BN.RSHI
+  logic [$clog2(WLEN)-1:0] shift_amt_s_type_bignum;
+
+  assign shift_amt_a_type_bignum = {insn[29:25], 3'b0};
+  assign shift_amt_s_type_bignum = {insn[31:25], insn[14]};
+
+  logic shift_right_bignum;
+
+  assign shift_right_bignum = insn[30];
+
+  flag_group_t flag_group_bignum;
+
+  assign flag_group_bignum = insn[31];
+
   // source registers
   assign insn_rs1 = insn[19:15];
   assign insn_rs2 = insn[24:20];
@@ -93,6 +113,8 @@
   logic st_insn;
   logic branch_insn;
   logic jump_insn;
+  logic ispr_rw_insn;
+  logic ispr_rs_insn;
 
   // Reduced main ALU immediate MUX for Operand B
   logic [31:0] imm_b_base;
@@ -107,6 +129,15 @@
     endcase
   end
 
+  logic [$clog2(WLEN)-1:0] shift_amt_bignum;
+  always_comb begin
+    unique case (shift_amt_mux_sel_bignum)
+      ShamtSelBignumA: shift_amt_bignum = shift_amt_a_type_bignum;
+      ShamtSelBignumS: shift_amt_bignum = shift_amt_s_type_bignum;
+      default:      shift_amt_bignum = shift_amt_a_type_bignum;
+    endcase
+  end
+
   assign insn_valid_o   = insn_fetch_resp_valid_i & ~illegal_insn;
   assign insn_illegal_o = insn_fetch_resp_valid_i & illegal_insn;
 
@@ -119,7 +150,18 @@
     comparison_op: comparison_operator_base
   };
 
-  assign insn_dec_ctrl_o = '{
+  assign insn_dec_bignum_o = '{
+    a:           insn_rs1,
+    b:           insn_rs2,
+    d:           insn_rd,
+    i:           imm_i_type_bignum,
+    shift_amt:   shift_amt_bignum,
+    shift_right: shift_right_bignum,
+    flag_group:  flag_group_bignum,
+    alu_op:      alu_operator_bignum
+  };
+
+  assign insn_dec_shared_o = '{
     subset:        insn_subset,
     op_a_sel:      alu_op_a_mux_sel,
     op_b_sel:      alu_op_b_mux_sel,
@@ -129,7 +171,9 @@
     ld_insn:       ld_insn,
     st_insn:       st_insn,
     branch_insn:   branch_insn,
-    jump_insn:     jump_insn
+    jump_insn:     jump_insn,
+    ispr_rw_insn:  ispr_rw_insn,
+    ispr_rs_insn:  ispr_rs_insn
   };
 
   /////////////
@@ -148,13 +192,15 @@
     st_insn               = 1'b0;
     branch_insn           = 1'b0;
     jump_insn             = 1'b0;
+    ispr_rw_insn          = 1'b0;
+    ispr_rs_insn          = 1'b0;
 
     opcode                = insn_opcode_e'(insn[6:0]);
 
     unique case (opcode)
-      /////////
-      // ALU //
-      /////////
+      //////////////
+      // Base ALU //
+      //////////////
 
       InsnOpcodeBaseLui: begin  // Load Upper Immediate
         insn_subset      = InsnSubsetBase;
@@ -221,9 +267,9 @@
         end
       end
 
-      //////////////////
-      // Loads/Stores //
-      //////////////////
+      ///////////////////////
+      // Base Loads/Stores //
+      ///////////////////////
 
       InsnOpcodeBaseLoad: begin
         insn_subset  = InsnSubsetBase;
@@ -248,9 +294,9 @@
         end
       end
 
-      /////////////////
-      // Branch/Jump //
-      /////////////////
+      //////////////////////
+      // Base Branch/Jump //
+      //////////////////////
 
       InsnOpcodeBaseBranch: begin
         insn_subset = InsnSubsetBase;
@@ -283,9 +329,9 @@
         end
       end
 
-      /////////////
-      // Special //
-      /////////////
+      //////////////////
+      // Base Special //
+      //////////////////
 
       InsnOpcodeBaseSystem: begin
         insn_subset = InsnSubsetBase;
@@ -304,6 +350,80 @@
             illegal_insn = 1'b1;
           end
         end else begin
+          rf_we        = 1'b1;
+          rf_wdata_sel = RfWdSelIspr;
+          rf_ren_a     = 1'b1;
+
+          if (insn[14:12] == 3'b001) begin
+            ispr_rw_insn = 1'b1;
+          end else if(insn[14:12] == 3'b010) begin
+            ispr_rs_insn = 1'b1;
+          end else begin
+            illegal_insn = 1'b1;
+          end
+        end
+      end
+
+      ////////////////
+      // Bignum ALU //
+      ////////////////
+
+      InsnOpcodeBignumArith: begin
+        insn_subset = InsnSubsetBignum;
+        rf_we       = 1'b1;
+        rf_ren_a    = 1'b1;
+
+        if (insn[14:12] != 3'b100) begin
+          // All ALU instructions other than BN.ADDI/BN.SUBI
+          rf_ren_b = 1'b1;
+        end
+
+        unique case(insn[14:12])
+          3'b110,
+          3'b111: illegal_insn = 1'b1;
+          default: ;
+        endcase
+      end
+
+      /////////////////////////
+      // Bignum Logical/RSHI //
+      /////////////////////////
+
+      InsnOpcodeBignumShiftLogical: begin
+        insn_subset = InsnSubsetBignum;
+        rf_we       = 1'b1;
+        rf_ren_a    = 1'b1;
+
+        // BN.NOT doesn't read register B
+        if (insn[14:12] != 3'b101) begin
+          rf_ren_b = 1'b1;
+        end
+
+        unique case(insn[14:12])
+          3'b000,
+          3'b001: illegal_insn = 1'b1;
+          default: ;
+        endcase
+      end
+
+      ////////////////////////////
+      // Bignum WSR/LID/SID/MOV //
+      ////////////////////////////
+
+      InsnOpcodeBignumMisc: begin
+        insn_subset = InsnSubsetBignum;
+
+        if (insn[14:12] == 3'b111) begin //BN.WSRRS/BN.WSRRW
+          rf_we         = 1'b1;
+          rf_ren_a      = 1'b1;
+          rf_wdata_sel  = RfWdSelIspr;
+
+          if (insn[31]) begin
+            ispr_rw_insn = 1'b1;
+          end else begin
+            ispr_rs_insn = 1'b1;
+          end
+        end else begin
           illegal_insn = 1'b1;
         end
       end
@@ -326,19 +446,21 @@
 
   always_comb begin
     alu_operator_base        = AluOpBaseAdd;
+    alu_operator_bignum      = AluOpBignumAdd;
     comparison_operator_base = ComparisonOpBaseEq;
     alu_op_a_mux_sel         = OpASelRegister;
     alu_op_b_mux_sel         = OpBSelImmediate;
 
     imm_a_mux_sel_base       = ImmBaseAZero;
     imm_b_mux_sel_base       = ImmBaseBI;
+    shift_amt_mux_sel_bignum = ShamtSelBignumA;
 
     opcode_alu               = insn_opcode_e'(insn_alu[6:0]);
 
     unique case (opcode_alu)
-      /////////
-      // ALU //
-      /////////
+      //////////////
+      // Base ALU //
+      //////////////
 
       InsnOpcodeBaseLui: begin  // Load Upper Immediate
         alu_op_a_mux_sel   = OpASelZero;
@@ -402,9 +524,9 @@
         end
       end
 
-      //////////////////
-      // Loads/Stores //
-      //////////////////
+      ///////////////////////
+      // Base Loads/Stores //
+      ///////////////////////
 
       InsnOpcodeBaseLoad: begin
         alu_op_a_mux_sel   = OpASelRegister;
@@ -420,9 +542,9 @@
         imm_b_mux_sel_base = ImmBaseBS;
       end
 
-      /////////////////
-      // Branch/Jump //
-      /////////////////
+      //////////////////////
+      // Base Branch/Jump //
+      //////////////////////
 
       InsnOpcodeBaseBranch: begin
         alu_op_a_mux_sel         = OpASelCurrPc;
@@ -446,19 +568,87 @@
         imm_b_mux_sel_base = ImmBaseBI;
       end
 
-      /////////////
-      // Special //
-      /////////////
+      //////////////////
+      // Base Special //
+      //////////////////
 
       InsnOpcodeBaseSystem: begin
-        if (insn_alu[14:12] == 3'b000) begin
-          // non CSR related SYSTEM instructions
-          alu_op_a_mux_sel = OpASelRegister;
-          alu_op_b_mux_sel = OpBSelImmediate;
-        end
-
+        // The only instructions with System opcode that care about operands are CSR access
+        alu_op_a_mux_sel   = OpASelRegister;
+        imm_b_mux_sel_base = ImmBaseBI;
       end
       default: ;
+
+      ////////////////
+      // Bignum ALU //
+      ////////////////
+
+      InsnOpcodeBignumArith: begin
+        alu_op_a_mux_sel         = OpASelRegister;
+        shift_amt_mux_sel_bignum = ShamtSelBignumA;
+
+        unique case(insn_alu[14:12])
+          3'b000: alu_operator_bignum = AluOpBignumAdd;
+          3'b001: alu_operator_bignum = AluOpBignumSub;
+          3'b010: alu_operator_bignum = AluOpBignumAddc;
+          3'b011: alu_operator_bignum = AluOpBignumSubb;
+          3'b100: begin
+            if (insn_alu[30]) begin
+              alu_operator_bignum = AluOpBignumSub;
+            end else begin
+              alu_operator_bignum = AluOpBignumAdd;
+            end
+          end
+          3'b101: begin
+            if (insn_alu[30]) begin
+              alu_operator_bignum = AluOpBignumSubm;
+            end else begin
+              alu_operator_bignum = AluOpBignumAddm;
+            end
+          end
+          default: ;
+        endcase
+
+        if (insn_alu[14:12] != 3'b100) begin
+          alu_op_b_mux_sel = OpBSelRegister;
+        end else begin
+          alu_op_b_mux_sel = OpBSelImmediate;
+        end
+      end
+
+      /////////////////////////
+      // Bignum Logical/RSHI //
+      /////////////////////////
+
+      InsnOpcodeBignumShiftLogical: begin
+        alu_op_a_mux_sel = OpASelRegister;
+        alu_op_b_mux_sel = OpBSelRegister;
+
+        unique case(insn_alu[14:12])
+          3'b010: begin
+            shift_amt_mux_sel_bignum = ShamtSelBignumA;
+            alu_operator_bignum      = AluOpBignumAnd;
+          end
+          3'b100: begin
+            shift_amt_mux_sel_bignum = ShamtSelBignumA;
+            alu_operator_bignum      = AluOpBignumOr;
+          end
+          3'b101: begin
+            shift_amt_mux_sel_bignum = ShamtSelBignumA;
+            alu_operator_bignum      = AluOpBignumNot;
+          end
+          3'b110: begin
+            shift_amt_mux_sel_bignum = ShamtSelBignumA;
+            alu_operator_bignum      = AluOpBignumXor;
+          end
+          3'b011,
+          3'b111: begin
+            shift_amt_mux_sel_bignum = ShamtSelBignumS;
+            alu_operator_bignum      = AluOpBignumRshi;
+          end
+          default: ;
+        endcase
+      end
     endcase
   end
 
diff --git a/hw/ip/otbn/rtl/otbn_pkg.sv b/hw/ip/otbn/rtl/otbn_pkg.sv
index 98e2cae..d92b8b9 100644
--- a/hw/ip/otbn/rtl/otbn_pkg.sv
+++ b/hw/ip/otbn/rtl/otbn_pkg.sv
@@ -11,6 +11,9 @@
   // Data path width for BN (wide) instructions, in bits.
   parameter int WLEN = 256;
 
+  // Number of 32-bit words per WLEN
+  parameter int BaseWordsPerWLEN = WLEN / 32;
+
   // Number of flag groups
   parameter int NFlagGroups = 2;
 
@@ -49,17 +52,21 @@
   // Opcodes (field [6:0] in the instruction), matching the RISC-V specification for the base
   // instruction subset.
   typedef enum logic [6:0] {
-    InsnOpcodeBaseLoad     = 7'h03,
-    InsnOpcodeBaseMemMisc  = 7'h0f,
-    InsnOpcodeBaseOpImm    = 7'h13,
-    InsnOpcodeBaseAuipc    = 7'h17,
-    InsnOpcodeBaseStore    = 7'h23,
-    InsnOpcodeBaseOp       = 7'h33,
-    InsnOpcodeBaseLui      = 7'h37,
-    InsnOpcodeBaseBranch   = 7'h63,
-    InsnOpcodeBaseJalr     = 7'h67,
-    InsnOpcodeBaseJal      = 7'h6f,
-    InsnOpcodeBaseSystem   = 7'h73
+    InsnOpcodeBaseLoad           = 7'h03,
+    InsnOpcodeBaseMemMisc        = 7'h0f,
+    InsnOpcodeBaseOpImm          = 7'h13,
+    InsnOpcodeBaseAuipc          = 7'h17,
+    InsnOpcodeBaseStore          = 7'h23,
+    InsnOpcodeBaseOp             = 7'h33,
+    InsnOpcodeBaseLui            = 7'h37,
+    InsnOpcodeBaseBranch         = 7'h63,
+    InsnOpcodeBaseJalr           = 7'h67,
+    InsnOpcodeBaseJal            = 7'h6f,
+    InsnOpcodeBaseSystem         = 7'h73,
+    InsnOpcodeBignumMisc         = 7'h0B,
+    InsnOpcodeBignumArith        = 7'h2B,
+    InsnOpcodeBignumMulqacc      = 7'h3B,
+    InsnOpcodeBignumShiftLogical = 7'h7B
   } insn_opcode_e;
 
   typedef enum logic [3:0] {
@@ -76,6 +83,24 @@
     AluOpBaseSll
   } alu_op_base_e;
 
+  // TODO: Can we arrange this to simplify decoding logic?
+  typedef enum logic [3:0] {
+    AluOpBignumAdd,
+    AluOpBignumAddc,
+    AluOpBignumAddm,
+
+    AluOpBignumSub,
+    AluOpBignumSubb,
+    AluOpBignumSubm,
+
+    AluOpBignumRshi,
+
+    AluOpBignumXor,
+    AluOpBignumOr,
+    AluOpBignumAnd,
+    AluOpBignumNot
+  } alu_op_bignum_e;
+
   typedef enum logic {
     ComparisonOpBaseEq,
     ComparisonOpBaseNeq
@@ -95,7 +120,6 @@
     OpBSelImmediate = 1'b1
   } op_b_sel_e;
 
-
   // Immediate a selection for base ISA
   typedef enum logic {
     ImmBaseAZero
@@ -110,11 +134,18 @@
     ImmBaseBJ
   } imm_b_sel_base_e;
 
+  // Shift amount select for bignum ISA
+  typedef enum logic {
+    ShamtSelBignumA,
+    ShamtSelBignumS
+  } shamt_sel_bignum_e;
+
   // Regfile write data selection
   typedef enum logic [1:0] {
     RfWdSelEx,
     RfWdSelNextPc,
-    RfWdSelLsu
+    RfWdSelLsu,
+    RfWdSelIspr
   } rf_wd_sel_e;
 
   // Control and Status Registers (CSRs)
@@ -127,7 +158,7 @@
     CsrMod3  = 12'h7D3,
     CsrMod4  = 12'h7D4,
     CsrMod5  = 12'h7D5,
-    CsrMod6  = 12'hdD6,
+    CsrMod6  = 12'h7D6,
     CsrMod7  = 12'h7D7,
     CsrRnd   = 12'hFC0
   } csr_e;
@@ -136,29 +167,48 @@
   parameter int NWsr = 3; // Number of WSRs
   parameter int WsrNumWidth = $clog2(NWsr);
   typedef enum logic [WsrNumWidth-1:0] {
-    WsrMod = 'd0,
-    WsrRnd = 'd1,
-    WsrAcc = 'd2
+    WsrMod   = 'd0,
+    WsrRnd   = 'd1,
+    WsrAcc   = 'd2
   } wsr_e;
+
+  // Internal Special Purpose Registers (ISPRs)
+  // CSRs and WSRs overlap in which registers they map onto. ISPRs are the actual registers in the
+  // design that CSRs and WSRs are mapped onto.
+  parameter int NIspr = NWsr + 1;
+  parameter int IsprNumWidth = $clog2(NIspr);
+  typedef enum logic [IsprNumWidth-1:0] {
+    IsprMod   = 'd0,
+    IsprRnd   = 'd1,
+    IsprAcc   = 'd2,
+    IsprFlags = 'd3
+  } ispr_e;
+
+  typedef logic [$clog2(NFlagGroups)-1:0] flag_group_t;
+
+  typedef struct packed {
+    logic Z;
+    logic M;
+    logic L;
+    logic C;
+  } flags_t;
+
+  localparam int FlagsWidth = $bits(flags_t);
+
   // TODO: Figure out how to add assertions for the enum type width; initial blocks, as produced by
   // ASSERT_INIT, aren't allowed in packages.
   //`ASSERT_INIT(WsrESizeMatchesParameter_A, $bits(wsr_e) == WsrNumWidth)
 
-  // Decoded instruction components, with signals matching the "Decoding" section of the
-  // specification.
+  // Structures for decoded instructions, grouped into three:
+  // - insn_dec_shared_t - Anything that applies to both bignum and base ISAs, all fields valid when
+  // instruction is valid.
+  // - insn_dec_base_t - Anything that only applies to base ISA, fields only valid when `subset` in
+  // `insn_dec_shared_t` indicates a base ISA instruction.
+  // - insn_dec_bignum_t - Anything that only applies to bignum ISA, fields only valid when `subset` in
+  // `insn_dec_shared_t` indicates a bignum ISA instruction.
+  //
   // TODO: The variable names are rather short, especially "i" is confusing. Think about renaming.
-
-  typedef struct packed {
-    logic [4:0]     d;  // Destination register
-    logic [4:0]     a;  // First source register
-    logic [4:0]     b;  // Second source register
-    logic [31:0]    i;  // Immediate
-    alu_op_base_e        alu_op;
-    comparison_op_base_e comparison_op;
-  } insn_dec_base_t;
-
-  // Control signals from decoder to controller: additional information about the decoded
-  // instruction influencing the operation.
+  //
   typedef struct packed {
     insn_subset_e   subset;
     op_a_sel_e      op_a_sel;
@@ -170,7 +220,32 @@
     logic           st_insn;
     logic           branch_insn;
     logic           jump_insn;
-  } insn_dec_ctrl_t;
+    logic           ispr_rw_insn;
+    logic           ispr_rs_insn;
+  } insn_dec_shared_t;
+
+  typedef struct packed {
+    logic [4:0]          d;             // Destination register
+    logic [4:0]          a;             // First source register
+    logic [4:0]          b;             // Second source register
+    logic [31:0]         i;             // Immediate
+    alu_op_base_e        alu_op;
+    comparison_op_base_e comparison_op;
+  } insn_dec_base_t;
+
+  typedef struct packed {
+    logic [WdrAw-1:0]        d;           // Destination register
+    logic [WdrAw-1:0]        a;           // First source register
+    logic [WdrAw-1:0]        b;           // Second source register
+    logic [WLEN-1:0]         i;           // Immediate
+
+    // Shifting only applies to a subset of ALU operations
+    logic [$clog2(WLEN)-1:0] shift_amt;   // Shift amount
+    logic                    shift_right; // Shift right if set otherwise left
+
+    flag_group_t             flag_group;
+    alu_op_bignum_e          alu_op;
+  } insn_dec_bignum_t;
 
   typedef struct packed {
     alu_op_base_e     op;
@@ -184,4 +259,13 @@
     logic [31:0] operand_b;
   } alu_base_comparison_t;
 
+  typedef struct packed {
+    alu_op_bignum_e op;
+    logic [WLEN-1:0]         operand_a;
+    logic [WLEN-1:0]         operand_b;
+    logic                    shift_right;
+    logic [$clog2(WLEN)-1:0] shift_amt;
+    flag_group_t             flag_group;
+  } alu_bignum_operation_t;
+
 endpackage
diff --git a/hw/ip/otbn/rtl/otbn_rf_bignum.sv b/hw/ip/otbn/rtl/otbn_rf_bignum.sv
new file mode 100644
index 0000000..04f813b
--- /dev/null
+++ b/hw/ip/otbn/rtl/otbn_rf_bignum.sv
@@ -0,0 +1,55 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * WLEN (256b) Wide Register File (WDRs)
+ *
+ * Features:
+ * - 2 combinational read ports (a and b)
+ * - 1 write port; every register is cleared to zero on reset
+ * - Half-word (WLEN/2) write enables (wr_en_i[0]: lower half, wr_en_i[1]: upper half)
+ */
+module otbn_rf_bignum
+  import otbn_pkg::*;
+(
+  input  logic             clk_i,
+  input  logic             rst_ni,
+
+  input  logic [WdrAw-1:0] wr_addr_i,
+  input  logic [1:0]       wr_en_i, // [0]: write lower WLEN/2 bits, [1]: write upper WLEN/2 bits
+  input  logic [WLEN-1:0]  wr_data_i,
+
+  input  logic [WdrAw-1:0] rd_addr_a_i,
+  output logic [WLEN-1:0]  rd_data_a_o,
+
+  input  logic [WdrAw-1:0] rd_addr_b_i,
+  output logic [WLEN-1:0]  rd_data_b_o
+);
+  logic [WLEN-1:0] rf [NWdr];
+  logic [1:0]      we_onehot [NWdr]; // Per-register half enables, non-zero only for addressed WDR
+
+  for (genvar i = 0;i < NWdr; i++) begin : g_rf
+    assign we_onehot[i] = wr_en_i & {2{wr_addr_i == i}}; // Gate both enables with address match
+
+    // Lower half. Registers are split into halves for clear separation of the enable terms
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        rf[i][0+:WLEN/2] <= '0;
+      end else if (we_onehot[i][0]) begin
+        rf[i][0+:WLEN/2] <= wr_data_i[0+:WLEN/2];
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin // Upper half, enabled by wr_en_i[1]
+      if (!rst_ni) begin
+        rf[i][WLEN/2+:WLEN/2] <= '0;
+      end else if (we_onehot[i][1]) begin
+        rf[i][WLEN/2+:WLEN/2] <= wr_data_i[WLEN/2+:WLEN/2];
+      end
+    end
+  end
+
+  assign rd_data_a_o = rf[rd_addr_a_i]; // Combinational read, port a
+  assign rd_data_b_o = rf[rd_addr_b_i]; // Combinational read, port b
+endmodule
diff --git a/hw/ip/otbn/rtl/otbn_status_registers.sv b/hw/ip/otbn/rtl/otbn_status_registers.sv
deleted file mode 100644
index 85908bb..0000000
--- a/hw/ip/otbn/rtl/otbn_status_registers.sv
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright lowRISC contributors.
-// Licensed under the Apache License, Version 2.0, see LICENSE for details.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * OTBN Special Purpose Registers: 32b CSRs, and WLEN WSRs
- */
-module otbn_status_registers
-  import otbn_pkg::*;
-(
-  input logic             clk_i,
-  input logic             rst_ni,
-
-  input [WLEN-1:0]        rnd_i
-);
-
-endmodule