[otbn] Add separate FG0 & FG1 CSRs

Fixes #3693

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/doc/_index.md b/hw/ip/otbn/doc/_index.md
index 595eafb..10205c5 100644
--- a/hw/ip/otbn/doc/_index.md
+++ b/hw/ip/otbn/doc/_index.md
@@ -124,9 +124,52 @@
       <td>0x7C0</td>
       <td>RW</td>
       <td>
+        <strong>FG0</strong>.
+        Wide arithmetic flag group 0.
+        This CSR provides access to flag group 0 used by wide integer arithmetic.
+        <strong>FLAGS</strong>, <strong>FG0</strong> and <strong>FG1</strong> provide different views on the same underlying bits.
+        <table>
+          <thead>
+            <tr><th>Bit</th><th>Description</th></tr>
+          </thead>
+          <tbody>
+            <tr><td>0</td><td>Carry of Flag Group 0</td></tr>
+            <tr><td>1</td><td>MSb of Flag Group 0</td></tr>
+            <tr><td>2</td><td>LSb of Flag Group 0</td></tr>
+            <tr><td>3</td><td>Zero of Flag Group 0</td></tr>
+          </tbody>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td>0x7C1</td>
+      <td>RW</td>
+      <td>
+        <strong>FG1</strong>.
+        Wide arithmetic flag group 1.
+        This CSR provides access to flag group 1 used by wide integer arithmetic.
+        <strong>FLAGS</strong>, <strong>FG0</strong> and <strong>FG1</strong> provide different views on the same underlying bits.
+        <table>
+          <thead>
+            <tr><th>Bit</th><th>Description</th></tr>
+          </thead>
+          <tbody>
+            <tr><td>0</td><td>Carry of Flag Group 1</td></tr>
+            <tr><td>1</td><td>MSb of Flag Group 1</td></tr>
+            <tr><td>2</td><td>LSb of Flag Group 1</td></tr>
+            <tr><td>3</td><td>Zero of Flag Group 1</td></tr>
+          </tbody>
+        </table>
+      </td>
+    </tr>
+    <tr>
+      <td>0x7C8</td>
+      <td>RW</td>
+      <td>
         <strong>FLAGS</strong>.
-        Wide arithmetic flags.
-        This CSR provides access to the flags used in wide integer arithmetic.
+        Wide arithmetic flag groups.
+        This CSR provides access to both flag groups used by wide integer arithmetic.
+        <strong>FLAGS</strong>, <strong>FG0</strong> and <strong>FG1</strong> provide different views on the same underlying bits.
         <table>
           <thead>
             <tr><th>Bit</th><th>Description</th></tr>
diff --git a/hw/ip/otbn/dv/otbnsim/sim/csr.py b/hw/ip/otbn/dv/otbnsim/sim/csr.py
index 4efaa6c..9ab87ad 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/csr.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/csr.py
@@ -2,10 +2,6 @@
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import List
-
-from riscvmodel.types import Trace  # type: ignore
-
 from .flags import FlagGroups
 from .wsr import WSRFile
 
@@ -15,17 +11,32 @@
     def __init__(self) -> None:
         self.flags = FlagGroups()
 
+    @staticmethod
+    def _get_field(field_idx: int, field_size: int, val: int) -> int:
+        # Shift the wanted field down to bit 0, then drop everything above it.
+        return (val >> (field_size * field_idx)) & ((1 << field_size) - 1)
+
+    @staticmethod
+    def _set_field(field_idx: int, field_size: int, field_val: int,
+                   old_val: int) -> int:
+        # Truncate field_val to the field so it cannot clobber neighbours.
+        mask, shift = (1 << field_size) - 1, field_size * field_idx
+        return (old_val & ~(mask << shift)) | ((field_val & mask) << shift)
+
     def read_unsigned(self, wsrs: WSRFile, idx: int) -> int:
-        if idx == 0x7c0:
+        if 0x7c0 <= idx <= 0x7c1:
+            # FG0/FG1
+            fg = idx - 0x7c0
+            return self._get_field(fg, 4, self.flags.read_unsigned())
+
+        if idx == 0x7c8:
             # FLAGS register
             return self.flags.read_unsigned()
 
         if 0x7d0 <= idx <= 0x7d7:
             # MOD0 .. MOD7. MODi is bits [32*(i+1)-1..32*i]
-            i = idx - 0x7d0
-            mod_val = wsrs.MOD.read_unsigned()
-            mask32 = (1 << 32) - 1
-            return (mod_val >> (32 * i)) & mask32
+            mod_n = idx - 0x7d0
+            return self._get_field(mod_n, 32, wsrs.MOD.read_unsigned())
 
         if idx == 0xfc0:
             # RND register
@@ -36,19 +47,23 @@
     def write_unsigned(self, wsrs: WSRFile, idx: int, value: int) -> None:
         assert 0 <= value < (1 << 32)
 
-        if idx == 0x7c0:
+        if 0x7c0 <= idx <= 0x7c1:
+            # FG0/FG1
+            fg = idx - 0x7c0
+            old = self.flags.read_unsigned()
+            self.flags.write_unsigned(self._set_field(fg, 4, value, old))
+            return
+
+        if idx == 0x7c8:
             # FLAGS register
             self.flags.write_unsigned(value)
             return
 
         if 0x7d0 <= idx <= 0x7d7:
             # MOD0 .. MOD7. MODi is bits [32*(i+1)-1..32*i]. read,modify,write.
-            i = idx - 0x7d0
-            old_val = wsrs.MOD.read_unsigned()
-            shifted_mask = ((1 << 32) - 1) << (32 * i)
-            cleared = old_val & ~shifted_mask
-            new_val = cleared | (value << (32 * i))
-            wsrs.MOD.write_unsigned(new_val)
+            mod_n = idx - 0x7d0
+            old = wsrs.MOD.read_unsigned()
+            wsrs.MOD.write_unsigned(self._set_field(mod_n, 32, value, old))
             return
 
         if idx == 0xfc0:
diff --git a/hw/ip/otbn/dv/smoke/smoke_test.s b/hw/ip/otbn/dv/smoke/smoke_test.s
index 94bd991..f984d6f 100644
--- a/hw/ip/otbn/dv/smoke/smoke_test.s
+++ b/hw/ip/otbn/dv/smoke/smoke_test.s
@@ -182,7 +182,7 @@
 bn.subb w17, w3, w4, FG1
 
 # x24 = {fg1, fg0} = 0x55
-csrrs x24, 0x7c0, x0
+csrrs x24, 0x7c8, x0
 
 # w18 = w1 + (w2 << 17B) = 0x1296659f_bbc28370_23634ee9_22168a4e_c79af825_69be586e_9866bb3b_53769ada
 bn.add w18, w1, w2 << 17B
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index 51a029c..e93e3cd 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -112,8 +112,10 @@
   logic [ImemAddrWidth-1:0] next_insn_addr;
 
   csr_e                                csr_addr;
+  logic [31:0]                         csr_rdata_raw;
   logic [31:0]                         csr_rdata;
   logic [BaseWordsPerWLEN-1:0]         csr_rdata_mux [32];
+  logic [31:0]                         csr_wdata_raw;
   logic [31:0]                         csr_wdata;
 
   wsr_e                                wsr_addr;
@@ -459,7 +461,7 @@
     ispr_word_addr_base = '0;
 
     unique case (csr_addr)
-      CsrFlags: begin
+      CsrFlags, CsrFg0, CsrFg1 : begin
         ispr_addr_base      = IsprFlags;
         ispr_word_addr_base = '0;
       end
@@ -485,10 +487,34 @@
       assign csr_rdata_mux[i_bit][i_word] = ispr_rdata_i[i_word*32 + i_bit] & ispr_word_sel_base[i_word];
     end
 
-    assign csr_rdata[i_bit] = |csr_rdata_mux[i_bit];
+    assign csr_rdata_raw[i_bit] = |csr_rdata_mux[i_bit];
   end
 
-  assign csr_wdata = insn_dec_shared_i.ispr_rs_insn ? csr_rdata | rf_base_rd_data_a_i : rf_base_rd_data_a_i;
+  // Specialised read data handling for CSR reads where raw read data needs modification.
+  always_comb begin
+    csr_rdata = csr_rdata_raw;
+
+    unique case(csr_addr)
+      // For FG0/FG1 select out appropriate bits from FLAGS ISPR and pad the rest with zeros.
+      CsrFg0: csr_rdata = {28'b0, csr_rdata_raw[3:0]};
+      CsrFg1: csr_rdata = {28'b0, csr_rdata_raw[7:4]};
+      default: ;
+    endcase
+  end
+
+  assign csr_wdata_raw = insn_dec_shared_i.ispr_rs_insn ? csr_rdata | rf_base_rd_data_a_i : rf_base_rd_data_a_i;
+
+  // Specialised write data handling for CSR writes where raw write data needs modification.
+  always_comb begin
+    csr_wdata = csr_wdata_raw;
+
+    unique case(csr_addr)
+      // FG0/FG1 writes replace only their own 4 bits; the other group keeps its read value.
+      CsrFg0: csr_wdata = {24'b0, csr_rdata_raw[7:4], csr_wdata_raw[3:0]};
+      CsrFg1: csr_wdata = {24'b0, csr_wdata_raw[3:0], csr_rdata_raw[3:0]};
+      default: ;
+    endcase
+  end
 
   assign wsr_addr = wsr_e'(insn_dec_bignum_i.i[WsrNumWidth-1:0]);
 
diff --git a/hw/ip/otbn/rtl/otbn_pkg.sv b/hw/ip/otbn/rtl/otbn_pkg.sv
index 83fecc6..cb9713d 100644
--- a/hw/ip/otbn/rtl/otbn_pkg.sv
+++ b/hw/ip/otbn/rtl/otbn_pkg.sv
@@ -160,7 +160,9 @@
   // Control and Status Registers (CSRs)
   parameter int CsrNumWidth = 12;
   typedef enum logic [CsrNumWidth-1:0] {
-    CsrFlags = 12'h7C0,
+    CsrFg0   = 12'h7C0,
+    CsrFg1   = 12'h7C1,
+    CsrFlags = 12'h7C8,
     CsrMod0  = 12'h7D0,
     CsrMod1  = 12'h7D1,
     CsrMod2  = 12'h7D2,