fix(spi_device): Missed Command Upload

This commit fixes one of the problems reported in issue #11871.

Problem:

    SW reads an incorrect opcode for Page Program.

Analysis
--------

The Page Program (0x02) opcode is adjacent to the opcodes of other
commands that SPI_DEVICE processes. As a result, the datapath can only
be determined when the very last beat of the command opcode arrives,
which is in the second half of the 7th SCK.

The command parser activates the Upload module at that time. However,
the top module switches the SRAM mux to the Upload module only when the
datapath selection is latched by the inverted SCK, which is right after
the 8th posedge of SCK.

The Upload module, however, pushes the CMDFIFO entry (0x02 Page Program)
prior to the 8th posedge of SCK so that it can safely update the FIFO
content, because SCK may not have a 9th posedge if the SPI command is an
opcode-only command. Examples are CHIP ERASE (0xC7), SECTOR ERASE, etc.

Consequently, the CMDFIFO push is issued before the SRAM mux has been
switched to the Upload module.

Resolution
----------

This commit revises the mux design. Rather than using
`cmd_dp_sel_outclk`, it uses `cmd_dp_sel` (SCK clock domain) directly
for the SRAM mux. The other signals must remain in the inverted SCK
clock domain to match the output data phase.

Signed-off-by: Eunchan Kim <eunchan@opentitan.org>
diff --git a/hw/ip/spi_device/rtl/spi_device.sv b/hw/ip/spi_device/rtl/spi_device.sv
index 1324f26..806167f 100644
--- a/hw/ip/spi_device/rtl/spi_device.sv
+++ b/hw/ip/spi_device/rtl/spi_device.sv
@@ -121,6 +121,8 @@
 
 
   // Submoule SRAM Requests
+  sram_l2m_t flash_sram_l2m;
+  sram_m2l_t flash_sram_m2l;
   sram_l2m_t sub_sram_l2m [IoModeEnd];
   sram_m2l_t sub_sram_m2l [IoModeEnd];
 
@@ -1023,6 +1025,39 @@
     else            cmd_dp_sel_outclk <= cmd_dp_sel;
   end
 
+  // SCK clock domain MUX for SRAM access for Flash and Passthrough
+  always_comb begin
+    flash_sram_l2m = '{ default: '0 };
+
+    for (int unsigned i = IoModeCmdParse ; i < IoModeEnd ; i++) begin
+      sub_sram_m2l[i] = '{
+        rvalid: 1'b 0,
+        rdata: '0,
+        rerror: '{uncorr: 1'b 0, corr: 1'b 0}
+      };
+    end
+
+    unique case (cmd_dp_sel)
+      DpReadCmd, DpReadSFDP: begin
+        // SRAM:: Remember this has glitch
+        // switch should happen only when clock gate is disabled.
+        flash_sram_l2m = sub_sram_l2m[IoModeReadCmd];
+        sub_sram_m2l[IoModeReadCmd] = flash_sram_m2l;
+      end
+
+      DpUpload: begin
+        flash_sram_l2m = sub_sram_l2m[IoModeUpload];
+        sub_sram_m2l[IoModeUpload] = flash_sram_m2l;
+      end
+
+      default: begin
+        // DpNone, DpReadStatus, DpReadJEDEC
+        flash_sram_l2m = '{default: '0 };
+      end
+    endcase
+  end
+
+  // inverted SCK clock domain MUX for IO Mode and P2S
   always_comb begin
     io_mode = SingleIO;
     p2s_valid = 1'b 0;
@@ -1030,13 +1065,18 @@
     sub_p2s_sent = '{default: 1'b 0};
 
     mem_b_l2m = '{ default: '0 };
-    for (int unsigned i = 0 ; i < IoModeEnd ; i++) begin
-      sub_sram_m2l[i] = '{
-        rvalid: 1'b 0,
-        rdata: '0,
-        rerror: '{uncorr: 1'b 0, corr: 1'b 0}
-      };
-    end
+
+    sub_sram_m2l[IoModeFw] = '{
+      rvalid: 1'b 0,
+      rdata: '0,
+      rerror: '{uncorr: 1'b 0, corr: 1'b 0}
+    };
+
+    flash_sram_m2l = '{
+      rvalid: 1'b 0,
+      rdata: '0,
+      rerror: '{uncorr: 1'b 0, corr: 1'b 0}
+    };
 
     unique case (spi_mode)
       FwMode: begin
@@ -1053,13 +1093,16 @@
       end
 
       FlashMode, PassThrough: begin
+        // SRAM comb logic is in SCK clock domain
+        mem_b_l2m = flash_sram_l2m;
+        flash_sram_m2l = mem_b_m2l;
+
         unique case (cmd_dp_sel_outclk)
           DpNone: begin
             io_mode = sub_iomode[IoModeCmdParse];
 
             sub_p2s_sent[IoModeCmdParse] = p2s_sent;
 
-            // Leave SRAM default;
           end
           DpReadCmd, DpReadSFDP: begin
             io_mode = sub_iomode[IoModeReadCmd];
@@ -1067,11 +1110,6 @@
             p2s_valid = sub_p2s_valid[IoModeReadCmd];
             p2s_data  = sub_p2s_data[IoModeReadCmd];
             sub_p2s_sent[IoModeReadCmd] = p2s_sent;
-
-            // SRAM:: Remember this has glitch
-            // switch should happen only when clock gate is disabled.
-            mem_b_l2m = sub_sram_l2m[IoModeReadCmd];
-            sub_sram_m2l[IoModeReadCmd] = mem_b_m2l;
           end
           DpReadStatus: begin
             io_mode = sub_iomode[IoModeStatus];
@@ -1080,7 +1118,6 @@
             p2s_data  = sub_p2s_data[IoModeStatus];
             sub_p2s_sent[IoModeStatus] = p2s_sent;
 
-            // default memory (tied)
           end
 
           DpReadJEDEC: begin
@@ -1097,9 +1134,6 @@
             p2s_valid = sub_p2s_valid[IoModeUpload];
             p2s_data  = sub_p2s_data[IoModeUpload];
             sub_p2s_sent[IoModeUpload] = p2s_sent;
-
-            mem_b_l2m = sub_sram_l2m[IoModeUpload];
-            sub_sram_m2l[IoModeUpload] = mem_b_m2l;
           end
           // DpUnknown:
           default: begin