[flash_ctrl] Update RTL to support differing data and bus widths

Signed-off-by: Timothy Chen <timothytim@google.com>
diff --git a/hw/dv/data/sim.mk b/hw/dv/data/sim.mk
index 8b8fe30..de27979 100644
--- a/hw/dv/data/sim.mk
+++ b/hw/dv/data/sim.mk
@@ -65,7 +65,7 @@
 
 ifeq (${sw_test_is_prebuilt},1)
 	# Copy over the sw test image and related sources to the run_dir.
-	cp ${proj_root}/${sw_test}.32.vmem ${run_dir}/sw.32.vmem
+	cp ${proj_root}/${sw_test}.64.vmem ${run_dir}/sw.64.vmem
 	# Optionally, assume that ${sw_test}_logs.txt exists and copy over to the run_dir.
 	# Ignore copy error if it actually doesn't exist. Likewise for ${sw_test}_rodata.txt.
 	-cp ${proj_root}/${sw_test}_logs.txt ${run_dir}/sw_logs.txt
@@ -81,13 +81,13 @@
 		-f .logs.fields -r .rodata \
 		-n "sw" -o "${run_dir}"
 	# Copy over the sw test image to the run_dir.
-	cp ${sw_build_dir}/build-out/${sw_test}_${sw_build_device}.32.vmem \
-		${run_dir}/sw.32.vmem
-
+	cp ${sw_build_dir}/build-out/${sw_test}_${sw_build_device}.64.vmem \
+		${run_dir}/sw.64.vmem
 endif
 
 endif
 
+
 simulate: sw_build
 	@echo "[make]: simulate"
 	cd ${run_dir} && ${run_cmd} ${run_opts}
diff --git a/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv b/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
index 0bff267..130aa1c 100644
--- a/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
@@ -17,7 +17,7 @@
   localparam int WordW           = $clog2(WordsPerPage);
   localparam int AddrW           = BankW + PageW + WordW; // all flash range
   localparam int BankAddrW       = PageW + WordW;         // 1 bank of flash range
-  localparam int DataWidth       = BytesPerWord * 8;
+  localparam int DataWidth       = 64;
   localparam int FlashTotalPages = NumBanks * PagesPerBank;
   localparam int AllPagesW       = BankW + PageW;
 
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
index 764e21a..9538023 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
@@ -178,6 +178,25 @@
   // program pipeline
   ////////////////////////
 
+  // Temporary: places prog_data_i into one slot of the flash-width prog_data; no scrambling yet
+  logic [DataWidth-1:0] prog_data;
+
+  if (WidthMultiple == 1) begin : gen_single_prog_data
+    assign prog_data = prog_data_i; // one bus word per flash word: pass through
+  end else begin : gen_prog_data
+    logic [WidthMultiple-1:0][BusWidth-1:0] prog_data_packed; // flash word as bus-word slots
+
+    always_comb begin
+      prog_data_packed = {DataWidth{1'b1}}; // default: every slot is all 1s
+      for (int i = 0; i < WidthMultiple; i++) begin
+        if (addr_i[0 +: WordSelW] == i) begin // low address bits select the slot
+          prog_data_packed[i] = prog_data_i;
+        end
+      end
+    end
+
+    assign prog_data = prog_data_packed;
+  end
 
   ////////////////////////
   // scrambling / de-scrambling primitive
@@ -185,14 +204,15 @@
 
 
   ////////////////////////
-  // Actual read to flash phy
+  // Actual connection to flash phy
   ////////////////////////
 
-
   // The actual flash macro wrapper
+  // The size of a page is fixed.  However, depending on the sizing of the word,
+  // the number of words within a page will change.
   prim_flash #(
     .PagesPerBank(PagesPerBank),
-    .WordsPerPage(WordsPerPage),
+    .WordsPerPage(WordsPerPage / WidthMultiple),
     .DataWidth(DataWidth),
     .SkipInit(SkipInit)
   ) i_flash (
@@ -203,8 +223,8 @@
     .pg_erase_i(reqs[PhyPgErase]),
     .bk_erase_i(reqs[PhyBkErase]),
     //.addr_i(muxed_addr[0 +: PageW + WordW]),
-    .addr_i(muxed_addr),
-    .prog_data_i(prog_data_i),
+    .addr_i(muxed_addr[BankAddrW-1:LsbAddrBit]),
+    .prog_data_i(prog_data),
     .ack_o(ack),
     .rd_data_o(flash_rdata),
     .init_busy_o // TBD this needs to be looked at later. What init do we need to do,
@@ -217,6 +237,7 @@
 
   // requests to flash must always be one hot
   `ASSERT(OneHotReqs_A, $onehot0(reqs))
-
+  `ASSERT_INIT(NoRemainder_A, AddrBitsRemain == 0)
+  `ASSERT_INIT(Pow2Multiple_A, $onehot(WidthMultiple))
 
 endmodule // flash_phy_core
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv b/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
index ca8593a..2dd6fd8 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
@@ -6,16 +6,16 @@
 //
 
 package flash_phy_pkg;
-
+  parameter int NumBanks     = flash_ctrl_pkg::NumBanks;
   parameter int PagesPerBank = flash_ctrl_pkg::PagesPerBank;
   parameter int WordsPerPage = flash_ctrl_pkg::WordsPerPage;
   parameter int BytesPerWord = flash_ctrl_pkg::BytesPerWord;
   parameter int BankW        = flash_ctrl_pkg::BankW;
   parameter int PageW        = flash_ctrl_pkg::PageW;
   parameter int WordW        = flash_ctrl_pkg::WordW;
-  parameter int BusWidth  = flash_ctrl_pkg::BusWidth;
-  parameter int DataWidth = flash_ctrl_pkg::DataWidth;
-  parameter int NumBuf = 4; // number of flash read buffers
+  parameter int BusWidth     = flash_ctrl_pkg::BusWidth;
+  parameter int DataWidth    = flash_ctrl_pkg::DataWidth;
+  parameter int NumBuf       = 4; // number of flash read buffers
   parameter int RspOrderDepth = 2; // this should be DataWidth / BusWidth
                                    // will switch to this after bus widening
 
@@ -26,8 +26,24 @@
   // This address width is from the perspective of the flash primitive,
   // which is an integer multiple of the bus width.  As a result, the number
   // of relevant address bits changes.
-  // This needs to be updated later
-  parameter int PrimFlashAddrW = BankAddrW;
+
+  // DataWidth must be a whole multiple of BusWidth, so this remainder must be 0
+  parameter int AddrBitsRemain = DataWidth % BusWidth;
+
+  // number of bus words per flash word; must be a power of 2
+  parameter int WidthMultiple = DataWidth / BusWidth;
+
+  // flash words per page (WordsPerPage is in bus words) and the bits needed to index them
+  parameter int FlashWordsPerPage = WordsPerPage / WidthMultiple;
+  parameter int FlashWordsW = $clog2(FlashWordsPerPage);
+
+  // base index
+  // This is the lsb position of the prim flash address when looking at the bus address
+  parameter int LsbAddrBit = $clog2(WidthMultiple);
+  parameter int WordSelW = WidthMultiple == 1 ? 1 : LsbAddrBit; // never 0, word_sel needs >= 1 bit
+
+  // prim flash addr width: bank address without the bus word select bits
+  parameter int PrimFlashAddrW = BankAddrW - LsbAddrBit;
 
   // Read buffer metadata
   typedef enum logic [1:0] {
@@ -39,13 +55,13 @@
 
   typedef struct packed {
     logic [DataWidth-1:0] data;
-    logic [BankAddrW-1:0] addr; // all address bits preserved to pick return portion
+    logic [PrimFlashAddrW-1:0] addr; // flash word address (bus word select bits removed)
     rd_buf_attr_e attr;
   } rd_buf_t;
 
   typedef struct packed {
     logic [NumBuf-1:0] buf_sel;
-    logic word_sel; // this should eventually be represented by DataWidth / BusWidth
+    logic [WordSelW-1:0] word_sel; // bus word within the flash word to return
   } rsp_fifo_entry_t;
 
   parameter int RspOrderFifoWidth = $bits(rsp_fifo_entry_t);
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv b/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
index fe316cc..acb9127 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
@@ -25,7 +25,7 @@
   input [BankAddrW-1:0] addr_i,
   output logic rdy_o,
   output logic data_valid_o,
-  output logic [DataWidth-1:0] data_o,
+  output logic [BusWidth-1:0] data_o,
   output logic idle_o, // the entire read pipeline is idle
 
   // interface to actual flash primitive
@@ -109,7 +109,7 @@
   // do not attempt to generate match unless the transaction is relevant
   for (genvar i = 0; i < NumBuf; i++) begin: gen_buf_match
     assign buf_match[i] = req_i & (buf_valid[i] | buf_wip[i]) &
-                          read_buf[i].addr == addr_i;
+                          read_buf[i].addr == addr_i[BankAddrW-1:LsbAddrBit];
 
     // A data hazard should never happen to a wip buffer because it implies
     // that a read is in progress, so a hazard operation cannot start.
@@ -118,8 +118,8 @@
     // If program, only if it's the same flash word.
     assign data_hazard[i] = buf_valid[i] &
                             (bk_erase_i |
-                            (prog_i & read_buf[i].addr == addr_i) |
-                            (pg_erase_i & read_buf[i].addr[WordW +: PageW] ==
+                            (prog_i & read_buf[i].addr == addr_i[BankAddrW-1:LsbAddrBit]) |
+                            (pg_erase_i & read_buf[i].addr[FlashWordsW +: PageW] ==
                             addr_i[WordW +: PageW]));
 
   end
@@ -140,7 +140,7 @@
       .alloc_i(rdy_o & alloc[i]),
       .update_i(update[i]),
       .wipe_i(data_hazard[i]),
-      .addr_i(addr_i),
+      .addr_i(addr_i[BankAddrW-1:LsbAddrBit]),
       .data_i(data_i),
       .out_o(read_buf[i])
     );
@@ -170,14 +170,19 @@
   logic rd_busy;
   logic rd_done;
   logic [NumBuf-1:0] alloc_q;
-  logic unused_word_sel; // this is temporary
 
   assign rd_done = rd_busy & ack_i;
 
   // if buffer allocated, that is the return source
   // if buffer matched, that is the return source
   assign rsp_fifo_wdata.buf_sel = |alloc ? buf_alloc : buf_match;
-  assign rsp_fifo_wdata.word_sel = 1'b1; // TODO - fix later
+
+  // Record which bus word of the flash word was requested; unused if the widths are the same
+  if (WidthMultiple == 1) begin : gen_single_word_sel
+    assign rsp_fifo_wdata.word_sel = '0;
+  end else begin : gen_word_sel
+    assign rsp_fifo_wdata.word_sel = addr_i[0 +: LsbAddrBit]; // low address bits index the bus word
+  end
 
   // response order FIFO
   prim_fifo_sync #(
@@ -197,8 +202,6 @@
     .rdata  (rsp_fifo_rdata)
   );
 
-  assign unused_word_sel = rsp_fifo_rdata.word_sel;
-
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
       rd_busy <= 1'b0;
@@ -259,8 +262,22 @@
     end
   end
 
+  if (WidthMultiple == 1) begin : gen_width_one_rd
+    // When multiple is 1, the flash word is the bus word: pass the read through directly
+    logic unused_word_sel;
+    assign data_o = |buf_rsp_match ? buf_rsp_data : data_i; // buffer data if any buffer matched
+    assign unused_word_sel = rsp_fifo_rdata.word_sel; // not needed here; presumably a lint tie-off
+
+  end else begin : gen_rd
+    // View the flash word as WidthMultiple bus words and return the one picked by word_sel
+    logic [WidthMultiple-1:0][BusWidth-1:0] bus_words_packed;
+    assign bus_words_packed = |buf_rsp_match ? buf_rsp_data : data_i; // buffer data if matched
+    assign data_o = bus_words_packed[rsp_fifo_rdata.word_sel];
+
+  end
+
   assign data_valid_o = flash_rsp_match | |buf_rsp_match;
-  assign data_o = |buf_rsp_match ? buf_rsp_data : data_i;
+
 
   // the entire read pipeline is idle when there are no responses to return
   assign idle_o = ~rsp_fifo_vld;
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_rd_buffers.sv b/hw/ip/flash_ctrl/rtl/flash_phy_rd_buffers.sv
index 41e5238..ba53685 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_rd_buffers.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_rd_buffers.sv
@@ -25,7 +25,7 @@
   input alloc_i,
   input update_i,
   input wipe_i,
-  input [BankAddrW-1:0] addr_i,
+  input [PrimFlashAddrW-1:0] addr_i,
   input [DataWidth-1:0] data_i,
   output rd_buf_t out_o
 );
diff --git a/hw/top_earlgrey/dv/env/chip_env_cfg.sv b/hw/top_earlgrey/dv/env/chip_env_cfg.sv
index e310b15..9146695 100644
--- a/hw/top_earlgrey/dv/env/chip_env_cfg.sv
+++ b/hw/top_earlgrey/dv/env/chip_env_cfg.sv
@@ -25,6 +25,7 @@
   // sw logger related
   string sw_types[]         = '{"rom", "sw"};
   sw_logger_vif             sw_logger_vif;
+  int sw_image_widths[]     = '{32, 64}; // vmem width per sw_types entry, same index order
   string                    sw_images[string];
   virtual sw_test_status_if sw_test_status_vif;
   uint                      sw_test_timeout_ns = 5_000_000; // 5ms
@@ -77,7 +78,7 @@
 
     // initialize the sw_image names and log file names
     foreach (sw_types[i]) begin
-      sw_images[sw_types[i]] = {sw_types[i], ".32.vmem"};
+      sw_images[sw_types[i]] = $sformatf("%0s.%0d.vmem", sw_types[i], sw_image_widths[i]);
     end
   endfunction