[flash] Add scrambling primitive and hook-up to program / read pipelines
Signed-off-by: Timothy Chen <timothytim@google.com>
[flash] Pick correct buffer to update after de-scramble
Signed-off-by: Timothy Chen <timothytim@google.com>
[flash] update to multi-cycle prince and various fixes
Signed-off-by: Timothy Chen <timothytim@google.com>
diff --git a/hw/ip/flash_ctrl/data/flash_ctrl.hjson b/hw/ip/flash_ctrl/data/flash_ctrl.hjson
index 1edaaee..cf426c9 100644
--- a/hw/ip/flash_ctrl/data/flash_ctrl.hjson
+++ b/hw/ip/flash_ctrl/data/flash_ctrl.hjson
@@ -20,7 +20,15 @@
name: "flash", // flash_o (req), flash_i (rsp)
act: "req",
package: "flash_ctrl_pkg", // Origin package (only needs for the requester)
+ },
+
+ { struct: "otp_flash",
+ type: "uni",
+ name: "otp",
+ act: "rcv",
+ package: "flash_ctrl_pkg"
}
+
],
param_list: [
@@ -187,6 +195,23 @@
]
},
+ { name: "SCRAMBLE_EN",
+ desc: "Scramble enable for flash",
+ swaccess: "rw",
+ hwaccess: "hro",
+ resval: "0",
+ fields: [
+ { bits: "0",
+ name: "VAL",
+ desc: '''
+ Temporary enable bit for flash scramble.
+ See #2630.
+ '''
+ resval: "0"
+ },
+ ]
+ },
+
// TODO(#1412):
// This multireg is temporarily removed until the nested multireg compact feature is fully implemented.
// Until then, use only one register wen for all flash regions.
diff --git a/hw/ip/flash_ctrl/flash_ctrl.core b/hw/ip/flash_ctrl/flash_ctrl.core
index bc04143..01519b3 100644
--- a/hw/ip/flash_ctrl/flash_ctrl.core
+++ b/hw/ip/flash_ctrl/flash_ctrl.core
@@ -3,7 +3,7 @@
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
name: "lowrisc:ip:flash_ctrl:0.1"
-description: "Faux Flash Controller"
+description: "Flash Controller"
filesets:
files_rtl:
@@ -11,6 +11,7 @@
- lowrisc:ip:tlul
- lowrisc:prim:all
- lowrisc:prim:flash
+ - lowrisc:prim:gf_mult
- lowrisc:ip:flash_ctrl_pkg
files:
- rtl/flash_ctrl_reg_pkg.sv
@@ -25,6 +26,7 @@
- rtl/flash_phy_rd.sv
- rtl/flash_phy_prog.sv
- rtl/flash_phy_rd_buffers.sv
+ - rtl/flash_phy_scramble.sv
file_type: systemVerilogSource
files_verilator_waiver:
diff --git a/hw/ip/flash_ctrl/rtl/flash_ctrl.sv b/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
index f8bae22..c1a5591 100644
--- a/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
@@ -20,6 +20,9 @@
input flash_rsp_t flash_i,
output flash_req_t flash_o,
+ // OTP Interface
+ input otp_flash_t otp_i,
+
// Interrupts
output logic intr_prog_empty_o, // Program fifo is empty
output logic intr_prog_lvl_o, // Program fifo is empty
@@ -388,6 +391,9 @@
assign flash_o.part = flash_part_sel;
assign flash_o.prog_data = flash_prog_data;
assign flash_o.prog_last = flash_prog_last;
+ assign flash_o.scramble_en = reg2hw.scramble_en.q;
+ assign flash_o.addr_key = otp_i.addr_key;
+ assign flash_o.data_key = otp_i.data_key;
assign flash_rd_data = flash_i.rd_data;
assign init_busy = flash_i.init_busy;
diff --git a/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv b/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
index 52be4e8..f8351f6 100644
--- a/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_ctrl_pkg.sv
@@ -77,6 +77,9 @@
logic [BusAddrW-1:0] addr;
logic [BusWidth-1:0] prog_data;
logic prog_last;
+ logic scramble_en;
+ logic [127:0] addr_key;
+ logic [127:0] data_key;
} flash_req_t;
// default value of flash_req_t (for dangling ports)
@@ -89,7 +92,10 @@
part: DataPart,
addr: '0,
prog_data: '0,
- prog_last: '0
+ prog_last: '0,
+ scramble_en: '0,
+ addr_key: 128'hDEADBEEFBEEFFACEDEADBEEF5A5AA5A5,
+ data_key: 128'hDEADBEEF5A5AA5A5DEADBEEFBEEFFACE
};
// memory to flash controller
@@ -110,4 +116,22 @@
init_busy: 1'b0
};
+ ////////////////////////////
+ // The following inter-module should be moved to OTP
+ ////////////////////////////
+
+ // otp to flash_phy
+ typedef struct packed {
+ logic [127:0] addr_key;
+ logic [127:0] data_key;
+ } otp_flash_t;
+
+ // default value of otp_flash_t
+ parameter otp_flash_t OTP_FLASH_DEFAULT = '{
+ addr_key: 128'hDEADBEEFBEEFFACEDEADBEEF5A5AA5A5,
+ data_key: 128'hDEADBEEF5A5AA5A5DEADBEEFBEEFFACE
+ };
+
+
+
endpackage : flash_ctrl_pkg
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy.sv b/hw/ip/flash_ctrl/rtl/flash_phy.sv
index 87f05b7..66b70a5 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy.sv
@@ -118,13 +118,27 @@
.rdata (host_rsp_data[bank])
);
+ logic host_req;
+ logic ctrl_req;
+ logic host_scramble_en;
+ logic ctrl_scramble_en;
+
+ assign host_req = host_req_i & (host_bank_sel == bank) & host_rsp_avail[bank];
+ assign ctrl_req = flash_ctrl_i.req & (ctrl_bank_sel == bank);
+
+ // #2630: Temporary scramble enable logic on one of the banks until register configuration
+ // is setup.
+ assign host_scramble_en = host_req & host_addr_i[BusAddrW-1 -: BankW] == 1;
+ assign ctrl_scramble_en = ctrl_req & flash_ctrl_i.addr[BusAddrW-1 -: BankW] == 1;
+
flash_phy_core i_core (
.clk_i,
.rst_ni,
- .req_i(flash_ctrl_i.req & (ctrl_bank_sel == bank)),
+ .scramble_en_i(flash_ctrl_i.scramble_en & (host_scramble_en | ctrl_scramble_en)),
+ .req_i(ctrl_req),
// host request must be suppressed if response fifo cannot hold more
// otherwise the flash_phy_core and flash_phy will get out of sync
- .host_req_i(host_req_i & (host_bank_sel == bank) & host_rsp_avail[bank]),
+ .host_req_i(host_req),
.host_addr_i(host_addr_i[0 +: BusBankAddrW]),
.rd_i(flash_ctrl_i.rd),
.prog_i(flash_ctrl_i.prog),
@@ -134,6 +148,8 @@
.addr_i(flash_ctrl_i.addr[0 +: BusBankAddrW]),
.prog_data_i(flash_ctrl_i.prog_data),
.prog_last_i(flash_ctrl_i.prog_last),
+ .addr_key_i(flash_ctrl_i.addr_key),
+ .data_key_i(flash_ctrl_i.data_key),
.host_req_rdy_o(host_req_rdy[bank]),
.host_req_done_o(host_req_done[bank]),
.rd_done_o(rd_done[bank]),
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
index a04d48b..ddc0518 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
@@ -15,9 +15,10 @@
) (
input clk_i,
input rst_ni,
- input host_req_i, // host request - read only
+ input scramble_en_i,// temporary signal
+ input host_req_i, // host request - read only
input [BusBankAddrW-1:0] host_addr_i,
- input req_i, // controller request
+ input req_i, // controller request
input rd_i,
input prog_i,
input pg_erase_i,
@@ -26,6 +27,8 @@
input [BusBankAddrW-1:0] addr_i,
input [BusWidth-1:0] prog_data_i,
input prog_last_i,
+ input [KeySize-1:0] addr_key_i,
+ input [KeySize-1:0] data_key_i,
output logic host_req_rdy_o,
output logic host_req_done_o,
output logic rd_done_o,
@@ -85,6 +88,11 @@
logic inc_arb_cnt, clr_arb_cnt;
logic host_req_masked;
+ // scramble / de-scramble connections
+ logic calc_ack;
+ logic op_ack;
+ logic [DataWidth-1:0] scramble_mask;
+
assign host_req_masked = host_req_i & (arb_cnt < ArbCnt);
always_ff @(posedge clk_i or negedge rst_ni) begin
@@ -211,11 +219,17 @@
logic flash_rd_req;
logic [DataWidth-1:0] flash_rdata;
+ logic rd_calc_req;
+ logic [BankAddrW-1:0] rd_calc_addr;
+ logic rd_op_req;
+ logic [DataWidth-1:0] rd_scrambled_data;
+ logic [DataWidth-1:0] rd_descrambled_data;
flash_phy_rd u_rd (
.clk_i,
.rst_ni,
.req_i(reqs[PhyRead]),
+ .descramble_i(scramble_en_i),
.prog_i(reqs[PhyProg]),
.pg_erase_i(reqs[PhyPgErase]),
.bk_erase_i(reqs[PhyBkErase]),
@@ -227,16 +241,26 @@
.idle_o(rd_stage_idle),
.req_o(flash_rd_req),
.ack_i(ack),
- .data_i(flash_rdata)
+ .data_i(flash_rdata),
+ //scramble unit interface
+ .calc_req_o(rd_calc_req),
+ .calc_addr_o(rd_calc_addr),
+ .descramble_req_o(rd_op_req),
+ .scrambled_data_o(rd_scrambled_data),
+ .calc_ack_i(calc_ack),
+ .descramble_ack_i(op_ack),
+ .mask_i(scramble_mask),
+ .descrambled_data_i(rd_descrambled_data)
);
////////////////////////
// program pipeline
////////////////////////
- // Below code is temporary and does not account for scrambling
- logic [DataWidth-1:0] prog_data;
+ logic [DataWidth-1:0] prog_data, prog_scrambled_data;
logic flash_prog_req;
+ logic prog_calc_req;
+ logic prog_op_req;
if (WidthMultiple == 1) begin : gen_single_prog_data
assign flash_prog_req = reqs[PhyProg];
@@ -247,10 +271,17 @@
.clk_i,
.rst_ni,
.req_i(reqs[PhyProg]),
+ .scramble_i(scramble_en_i),
.sel_i(addr_i[0 +: WordSelW]),
.data_i(prog_data_i),
.last_i(prog_last_i),
.ack_i(ack),
+ .calc_ack_i(calc_ack),
+ .scramble_ack_i(op_ack),
+ .mask_i(scramble_mask),
+ .scrambled_data_i(prog_scrambled_data),
+ .calc_req_o(prog_calc_req),
+ .scramble_req_o(prog_op_req),
.req_o(flash_prog_req),
.ack_o(prog_ack),
.data_o(prog_data)
@@ -262,6 +293,28 @@
// scrambling / de-scrambling primitive
////////////////////////
+ logic [BankAddrW-1:0] scramble_muxed_addr;
+ assign scramble_muxed_addr = prog_calc_req ? muxed_addr[BusBankAddrW-1:LsbAddrBit] :
+ rd_calc_addr;
+
+ flash_phy_scramble u_scramble (
+ .clk_i,
+ .rst_ni,
+ .calc_req_i(prog_calc_req | rd_calc_req),
+ .op_req_i(prog_op_req | rd_op_req),
+ .op_type_i(prog_op_req ? ScrambleOp : DeScrambleOp),
+ .addr_i(scramble_muxed_addr),
+ .plain_data_i(prog_data),
+ .scrambled_data_i(rd_scrambled_data),
+ .addr_key_i(addr_key_i),
+ .data_key_i(data_key_i),
+ .calc_ack_o(calc_ack),
+ .op_ack_o(op_ack),
+ .mask_o(scramble_mask),
+ .plain_data_o(rd_descrambled_data),
+ .scrambled_data_o(prog_scrambled_data)
+ );
+
////////////////////////
// Actual connection to flash phy
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv b/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
index 1ed95ce..6da6b6e 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_pkg.sv
@@ -35,6 +35,13 @@
parameter int LsbAddrBit = $clog2(WidthMultiple);
parameter int WordSelW = WidthMultiple == 1 ? 1 : LsbAddrBit;
+ // scramble / de-scramble parameters
+ // Key width in bits, and the number of cycles the gf_mult is given to complete
+ parameter int KeySize = 128;
+ parameter int GfMultCycles = 2;
+ // If this value is greater than 1, constraints must be updated for multicycle paths
+ parameter int CipherCycles = 2;
+
// Read buffer metadata
typedef enum logic [1:0] {
Invalid = 2'h0,
@@ -57,6 +64,11 @@
parameter int RspOrderFifoWidth = $bits(rsp_fifo_entry_t);
+ typedef struct packed {
+ logic [BankAddrW-1:0] addr;
+ logic descramble;
+ } rd_attr_t;
+
// Flash Operations Supported
typedef enum logic [2:0] {
PhyRead = 3'h0,
@@ -73,4 +85,9 @@
Ctrl = 2'h2
} flash_phy_op_sel_e;
+ typedef enum logic {
+ ScrambleOp = 1'b0,
+ DeScrambleOp = 1'b1
+ } cipher_ops_e;
+
endpackage // flash_phy_pkg
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_prog.sv b/hw/ip/flash_ctrl/rtl/flash_phy_prog.sv
index 68d8392..d64cf8f 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_prog.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_prog.sv
@@ -26,10 +26,17 @@
input clk_i,
input rst_ni,
input req_i,
+ input scramble_i,
input [WordSelW-1:0] sel_i,
input [BusWidth-1:0] data_i,
input last_i,
input ack_i,
+ input calc_ack_i,
+ input scramble_ack_i,
+ input [DataWidth-1:0] mask_i,
+ input [DataWidth-1:0] scrambled_data_i,
+ output logic calc_req_o,
+ output logic scramble_req_o,
output logic req_o,
output logic ack_o,
output logic [DataWidth-1:0] data_o
@@ -40,7 +47,9 @@
StPrePack,
StPackData,
StPostPack,
- StWaitFlash
+ StWaitFlash,
+ StCalcMask,
+ StScrambleData
} prog_state_e;
typedef enum logic [1:0] {
@@ -99,6 +108,8 @@
data_sel = Filler;
req_o = 1'b0;
ack_o = 1'b0;
+ calc_req_o = 1'b0;
+ scramble_req_o = 1'b0;
unique case (state_q)
StIdle: begin
@@ -124,7 +135,7 @@
if (req_i && idx == (WidthMultiple-1)) begin
// last beat of a flash word
- state_d = StWaitFlash;
+ state_d = scramble_i ? StCalcMask : StWaitFlash;
end else if (req_i && last_i) begin
// last beat is not aligned with the last entry of flash word
state_d = StPostPack;
@@ -140,6 +151,22 @@
// finish packing remaining entries
if (idx == (WidthMultiple-1)) begin
+ state_d = scramble_i ? StCalcMask : StWaitFlash;
+ end
+ end
+
+ StCalcMask: begin
+ calc_req_o = 1'b1;
+
+ if (calc_ack_i) begin
+ state_d = StScrambleData;
+ end
+ end
+
+ StScrambleData: begin
+ scramble_req_o = 1'b1;
+
+ if (scramble_ack_i) begin
state_d = StWaitFlash;
end
end
@@ -157,16 +184,27 @@
endcase // unique case (state_q)
end
+ logic [DataWidth-1:0] mask_q;
+
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
packed_data <= '0;
+ mask_q <= '0;
end else if (req_o && ack_i) begin
packed_data <= '0;
+ end else if (calc_req_o && calc_ack_i) begin
+ packed_data <= packed_data ^ mask_i;
+ mask_q <= mask_i;
+ end else if (scramble_req_o && scramble_ack_i) begin
+ packed_data <= scrambled_data_i ^ mask_q;
end else if (pack_valid) begin
packed_data[idx] <= pack_data;
end
end
+
+
+
assign data_o = packed_data;
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv b/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
index 1054e4b..4ebee35 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_rd.sv
@@ -13,18 +13,18 @@
// upstream to stop issuing instructions, however once issued, the upstream will
// always accept the response.
//
-// TBD: Add support for descramble stage
-// The allocate and descramble indication received at read stage must be saved.
+// Support for descramble stage
+// The allocate and descramble indication received at read stage are saved.
// When the read completes, depending on the 'descramble' indication saved, the
// data is either stored into FIFO (reg + skid) between read and descramble stage,
// or forwarded directly to the buffers (no de-scramble)
//
-// If the storage element between read and de-scramble stages are completely fully
-// for some reason, then the read stage cannot start
+// If the storage elements between read and de-scramble stages are completely full
+// for any reason, then the read stage cannot start.
//
-// When the read stage begins, the galois multiply portion of the de-scramble should
+// When the read stage begins, the galois multiply portion of the de-scramble will
// also be kicked off. When the galois multiply stage AND read stage completes, the
-// de-scramble is also kicked off (which is really what the de-scramble stage is doing).
+// de-scramble is then kicked off.
module flash_phy_rd import flash_phy_pkg::*; (
input clk_i,
@@ -32,6 +32,7 @@
// interface with arbitration unit
input req_i,
+ input descramble_i,
input prog_i,
input pg_erase_i,
input bk_erase_i,
@@ -42,6 +43,16 @@
output logic [BusWidth-1:0] data_o,
output logic idle_o, // the entire read pipeline is idle
+ // interface with scramble unit
+ output logic calc_req_o,
+ output logic descramble_req_o,
+ output logic [BankAddrW-1:0] calc_addr_o,
+ output logic [DataWidth-1:0] scrambled_data_o,
+ input calc_ack_i,
+ input descramble_ack_i,
+ input [DataWidth-1:0] mask_i,
+ input [DataWidth-1:0] descrambled_data_i,
+
// interface to actual flash primitive
output logic req_o,
input ack_i,
@@ -52,6 +63,12 @@
// Read buffers
/////////////////////////////////
+ // muxed de-scrambled and plain-data
+ logic [DataWidth-1:0] muxed_data;
+
+ // muxed data valid signal that takes scrambling into consideration
+ logic data_valid;
+
// A buffer allocate is invoked when a new transaction arrives.
// Alloc only happens if the new transaction does not match an existing entry.
logic [NumBuf-1:0] alloc;
@@ -166,7 +183,7 @@
// update sets state to valid
// wipe sets state to invalid - this comes from prog
for (genvar i = 0; i < NumBuf; i++) begin: gen_bufs
- flash_phy_rd_buffers i_rd_buf (
+ flash_phy_rd_buffers u_rd_buf (
.clk_i,
.rst_ni,
.alloc_i(rdy_o & alloc[i]),
@@ -174,7 +191,7 @@
.wipe_i(data_hazard[i]),
.addr_i(flash_word_addr),
.part_i(part_i),
- .data_i(data_i),
+ .data_i(muxed_data),
.out_o(read_buf[i])
);
end
@@ -203,7 +220,13 @@
logic rd_busy;
logic rd_done;
logic [NumBuf-1:0] alloc_q;
+ rd_attr_t rd_attrs;
+ // scramble stage ready
+ logic scramble_stage_rdy;
+
+ // read done does not mean data is available.
+ // if the data must be de-scrambled, there is another wait stage
assign rd_done = rd_busy & ack_i;
// if buffer allocated, that is the return source
@@ -239,10 +262,13 @@
if (!rst_ni) begin
rd_busy <= 1'b0;
alloc_q <= '0;
+ rd_attrs <= '0;
end else if (req_o) begin
// read only becomes busy if a buffer is allocated and read
rd_busy <= 1'b1;
alloc_q <= alloc;
+ rd_attrs.addr <= addr_i[BusBankAddrW-1:LsbAddrBit];
+ rd_attrs.descramble <= descramble_i;
end else if (rd_done) begin
rd_busy <= 1'b0;
end
@@ -254,7 +280,8 @@
// if no buffers matched, accept only if read state is idle and there is space
// if buffer is matched, accept as long as there is space in the rsp fifo
- assign rdy_o = no_match ? rd_stage_idle & rsp_fifo_rdy : rsp_fifo_rdy;
+ assign rdy_o = no_match ? rd_stage_idle & rsp_fifo_rdy & scramble_stage_rdy :
+ rsp_fifo_rdy & scramble_stage_rdy;
// issue a transaction to flash
assign req_o = req_i & rdy_o & no_match;
@@ -263,7 +290,111 @@
// De-scrambling stage
/////////////////////////////////
- // nothing here yet
+ logic fifo_data_ready;
+ logic fifo_data_valid;
+ logic mask_valid;
+ logic [DataWidth-1:0] fifo_data;
+ logic [DataWidth-1:0] mask;
+ logic data_fifo_rdy;
+ logic mask_fifo_rdy;
+ logic forward;
+ logic hint_forward;
+ logic hint_descram;
+ logic [NumBuf-1:0] alloc_q2;
+
+ assign scramble_stage_rdy = data_fifo_rdy & mask_fifo_rdy;
+
+ // data is consumed when:
+ // 1. When descrambling completes
+ // 2. Immediately consumed when descrambling not required
+ // 3. In both cases, when data has not already been forwarded
+ assign fifo_data_ready = hint_descram ? descramble_req_o & descramble_ack_i & ~hint_forward :
+ fifo_data_valid & !hint_forward;
+
+ // data is forwarded whenever it does not require descrambling or if it has been erased
+ // but forwarding is only possible if there are no entries in the FIFO to ensure the current
+ // read cannot run ahead of the descramble.
+ assign forward = rd_done & !fifo_data_valid &
+ ((data_i == {DataWidth{1'b1}}) | !rd_attrs.descramble);
+
+ // storage for read outputs
+ // This storage element can be fully merged with the fifo below if the time it takes
+ // to do a read is matched to gf_mult. This is doable and should be considered.
+ // However it would create a dependency on constraints (multicycle) instead of
+ // being correct by construction.
+ //
+ // In addition to potential different completion times, the mask storage may also
+ // be pushed even if it is not required (erase case). The solution for this issue
+ // is that the mask / data are always pushed, it is then selectively popped based
+ // on the forward / de-scrambling hints.
+ //
+ // All these problems could be resolved if the timings matched exactly, however
+ // the user would need to correctly setup constraints on either flash / gf_mult
+ // timing change.
+ prim_fifo_sync #(
+ .Width (DataWidth + 2 + NumBuf),
+ .Pass (0),
+ .Depth (2)
+ ) u_rd_storage (
+ .clk_i,
+ .rst_ni,
+ .clr_i (1'b0),
+ .wvalid (rd_done),
+ .wready (data_fifo_rdy),
+ .wdata ({alloc_q, rd_attrs.descramble,forward,data_i}),
+ .depth (),
+ .rvalid (fifo_data_valid),
+ .rready (fifo_data_ready | hint_forward),
+ .rdata ({alloc_q2, hint_descram,hint_forward,fifo_data})
+ );
+
+ // storage for mask calculations
+ prim_fifo_sync #(
+ .Width (DataWidth),
+ .Pass (0),
+ .Depth (2)
+ ) u_mask_storage (
+ .clk_i,
+ .rst_ni,
+ .clr_i (1'b0),
+ .wvalid (calc_req_o & calc_ack_i),
+ .wready (mask_fifo_rdy),
+ .wdata (mask_i),
+ .depth (),
+ .rvalid (mask_valid),
+ .rready (fifo_data_ready | hint_forward),
+ .rdata (mask)
+ );
+
+ // generate the mask calculation request
+ always_ff @(posedge clk_i or negedge rst_ni) begin
+ if (!rst_ni) begin
+ calc_req_o <= '0;
+ end else if (req_o && descramble_i) begin
+ calc_req_o <= 1'b1;
+ end else if (calc_req_o && calc_ack_i) begin
+ calc_req_o <= 1'b0;
+ end
+ end
+
+ // operand to gf_mult
+ assign calc_addr_o = rd_attrs.addr;
+
+ // generate the descramble request whenever both stages are available
+ // and there is a need to descramble
+ assign descramble_req_o = fifo_data_valid & mask_valid & !hint_forward;
+
+ // scrambled data to de-scramble
+ assign scrambled_data_o = fifo_data ^ mask;
+
+ // muxed data
+ assign muxed_data = hint_descram ? descrambled_data_i ^ mask : data_i;
+
+ // muxed data valid
+ // if no de-scramble required, return data on read complete
+ // if data is all empty (erased), also return data on read complete
+ // if descramble is required, return data when descrambler finishes
+ assign data_valid = forward | fifo_data_ready;
/////////////////////////////////
@@ -275,10 +406,15 @@
logic [DataWidth-1:0] buf_rsp_data;
// update buffers
- assign update = rd_done ? alloc_q : '0;
+ // When forwarding, update entry stored in alloc_q
+ // When de-scrambling however, the contents of alloc_q may have already updated to the next read,
+ // so a different pointer is used.
+ // assign update = data_valid ? alloc_q : '0;
+ assign update = forward ? alloc_q :
+ fifo_data_ready ? alloc_q2 : '0;
// match in flash response when allocated buffer is the same as top of response fifo
- assign flash_rsp_match = rsp_fifo_vld & rd_done & (rsp_fifo_rdata.buf_sel == alloc_q);
+ assign flash_rsp_match = rsp_fifo_vld & data_valid & (rsp_fifo_rdata.buf_sel == update);
// match in buf response when there is a valid buffer that is the same as top of response fifo
for (genvar i = 0; i < NumBuf; i++) begin: gen_buf_rsp_match
@@ -287,7 +423,7 @@
// select among the buffers
always_comb begin
- buf_rsp_data = data_i;
+ buf_rsp_data = muxed_data;
for (int i = 0; i < NumBuf; i++) begin
if (buf_rsp_match[i]) begin
buf_rsp_data = read_buf[i].data;
@@ -298,21 +434,20 @@
if (WidthMultiple == 1) begin : gen_width_one_rd
// When multiple is 1, just pass the read through directly
logic unused_word_sel;
- assign data_o = |buf_rsp_match ? buf_rsp_data : data_i;
+ assign data_o = |buf_rsp_match ? buf_rsp_data : muxed_data;
assign unused_word_sel = rsp_fifo_rdata.word_sel;
end else begin : gen_rd
// Re-arrange data into packed array to pick the correct one
logic [WidthMultiple-1:0][BusWidth-1:0] bus_words_packed;
- assign bus_words_packed = |buf_rsp_match ? buf_rsp_data : data_i;
+ assign bus_words_packed = |buf_rsp_match ? buf_rsp_data : muxed_data;
assign data_o = bus_words_packed[rsp_fifo_rdata.word_sel];
end
assign data_valid_o = flash_rsp_match | |buf_rsp_match;
-
- // the entire read pipeline is idle when there are no responses to return
+ // the entire read pipeline is idle when there are no responses to return
assign idle_o = ~rsp_fifo_vld;
/////////////////////////////////
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_scramble.sv b/hw/ip/flash_ctrl/rtl/flash_phy_scramble.sv
new file mode 100644
index 0000000..1796e01
--- /dev/null
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_scramble.sv
@@ -0,0 +1,82 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Flash Phy Scramble Module
+//
+// This module implements the flash scramble / de-scramble operation
+// This operation is actually XEX. However the components are broken
+// in two and separately manipulated by the program and read pipelines.
+//
+
+module flash_phy_scramble import flash_phy_pkg::*; (
+ input clk_i,
+ input rst_ni,
+ input calc_req_i, // calculate galois multiplier mask
+ input op_req_i, // request primitive operation
+ input cipher_ops_e op_type_i, // scramble or de-scramble
+ input [BankAddrW-1:0] addr_i,
+ input [DataWidth-1:0] plain_data_i,
+ input [DataWidth-1:0] scrambled_data_i,
+ input [KeySize-1:0] addr_key_i,
+ input [KeySize-1:0] data_key_i,
+ output logic calc_ack_o,
+ output logic op_ack_o,
+ output logic [DataWidth-1:0] mask_o,
+ output logic [DataWidth-1:0] plain_data_o,
+ output logic [DataWidth-1:0] scrambled_data_o
+);
+
+ localparam int AddrPadWidth = DataWidth - BankAddrW;
+ localparam int UnusedWidth = KeySize - AddrPadWidth;
+
+ // unused portion of addr_key
+ logic [UnusedWidth-1:0] unused_key;
+ assign unused_key = addr_key_i[KeySize-1 -: UnusedWidth];
+
+ // Galois Multiply portion
+ prim_gf_mult # (
+ .Width(DataWidth),
+ .StagesPerCycle(DataWidth / GfMultCycles)
+ ) u_mult (
+ .clk_i,
+ .rst_ni,
+ .req_i(calc_req_i),
+ .operand_a_i({addr_key_i[DataWidth +: AddrPadWidth], addr_i}),
+ .operand_b_i(addr_key_i[DataWidth-1:0]),
+ .ack_o(calc_ack_o),
+ .prod_o(mask_o)
+ );
+
+ // Cipher portion
+ logic dec;
+ logic [DataWidth-1:0] data;
+
+ assign dec = op_type_i == DeScrambleOp;
+
+ // Previous discussion settled on PRESENT, using PRINCE here for now
+ // just to get some area idea
+ prim_prince # (
+ .DataWidth(DataWidth),
+ .KeyWidth(KeySize),
+ .UseOldKeySched(1'b1),
+ .HalfwayDataReg(1'b1)
+ ) u_cipher (
+ .clk_i,
+ .rst_ni,
+ .valid_i(op_req_i),
+ .data_i(dec ? scrambled_data_i : plain_data_i),
+ .key_i(data_key_i),
+ .dec_i(dec),
+ .data_o(data),
+ .valid_o(op_ack_o)
+ );
+
+ // if decrypt, output the unscrambled data, feed input through otherwise
+ assign plain_data_o = dec ? data : scrambled_data_i;
+
+ // if encrypt, output the scrambled data, feed input through otherwise
+ assign scrambled_data_o = dec ? plain_data_i : data;
+
+
+endmodule // flash_phy_scramble
diff --git a/sw/device/tests/flash_ctrl_test.c b/sw/device/tests/flash_ctrl_test.c
index a09d9d0..9b003d2 100644
--- a/sw/device/tests/flash_ctrl_test.c
+++ b/sw/device/tests/flash_ctrl_test.c
@@ -76,6 +76,12 @@
output_page));
CHECK_ARRAYS_EQ(output_page, input_page, FLASH_WORDS_PER_PAGE);
+ // Check from host side also
+ for (int i = 0; i < FLASH_WORDS_PER_PAGE; i++) {
+ output_page[i] = mmio_region_read32(flash_bank_1, i * sizeof(uint32_t));
+ }
+ CHECK_ARRAYS_EQ(output_page, input_page, FLASH_WORDS_PER_PAGE);
+
// Similar check for info page
CHECK_EQZ(flash_page_erase(flash_bank_1_addr, kInfoPartition));
CHECK_EQZ(flash_write(flash_bank_1_addr, kInfoPartition, input_page,