[packer] Revise the implementation
Previous prim_packer focused on to reduce register FF usage. On the side
effect, it creates the timing path from valid_i --> ready_o, ready_i -->
ready_o, and valid_i --> valid_o.
Now the ready_o and valid_i is computed based on the current storage
condition, which results in the valid_o asserting at the next cycle of
valid_i earliest.
Current implementation also doesn't maximize the storage utilization
when mask_i is not full 1.
Signed-off-by: Eunchan Kim <eunchan@opentitan.org>
diff --git a/hw/ip/prim/rtl/prim_packer.sv b/hw/ip/prim/rtl/prim_packer.sv
index 9457aef..8ddee7d 100644
--- a/hw/ip/prim/rtl/prim_packer.sv
+++ b/hw/ip/prim/rtl/prim_packer.sv
@@ -27,25 +27,27 @@
output logic flush_done_o
);
- localparam int Width = InW + OutW;
- localparam int PtrW = $clog2(Width+1);
+ localparam int Width = InW + OutW; // storage width
+ localparam int ConcatW = Width + InW; // Input concatenated width
+ localparam int PtrW = $clog2(ConcatW+1);
localparam int MaxW = (InW > OutW) ? InW : OutW;
localparam int IdxW = $clog2(InW) + ~|$clog2(InW);
logic valid_next, ready_next;
- logic [MaxW-1:0] stored_data, stored_mask;
- logic [Width-1:0] concat_data, concat_mask;
- logic [Width-1:0] shiftl_data, shiftl_mask;
+ logic [Width-1:0] stored_data, stored_mask;
+ logic [ConcatW-1:0] concat_data, concat_mask;
+ logic [ConcatW-1:0] shiftl_data, shiftl_mask;
- logic [PtrW-1:0] pos, pos_next; // Current write position
+ logic [PtrW-1:0] pos, pos_next; // Current write position
logic [IdxW-1:0] lod_idx; // result of Leading One Detector
logic [$clog2(InW+1)-1:0] inmask_ones; // Counting Ones for mask_i
logic ack_in, ack_out;
- logic flush_ready; // flush_i is pulse, so only when the output is ready flush_ready assets
+ logic flush_valid; // flush data out request
+ logic flush_done;
- // Computing next position
+ // Computing next position ==================================================
always_comb begin
// counting mask_i ones
inmask_ones = '0;
@@ -54,20 +56,31 @@
end
end
- assign pos_next = (valid_i) ? pos + PtrW'(inmask_ones) : pos; // pos always stays (% OutW)
+ logic [PtrW-1:0] pos_with_input;
+
+ always_comb begin
+ pos_next = pos;
+ pos_with_input = pos + PtrW'(inmask_ones);
+
+ unique case ({ack_in, ack_out})
+ 2'b00: pos_next = pos;
+ 2'b01: pos_next = (pos <= OutW) ? '0 : pos - OutW;
+ 2'b10: pos_next = pos_with_input;
+ 2'b11: pos_next = (pos_with_input <= OutW) ? '0 : pos_with_input - OutW;
+ default: pos_next = pos;
+ endcase
+ end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
pos <= '0;
- end else if (flush_ready) begin
+ end else if (flush_done) begin
pos <= '0;
- end else if (ack_out) begin
- `ASSERT_I(pos_next_gte_outw_p, pos_next >= OutW)
- pos <= pos_next - OutW;
- end else if (ack_in) begin
+ end else begin
pos <= pos_next;
end
end
+ //---------------------------------------------------------------------------
// Leading one detector for mask_i
always_comb begin
@@ -82,21 +95,44 @@
assign ack_in = valid_i & ready_o;
assign ack_out = valid_o & ready_i;
- // Data process
+ // Data process =============================================================
+ // shiftl : Input data shifted into the current stored position
assign shiftl_data = (valid_i) ? Width'(data_i >> lod_idx) << pos : '0;
assign shiftl_mask = (valid_i) ? Width'(mask_i >> lod_idx) << pos : '0;
- assign concat_data = {{(Width-MaxW){1'b0}}, stored_data & stored_mask} |
+
+ // concat : Merging stored and shiftl
+ assign concat_data = {{(InW){1'b0}}, stored_data & stored_mask} |
(shiftl_data & shiftl_mask);
- assign concat_mask = {{(Width-MaxW){1'b0}}, stored_mask} | shiftl_mask;
+ assign concat_mask = {{(InW){1'b0}}, stored_mask} | shiftl_mask;
- logic [MaxW-1:0] stored_data_next, stored_mask_next;
+ logic [Width-1:0] stored_data_next, stored_mask_next;
- if (InW >= OutW) begin : gen_stored_in
- assign stored_data_next = concat_data[OutW+:InW];
- assign stored_mask_next = concat_mask[OutW+:InW];
- end else begin : gen_stored_out
- assign stored_data_next = {{(OutW-InW){1'b0}}, concat_data[OutW+:InW]};
- assign stored_mask_next = {{(OutW-InW){1'b0}}, concat_mask[OutW+:InW]};
+ always_comb begin
+ unique case ({ack_in, ack_out})
+ 2'b 00: begin
+ stored_data_next = stored_data;
+ stored_mask_next = stored_mask;
+ end
+ 2'b 01: begin
+ // ack_out : shift the amount of OutW
+ stored_data_next = {{OutW{1'b0}}, stored_data[Width-1:OutW]};
+ stored_mask_next = {{OutW{1'b0}}, stored_mask[Width-1:OutW]};
+ end
+ 2'b 10: begin
+ // ack_in : Store concat data
+ stored_data_next = concat_data[0+:Width];
+ stored_mask_next = concat_mask[0+:Width];
+ end
+ 2'b 11: begin
+ // both : shift the concat_data
+ stored_data_next = concat_data[ConcatW-1:OutW];
+ stored_mask_next = concat_mask[ConcatW-1:OutW];
+ end
+ default: begin
+ stored_data_next = stored_data;
+ stored_mask_next = stored_mask;
+ end
+ endcase
end
// Store the data temporary if it doesn't exceed OutW
@@ -104,24 +140,20 @@
if (!rst_ni) begin
stored_data <= '0;
stored_mask <= '0;
- end else if (flush_ready) begin
+ end else if (flush_done) begin
stored_data <= '0;
stored_mask <= '0;
- end else if (ack_out) begin
+ end else begin
stored_data <= stored_data_next;
stored_mask <= stored_mask_next;
- end else if (ack_in) begin
- // When the requested size is smaller than OutW or output isn't ready
- // Assume when output isn't ready, the module holds the input request
- stored_data <= concat_data[MaxW-1:0];
- stored_mask <= concat_mask[MaxW-1:0];
end
end
+ //---------------------------------------------------------------------------
- // flush_ready handling
+ // flush handling
typedef enum logic {
FlushIdle,
- FlushWait
+ FlushSend
} flush_st_e;
flush_st_e flush_st, flush_st_next;
@@ -136,63 +168,62 @@
always_comb begin
flush_st_next = FlushIdle;
- flush_ready = 1'b0;
+ flush_valid = 1'b0;
+ flush_done = 1'b0;
unique case (flush_st)
FlushIdle: begin
- if (flush_i && !ready_i) begin
- // Wait until hold released
- flush_st_next = FlushWait;
-
- flush_ready = 1'b0;
- end else if (flush_i && ready_i) begin
- // Can write right away!
- flush_st_next = FlushIdle;
-
- flush_ready = 1'b1;
+ if (flush_i) begin
+ flush_st_next = FlushSend;
end else begin
flush_st_next = FlushIdle;
end
end
- FlushWait: begin
- // TODO: Add timeout and force flush
- if (ready_i) begin
- // Ready to write
+ FlushSend: begin
+ if (pos == '0) begin
flush_st_next = FlushIdle;
- flush_ready = 1'b1;
+ flush_valid = 1'b 0;
+ flush_done = 1'b 1;
end else begin
- // Wait ...
- flush_st_next = FlushWait;
+ flush_st_next = FlushSend;
- flush_ready = 1'b0;
+ flush_valid = 1'b 1;
+ flush_done = 1'b 0;
end
end
-
default: begin
flush_st_next = FlushIdle;
- flush_ready = 1'b0;
+ flush_valid = 1'b 0;
+ flush_done = 1'b 0;
end
endcase
end
- assign flush_done_o = flush_ready;
+ assign flush_done_o = flush_done;
- assign valid_next = (pos_next >= OutW) ? 1'b 1 : flush_ready & (pos != '0);
- assign ready_next = ack_out ? 1'b1 : pos_next <= MaxW; // New `we` needs to be hold.
+
+ // Output signals ===========================================================
+ assign valid_next = (pos >= OutW) ? 1'b 1 : flush_valid;
+
+ // storage space is InW + OutW. So technically, ready_o can be asserted even
+ // if `pos` is greater than OutW. But in order to do that, the logic should
+ // use `inmask_ones` value whether pos+inmask_ones is less than (InW+OutW)
+ // with `valid_i`. It creates a path from `valid_i` --> `ready_o`.
+ // It may create a timing loop in some modules that use `ready_o` to
+ // `valid_i` (which is not a good practice though)
+ assign ready_next = pos <= OutW;
// Output request
assign valid_o = valid_next;
- assign data_o = concat_data[OutW-1:0];
- assign mask_o = concat_mask[OutW-1:0];
+ assign data_o = stored_data[OutW-1:0];
+ assign mask_o = stored_mask[OutW-1:0];
// ready_o
assign ready_o = ready_next;
-
- // TODO: Implement Pipelined logic
- // Need to change pos logic, mask&data calculation logic too
+ //---------------------------------------------------------------------------
//////////////////////////////////////////////
// Assertions, Assumptions, and Coverpoints //
@@ -215,7 +246,7 @@
// While in flush state, new request shouldn't come
`ASSUME(ValidIDeassertedOnFlush_M,
- flush_st == FlushWait |-> $stable(valid_i))
+ flush_st == FlushSend |-> $stable(valid_i))
// If not acked, input port keeps asserting valid and data
`ASSUME(DataIStable_M,
@@ -231,9 +262,9 @@
`ASSERT(ValidOPairedWidthReadyI_A,
valid_o && !ready_i |=> valid_o)
- // If input mask + stored data is greater than output width, valid should be asserted
- `ASSERT(ValidOAssertedForInputGTEOutW_A,
- valid_i && (($countones(mask_i) + $countones(stored_mask)) >= OutW) |-> valid_o)
+ // If stored data is greater than the output width, valid should be asserted
+ `ASSERT(ValidOAssertedForStoredDataGTEOutW_A,
+ ($countones(stored_mask) >= OutW) |-> valid_o)
// If output port doesn't accept the data, the data should be stable
`ASSERT(DataOStableWhenPending_A,
@@ -241,17 +272,15 @@
&& !$past(ready_i) |-> $stable(data_o))
// If input data & stored data are greater than OutW, remained should be stored
- // TODO: Find out how the FPV time can be reduced.
- //`ASSERT(ExcessiveDataStored_A,
- // ack_in && (($countones(mask_i) + $countones(stored_mask)) > OutW) |=>
- // (($past(data_i) & $past(mask_i)) >>
- // ($past(lod_idx)+OutW-$countones($past(stored_mask))))
- // == stored_data,
- // clk_i, !rst_ni)
- `ASSERT(ExcessiveMaskStored_A,
- ack_in && (($countones(mask_i) + $countones(stored_mask)) > OutW) |=>
- ($past(mask_i) >>
- ($past(lod_idx)+OutW-$countones($past(stored_mask))))
- == stored_mask)
+ `ASSERT(ExcessiveDataStored_A,
+ ack_in && ack_out && (($countones(mask_i) + $countones(stored_mask)) > OutW)
+ |=> (($past(data_i) & $past(mask_i)) >>
+ ($past(lod_idx)+OutW-$countones($past(stored_mask))))
+ == stored_data)
+ `ASSERT(ExcessiveMaskStored_A,
+ ack_in && ack_out && (($countones(mask_i) + $countones(stored_mask)) > OutW)
+ |=> ($past(mask_i) >>
+ ($past(lod_idx)+OutW-$countones($past(stored_mask))))
+ == stored_mask)
endmodule