[prim/packer] Revert to the design prior to #634

Problem:

    `prim_packer` sent out partial data (mask_o != '1) in some cases
    without `flush_i`.

While testing the `hmac_back_pressure`, the test failed at the assertion
below in `sha2_pad.sv`.

```systemverilog
  // When fifo_partial, fifo shouldn't be empty and hash_process was set
  `ASSERT(ValidPartialConditionAssert,
          fifo_partial && fifo_rvalid |-> hash_process_flag,
          clk_i, !rst_ni)
```

It complains when it receives partial-word data from MSG_FIFO without a
`hash_process` indication from the software. This means something goes
wrong along the data path from the TL-UL interface to the sha2_pad logic.

The mask (which creates `fifo_partial`) is generated in `prim_packer`.
While reviewing the logic again, I found that in PR #634 I had assumed
`prim_packer` has 64-bit internal registers (InW := 32 and OutW := 32).

But when I originally designed it, I gave it only `max(InW, OutW)` bits,
so if the stored data + incoming data size exceeds `MaxW`, the logic
should lower `ready_o`. It worked well before the design was changed.

While adding FPV assertions internally, I made the wrong assumption and
changed the design to store the incoming data if the currently stored
data size is less than or equal to `OutW`.

Resolution:

    Rolled back `prim_packer` to the original design and increased the
    `pos_next` width so that it can hold the value `InW + OutW`
    (= `Width`), i.e. `PtrW = $clog2(Width + 1)`.

This is related to #19

[prim/packer] Add more assertions for solid FPV
diff --git a/hw/ip/hmac/rtl/hmac.sv b/hw/ip/hmac/rtl/hmac.sv
index cc8e4b0..ade45a3 100644
--- a/hw/ip/hmac/rtl/hmac.sv
+++ b/hw/ip/hmac/rtl/hmac.sv
@@ -181,7 +181,7 @@
   assign reg_fifo_wentry.data = conv_endian(reg_fifo_wdata, 1'b1); // always convert
   assign reg_fifo_wentry.mask = {reg_fifo_wmask[0],  reg_fifo_wmask[8],
                                  reg_fifo_wmask[16], reg_fifo_wmask[24]};
-  assign fifo_full   = ~fifo_wready & ~packer_ready;
+  assign fifo_full   = ~fifo_wready;
   assign fifo_empty  = ~fifo_rvalid;
   assign fifo_wvalid = (hmac_fifo_wsel && fifo_wready) ? hmac_fifo_wvalid : reg_fifo_wvalid;
   assign fifo_wdata  = (hmac_fifo_wsel) ? '{data: digest[hmac_fifo_wdata_sel], mask: '1}
diff --git a/hw/ip/prim/rtl/prim_packer.sv b/hw/ip/prim/rtl/prim_packer.sv
index ae08dad..97ef15e 100644
--- a/hw/ip/prim/rtl/prim_packer.sv
+++ b/hw/ip/prim/rtl/prim_packer.sv
@@ -26,7 +26,7 @@
 );
 
   localparam int Width = InW + OutW;
-  localparam int PtrW = $clog2(Width);
+  localparam int PtrW = $clog2(Width+1);
   localparam int MaxW = (InW > OutW) ? InW : OutW;
 
   logic valid_next, ready_next;
@@ -59,7 +59,7 @@
     end else if (flush_ready) begin
       pos <= '0;
     end else if (ack_out) begin
-      `ASSERT_I(PosOrPosNextGTEOutW_A, (pos >= OutW) || (pos_next >= OutW))
+      `ASSERT_I(pos_next_gte_outw_p, pos_next >= OutW)
       pos <= pos_next - OutW;
     end else if (ack_in) begin
       pos <= pos_next;
@@ -171,25 +171,8 @@
 
   assign flush_done_o = flush_ready;
 
-  always_comb begin
-    if (pos >= OutW) begin
-      // Pending transactions in the storage
-      valid_next = 1'b1;
-    end else if (pos_next >= OutW) begin
-      valid_next = 1'b1;
-    end else begin
-      valid_next = flush_ready & (pos != '0);
-    end
-  end
-  always_comb begin
-    if (pos >= OutW) begin
-      // It has out data remained inside, shouldn't accept new data
-      ready_next = 1'b0;
-    end else begin
-      // Regardless of pos_next, it can store the data
-      ready_next = 1'b1;
-    end
-  end
+  assign valid_next = (pos_next >= OutW) ? 1'b 1 : flush_ready & (pos != '0);
+  assign ready_next = ack_out ? 1'b1 : pos_next <= MaxW; // New `we` needs to be hold.
 
   // Output request
   assign valid_o = valid_next;
@@ -199,6 +182,9 @@
   // ready_o
   assign ready_o = ready_next;
 
+  // TODO: Implement Pipelined logic
+  //       Need to change pos logic, mask&data calculation logic too
+
   //////////////////////////////////////////////
   // Assertions, Assumptions, and Coverpoints //
   //////////////////////////////////////////////
@@ -209,6 +195,11 @@
           valid_i |-> $countones(mask_i ^ {mask_i[InW-2:0],1'b0}) <= 2,
           clk_i, !rst_ni)
 
+  // Assume data pattern to reduce FPV test time
+  //`ASSUME_FPV(FpvDataWithin_M,
+  //            data_i inside {'0, '1, 32'hDEAD_BEEF},
+  //            clk_i, !rst_ni)
+
   // Flush and Write Enable cannot be asserted same time
   `ASSUME(ExFlushValid_M, flush_i |-> !valid_i, clk_i, !rst_ni)
 
@@ -235,9 +226,9 @@
           valid_o && !ready_i |=> valid_o,
           clk_i, !rst_ni)
 
-  // If input mask is greater than output width, valid should be asserted
+  // If input mask + stored data is greater than output width, valid should be asserted
   `ASSERT(ValidOAssertedForInputGTEOutW_A,
-          valid_i && ($countones(mask_i) >= OutW) |-> valid_o,
+          valid_i && (($countones(mask_i) + $countones(stored_mask)) >= OutW) |-> valid_o,
           clk_i, !rst_ni)
 
   // If output port doesn't accept the data, the data should be stable
@@ -246,4 +237,19 @@
           && !$past(ready_i) |-> $stable(data_o),
           clk_i, !rst_ni)
 
+  // If input data & stored data are greater than OutW, remained should be stored
+  // TODO: Find out how the FPV time can be reduced.
+  //`ASSERT(ExcessiveDataStored_A,
+  //        ack_in && (($countones(mask_i) + $countones(stored_mask)) > OutW) |=>
+  //          (($past(data_i) &  $past(mask_i)) >>
+  //          ($past(lod_idx)+OutW-$countones($past(stored_mask))))
+  //          == stored_data,
+  //        clk_i, !rst_ni)
+  `ASSERT(ExcessiveMaskStored_A,
+          ack_in && (($countones(mask_i) + $countones(stored_mask)) > OutW) |=>
+          ($past(mask_i) >>
+          ($past(lod_idx)+OutW-$countones($past(stored_mask))))
+            == stored_mask,
+          clk_i, !rst_ni)
+
 endmodule