[prim] Add primitive for REQ/ACK synchronization
This commit adds a primitive for synchronizing REQ/ACK handshakes across
clock domain crossings. The primitive comes with a simple scratch
Verilator testbench.
Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
diff --git a/hw/ip/prim/pre_dv/prim_sync_reqack/README.md b/hw/ip/prim/pre_dv/prim_sync_reqack/README.md
new file mode 100644
index 0000000..77447b4
--- /dev/null
+++ b/hw/ip/prim/pre_dv/prim_sync_reqack/README.md
@@ -0,0 +1,34 @@
+REQ/ACK Synchronizer Verilator Testbench
+========================================
+
+This directory contains a basic, scratch Verilator testbench targeting
+functional verification of the REQ/ACK synchronizer primitive during
+development.
+
+How to build and run the testbench
+----------------------------------
+
+From the OpenTitan top level execute
+
+ ```sh
+ fusesoc --cores-root=. run --setup --build \
+ lowrisc:dv_verilator:prim_sync_reqack_tb
+ ```
+to build the testbench and afterwards
+
+ ```sh
+ ./build/lowrisc_dv_verilator_prim_sync_reqack_tb_0/default-verilator/Vprim_sync_reqack_tb \
+ --trace
+ ```
+to run it.
+
+Details of the testbench
+------------------------
+
+- `rtl/prim_sync_reqack_tb.sv`: SystemVerilog testbench, instantiates and
+ drives the DUT, counts handshakes in both domains, signals test end and
+ result (pass/fail) to C++ via output ports. Change this file to use, e.g.,
+ a different clock ratio or more transactions.
+- `cpp/prim_sync_reqack_tb.cc`: Contains main function and instantiation of
+ SimCtrl, reads output ports of DUT and signals simulation termination to
+ Verilator.
diff --git a/hw/ip/prim/pre_dv/prim_sync_reqack/cpp/prim_sync_reqack_tb.cc b/hw/ip/prim/pre_dv/prim_sync_reqack/cpp/prim_sync_reqack_tb.cc
new file mode 100644
index 0000000..4b93a4c
--- /dev/null
+++ b/hw/ip/prim/pre_dv/prim_sync_reqack/cpp/prim_sync_reqack_tb.cc
@@ -0,0 +1,62 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "Vprim_sync_reqack_tb.h"
+#include "verilated_toplevel.h"
+#include "verilator_sim_ctrl.h"
+
+#include <signal.h>
+#include <functional>
+#include <iostream>
+
+#include "sim_ctrl_extension.h"
+
+class PrimSyncReqAckTB : public SimCtrlExtension {
+  using SimCtrlExtension::SimCtrlExtension;
+  // SimCtrl extension that ends the simulation when the testbench is done.
+ public:
+  explicit PrimSyncReqAckTB(prim_sync_reqack_tb *top);
+  // Per-clock hook; `override` makes the compiler check the base signature.
+  void OnClock(unsigned long sim_time) override;
+
+ private:
+  prim_sync_reqack_tb *top_;  // Verilated testbench top level (not owned)
+};
+
+// Constructor:
+// - Set up top_ ptr
+PrimSyncReqAckTB::PrimSyncReqAckTB(prim_sync_reqack_tb *top)
+ : SimCtrlExtension{}, top_(top) {}
+
+// Called by SimCtrl every clock cycle; stops the sim once test_done_o is set.
+void PrimSyncReqAckTB::OnClock(unsigned long sim_time) {
+  // Nothing to do while the testbench is still running.
+  if (!top_->test_done_o) return;
+  VerilatorSimCtrl::GetInstance().RequestStop(top_->test_passed_o);
+}
+
+int main(int argc, char **argv) {
+  // Verilated model of the testbench top level.
+  prim_sync_reqack_tb top;
+
+  // Hand the model, its clock and its active-low reset to the simulation
+  // controller singleton.
+  VerilatorSimCtrl &sim_ctrl = VerilatorSimCtrl::GetInstance();
+  sim_ctrl.SetTop(&top, &top.clk_i, &top.rst_ni,
+                  VerilatorSimCtrlFlags::ResetPolarityNegative);
+
+  // The extension polls the testbench status ports on every clock cycle; it
+  // is registered by pointer with the controller.
+  PrimSyncReqAckTB tb_extension(&top);
+  sim_ctrl.RegisterExtension(&tb_extension);
+
+  std::cout << "Simulation of REQ/ACK Synchronizer primitive" << std::endl
+            << "============================================" << std::endl
+            << std::endl;
+
+  // Run the simulation; the value returned by Exec() is used as the exit
+  // code of the process.
+  const int ret_code = sim_ctrl.Exec(argc, argv);
+  return ret_code;
+}
diff --git a/hw/ip/prim/pre_dv/prim_sync_reqack/prim_sync_reqack_tb.core b/hw/ip/prim/pre_dv/prim_sync_reqack/prim_sync_reqack_tb.core
new file mode 100644
index 0000000..bce88fb
--- /dev/null
+++ b/hw/ip/prim/pre_dv/prim_sync_reqack/prim_sync_reqack_tb.core
@@ -0,0 +1,52 @@
+CAPI=2:
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+name: "lowrisc:dv_verilator:prim_sync_reqack_tb"
+description: "REQ/ACK Synchronizer Verilator TB"
+filesets:
+ files_rtl:
+ depend:
+ - lowrisc:prim:all
+ files:
+ - rtl/prim_sync_reqack_tb.sv
+ file_type: systemVerilogSource
+
+ files_dv_verilator:
+ depend:
+ - lowrisc:dv_verilator:simutil_verilator
+
+ files:
+ - cpp/prim_sync_reqack_tb.cc
+ file_type: cppSource
+
+targets:
+ default:
+ default_tool: verilator
+ filesets:
+ - files_rtl
+ - files_dv_verilator
+ toplevel: prim_sync_reqack_tb
+ tools:
+ verilator:
+ mode: cc
+ verilator_options:
+# Disabling tracing reduces compile times severalfold, but doesn't have a
+# huge influence on runtime performance. (Based on early observations.)
+ - '--trace'
+ - '--trace-fst' # this requires -DVM_TRACE_FMT_FST in CFLAGS below!
+ - '--trace-structs'
+ - '--trace-params'
+ - '--trace-max-array 1024'
+# compiler flags
+#
+# -O
+# Optimization levels have a large impact on the runtime performance of the
+# simulation model. -O2 and -O3 are pretty similar, -Os is slower than -O2/-O3
+ - '-CFLAGS "-std=c++11 -Wall -DVM_TRACE_FMT_FST -DTOPLEVEL_NAME=prim_sync_reqack_tb -g -O0"'
+ - '-LDFLAGS "-pthread -lutil -lelf"'
+ - "-Wall"
+ - "-Wno-PINCONNECTEMPTY"
+ # XXX: Cleanup all warnings and remove this option
+ # (or make it more fine-grained at least)
+ - "-Wno-fatal"
diff --git a/hw/ip/prim/pre_dv/prim_sync_reqack/rtl/prim_sync_reqack_tb.sv b/hw/ip/prim/pre_dv/prim_sync_reqack/rtl/prim_sync_reqack_tb.sv
new file mode 100644
index 0000000..760c621
--- /dev/null
+++ b/hw/ip/prim/pre_dv/prim_sync_reqack/rtl/prim_sync_reqack_tb.sv
@@ -0,0 +1,173 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Scratch verification testbench for REQ/ACK synchronizer primitive
+
+module prim_sync_reqack_tb #(
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+
+  output logic test_done_o,
+  output logic test_passed_o
+);
+
+  // TB configuration
+  localparam int unsigned NumTransactions = 8;
+  localparam logic        FastToSlow = 1'b1; // Select 1'b0 for SlowToFast
+  localparam int unsigned Ratio = 4; // must be even and greater than or equal to 2
+
+  // Derivation of parameters
+  localparam int unsigned Ticks = Ratio/2;
+  localparam int unsigned WidthTicks = $clog2(Ticks)+1;
+  localparam int unsigned WidthTrans = $clog2(NumTransactions)+1;
+
+  // Derive slow clock (using a counter that wraps every Ticks cycles of clk_i)
+  logic [WidthTicks-1:0] count_clk_d, count_clk_q;
+  assign count_clk_d = count_clk_q == (Ticks[WidthTicks-1:0]-1) ? '0 : count_clk_q + {{WidthTicks-1{1'b0}},{1'b1}};
+  always_ff @(posedge clk_i) begin : reg_count_clk
+    count_clk_q <= count_clk_d;
+  end
+
+  logic clk_slow_d, clk_slow_q, clk_slow;
+  assign clk_slow_d = count_clk_q == (Ticks[WidthTicks-1:0]-1) ? !clk_slow_q : clk_slow_q;
+  always_ff @(posedge clk_i) begin : reg_clk_slow
+    clk_slow_q <= clk_slow_d;
+  end
+  assign clk_slow = clk_slow_q;
+
+  // Sync reset to slow clock (asserted immediately with rst_ni, released after two clk_slow edges)
+  logic [1:0] rst_slow_nq;
+  logic       rst_slow_n;
+  always_ff @(posedge clk_slow) begin
+    rst_slow_nq <= {rst_slow_nq[0], rst_ni};
+  end
+  assign rst_slow_n = rst_ni & rst_slow_nq[1];
+
+  // Connect clocks
+  logic clk_src, clk_dst;
+  assign clk_src = FastToSlow ? clk_i    : clk_slow;
+  assign clk_dst = FastToSlow ? clk_slow : clk_i;
+
+  logic src_req, dst_req;
+  logic src_ack, dst_ack;
+  logic rst_done;
+
+  // Instantiate DUT (both domains share the reset synchronized to the slow clock)
+  prim_sync_reqack prim_sync_reqack (
+    .clk_src_i  (clk_src),
+    .rst_src_ni (rst_slow_n),
+    .clk_dst_i  (clk_dst),
+    .rst_dst_ni (rst_slow_n),
+
+    .src_req_i  (src_req),
+    .src_ack_o  (src_ack),
+    .dst_req_o  (dst_req),
+    .dst_ack_i  (dst_ack)
+  );
+
+  // Make sure we do not apply stimuli before the reset: rst_done is only ever set by the reset.
+  always_ff @(posedge clk_slow or negedge rst_slow_n) begin
+    if (!rst_slow_n) begin
+      rst_done <= '1;
+    end else begin
+      rst_done <= rst_done;
+    end
+  end
+
+  // Create randomized ACK delay of up to 2**WIDTH_COUNT-1 clk_dst cycles
+  localparam int WIDTH_COUNT = 3;
+  logic [31:0] tmp;
+  logic [31-WIDTH_COUNT:0] unused_tmp;
+  assign unused_tmp = tmp[31:WIDTH_COUNT];
+  logic [WIDTH_COUNT-1:0] dst_count_clk_d, dst_count_clk_q;
+  logic [WIDTH_COUNT-1:0] dst_count_clk_max_d, dst_count_clk_max_q;
+  logic count_exp;
+  assign count_exp = dst_count_clk_q == dst_count_clk_max_q;
+  always_comb begin
+    dst_count_clk_d     = dst_count_clk_q;
+    dst_count_clk_max_d = dst_count_clk_max_q;
+    tmp                 = '0;
+    if (dst_req && count_exp) begin
+      // Clear counter
+      dst_count_clk_d = '0;
+      // Get new max (slice width follows WIDTH_COUNT, matching unused_tmp above)
+      tmp = $random;
+      dst_count_clk_max_d = tmp[WIDTH_COUNT-1:0];
+    end else if (dst_req) begin
+      // Increment
+      dst_count_clk_d = dst_count_clk_q + {{WIDTH_COUNT-1{1'b0}},{1'b1}};
+    end
+  end
+  always_ff @(posedge clk_dst or negedge rst_slow_n) begin : reg_dst_count_clk
+    if (!rst_slow_n) begin
+      dst_count_clk_q     <= '0;
+      dst_count_clk_max_q <= '0;
+    end else begin
+      dst_count_clk_q     <= dst_count_clk_d;
+      dst_count_clk_max_q <= dst_count_clk_max_d;
+    end
+  end
+
+  // Apply stimuli
+  always_comb begin
+
+    src_req = 1'b0;
+    dst_ack = 1'b0;
+
+    if (rst_done && rst_slow_n) begin
+      // The source wants to perform handshakes at maximum rate.
+      src_req = 1'b1;
+    end
+
+    if (dst_req && count_exp) begin
+      // The destination sends the ACK after a random delay.
+      dst_ack = 1'b1;
+    end
+  end
+
+  // Count handshakes on both sides
+  logic [WidthTrans-1:0] src_count_d, src_count_q;
+  logic [WidthTrans-1:0] dst_count_d, dst_count_q;
+  assign src_count_d = (src_req && src_ack) ? src_count_q + 1'b1 : src_count_q;
+  always_ff @(posedge clk_src or negedge rst_slow_n) begin : reg_src_count
+    if (!rst_slow_n) begin
+      src_count_q <= '0;
+    end else begin
+      src_count_q <= src_count_d;
+    end
+  end
+  assign dst_count_d = (dst_req && dst_ack) ? dst_count_q + 1'b1 : dst_count_q;
+  always_ff @(posedge clk_dst or negedge rst_slow_n) begin : reg_dst_count
+    if (!rst_slow_n) begin
+      dst_count_q <= '0;
+    end else begin
+      dst_count_q <= dst_count_d;
+    end
+  end
+
+  // Check responses, signal end of simulation (fails if the counts differ by more than one)
+  always_ff @(posedge clk_i) begin : tb_ctrl
+    test_done_o   <= 1'b0;
+    test_passed_o <= 1'b1;
+
+    if ((src_count_q == NumTransactions[WidthTrans-1:0]) &&
+        (dst_count_q == NumTransactions[WidthTrans-1:0])) begin // Success
+
+      $display("\nSUCCESS: Performed %0d handshakes in both source and destination domain.",
+          NumTransactions);
+      $display("Finishing simulation now.\n");
+      test_passed_o <= 1'b1;
+      test_done_o   <= 1'b1;
+    end else if (((src_count_q > dst_count_q) && ((src_count_q - dst_count_q) > 1)) ||
+                 ((dst_count_q > src_count_q) && ((dst_count_q - src_count_q) > 1))) begin // Failed
+      $display("\nERROR: Performed %0d handshakes in source domain, and %0d in destination domain.",
+          src_count_q, dst_count_q);
+      $display("Finishing simulation now.\n");
+      test_passed_o <= 1'b0;
+      test_done_o   <= 1'b1;
+    end
+  end
+
+endmodule
diff --git a/hw/ip/prim/prim.core b/hw/ip/prim/prim.core
index 5a396e6..20d0867 100644
--- a/hw/ip/prim/prim.core
+++ b/hw/ip/prim/prim.core
@@ -29,6 +29,7 @@
- rtl/prim_fifo_async.sv
- rtl/prim_fifo_sync.sv
- rtl/prim_flop_2sync.sv
+ - rtl/prim_sync_reqack.sv
- rtl/prim_keccak.sv
- rtl/prim_lfsr.sv
- rtl/prim_packer.sv
diff --git a/hw/ip/prim/rtl/prim_sync_reqack.sv b/hw/ip/prim/rtl/prim_sync_reqack.sv
new file mode 100644
index 0000000..5484898
--- /dev/null
+++ b/hw/ip/prim/rtl/prim_sync_reqack.sv
@@ -0,0 +1,158 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// REQ/ACK synchronizer
+//
+// This module synchronizes a REQ/ACK handshake across a clock domain crossing.
+// Both domains will see a handshake with the duration of one clock cycle.
+//
+// Notes:
+// - Once asserted, the source domain is not allowed to de-assert REQ without ACK.
+// - The destination domain is not allowed to send an ACK without a REQ.
+// - This module works both when syncing from a faster to a slower clock domain and vice versa.
+// - Internally, this module uses a return-to-zero, four-phase handshake protocol. Assuming the
+// destination side responds with an ACK immediately, the latency from asserting the REQ on the
+// source side is:
+// - 1 source + 2 destination clock cycles until the handshake is performed on the
+// destination side,
+// - 1 source + 2 destination + 1 destination + 2 source clock cycles until the handshake is
+// performed on the source side.
+// - It takes another round trip (3 source + 3 destination clock cycles) before the next
+// REQ is starting to be propagated to the destination side. The module is thus not suitable
+// for high-bandwidth communication.
+
+`include "prim_assert.sv"
+
+module prim_sync_reqack (
+  input  clk_src_i,       // REQ side, SRC domain
+  input  rst_src_ni,      // REQ side, SRC domain
+  input  clk_dst_i,       // ACK side, DST domain
+  input  rst_dst_ni,      // ACK side, DST domain
+
+  input  logic src_req_i, // REQ side, SRC domain
+  output logic src_ack_o, // REQ side, SRC domain
+  output logic dst_req_o, // ACK side, DST domain
+  input  logic dst_ack_i  // ACK side, DST domain
+);
+
+  // Types
+  typedef enum logic {
+    HANDSHAKE, SYNC
+  } sync_reqack_fsm_e;
+
+  // Signals
+  sync_reqack_fsm_e src_fsm_ns, src_fsm_cs;
+  sync_reqack_fsm_e dst_fsm_ns, dst_fsm_cs;
+  logic src_req_d, src_req_q, src_ack;
+  logic dst_ack_d, dst_ack_q, dst_req;
+
+  // Move REQ over to ACK side.
+  prim_flop_2sync #(
+    .Width(1)
+  ) req_sync (
+    .clk_i  (clk_dst_i),
+    .rst_ni (rst_dst_ni),
+    .d      (src_req_q),
+    .q      (dst_req)
+  );
+
+  // Move ACK over to REQ side.
+  prim_flop_2sync #(
+    .Width(1)
+  ) ack_sync (
+    .clk_i  (clk_src_i),
+    .rst_ni (rst_src_ni),
+    .d      (dst_ack_q),
+    .q      (src_ack)
+  );
+
+  // REQ-side FSM (source domain)
+  always_comb begin : src_fsm
+    src_fsm_ns = src_fsm_cs;
+
+    // By default, we forward the REQ and ACK.
+    src_req_d = src_req_i;
+    src_ack_o = src_ack;
+
+    unique case (src_fsm_cs)
+
+      HANDSHAKE: begin
+        // The handshake on the REQ side is done for exactly 1 clock cycle.
+        if (src_req_i && src_ack) begin
+          src_fsm_ns = SYNC;
+          // Tell ACK side that we are done.
+          src_req_d  = 1'b0;
+        end
+      end
+
+      SYNC: begin
+        // Make sure ACK side knows that we are done.
+        src_req_d = 1'b0;
+        src_ack_o = 1'b0;
+        if (!src_ack) begin
+          src_fsm_ns = HANDSHAKE;
+        end
+      end
+
+      default: ;
+    endcase
+  end
+
+  // ACK-side FSM (destination domain)
+  always_comb begin : dst_fsm
+    dst_fsm_ns = dst_fsm_cs;
+
+    // By default, we forward the REQ and ACK.
+    dst_req_o = dst_req;
+    dst_ack_d = dst_ack_i;
+
+    unique case (dst_fsm_cs)
+
+      HANDSHAKE: begin
+        // The handshake on the ACK side is done for exactly 1 clock cycle.
+        if (dst_req && dst_ack_i) begin
+          dst_fsm_ns = SYNC;
+        end
+      end
+
+      SYNC: begin
+        // Don't forward REQ, hold ACK, wait for REQ side.
+        dst_req_o = 1'b0;
+        dst_ack_d = 1'b1;
+        if (!dst_req) begin
+          dst_fsm_ns = HANDSHAKE;
+        end
+      end
+
+      default: ;
+    endcase
+  end
+
+  // Registers
+  always_ff @(posedge clk_src_i or negedge rst_src_ni) begin
+    if (!rst_src_ni) begin
+      src_fsm_cs <= HANDSHAKE;
+      src_req_q  <= 1'b0;
+    end else begin
+      src_fsm_cs <= src_fsm_ns;
+      src_req_q  <= src_req_d;
+    end
+  end
+  always_ff @(posedge clk_dst_i or negedge rst_dst_ni) begin
+    if (!rst_dst_ni) begin
+      dst_fsm_cs <= HANDSHAKE;
+      dst_ack_q  <= 1'b0;
+    end else begin
+      dst_fsm_cs <= dst_fsm_ns;
+      dst_ack_q  <= dst_ack_d;
+    end
+  end
+  // Protocol checks. The last ASSERT argument is the active-high reset (see prim_assert.sv).
+  // Source domain cannot de-assert REQ while waiting for ACK.
+  `ASSERT(ReqAckSyncHoldReq, $fell(src_req_i) |-> (src_fsm_cs != HANDSHAKE), clk_src_i, !rst_src_ni)
+
+  // Destination domain cannot assert ACK without REQ.
+  `ASSERT(ReqAckSyncAckNeedsReq, dst_ack_i |-> dst_req_o, clk_dst_i, !rst_dst_ni)
+
+endmodule