The option is controlled through xbar_main.hjson

By default all xbar components are marked with
'pipeline' = true
'pipeline_byp' = true

This means all elements (socket1n/socketm1) have both incoming and outgoing
FIFOs that are bypassable if the FIFO is empty

Setting `pipeline_byp` to false makes it a non-bypassable FIFO and incurs
extra latency while giving better timing

Setting `pipeline` to false removes the FIFO storage entirely (depth 0) and
connects input directly to output

The current scheme does the following:
remove pipelines for coreI/coreD to minimize process latency
remove pipelines for ROM / RAM / FLASH to minimize instruction latency
keep pipelines to all other peripherals and memories
diff --git a/hw/ip/prim/rtl/prim_arbiter.sv b/hw/ip/prim/rtl/prim_arbiter.sv
index c9e7bd0..ae1aea5 100644
--- a/hw/ip/prim/rtl/prim_arbiter.sv
+++ b/hw/ip/prim/rtl/prim_arbiter.sv
@@ -7,6 +7,10 @@
 // verilog parameter
 //   N:  Number of request ports
 //   DW: Data width
+//
+// This arbiter implements a first come first serve scheme.
+// If the destination is not ready, the current winning request is held until transaction
+// is accepted.
 
 module prim_arbiter #(
   parameter N   = 4,
@@ -56,7 +60,7 @@
     if (!rst_ni) begin
       mask <= '0;
     end else if (arb_valid && arb_ready) begin
-      // Latch only when requests available
+      // Latch only when requests accepted
       mask <= mask_next;
     end else if (arb_valid && !arb_ready) begin
       // Downstream isn't yet ready so, keep current request alive. (First come first serve)
diff --git a/hw/ip/prim/rtl/prim_fifo_sync.sv b/hw/ip/prim/rtl/prim_fifo_sync.sv
index 185120c..5cc1b67 100644
--- a/hw/ip/prim/rtl/prim_fifo_sync.sv
+++ b/hw/ip/prim/rtl/prim_fifo_sync.sv
@@ -5,10 +5,11 @@
 // Generic synchronous fifo for use in a variety of devices.
 
 module prim_fifo_sync #(
-  parameter int unsigned Width  = 16,
-  parameter bit Pass            = 1'b1, // if == 1 allow requests to pass through empty FIFO
-  parameter int unsigned Depth  = 4,
-  parameter int unsigned DepthW = $clog2(Depth+1)  // derived parameter
+  parameter int unsigned Width       = 16,
+  parameter bit Pass                 = 1'b1, // if == 1 allow requests to pass through empty FIFO
+  parameter int unsigned Depth       = 4,
+  localparam int unsigned DepthWNorm = $clog2(Depth+1),
+  localparam int unsigned DepthW     = (DepthWNorm == 0) ? 1 : DepthWNorm // derived parameter
 ) (
   input                   clk_i,
   input                   rst_ni,
@@ -24,76 +25,93 @@
   output  [DepthW-1:0]    depth
 );
 
-  `ASSERT_INIT(paramCheckDepthW, DepthW == $clog2(Depth+1))
+  // FIFO is in complete passthrough mode
+  if (Depth == 0) begin : gen_passthru_fifo
+    `ASSERT_INIT(paramCheckPass, Pass == 1)
 
-  // consider Depth == 1 case when $clog2(1) == 0
-  localparam int unsigned PTRV_W    = $clog2(Depth) + ~|$clog2(Depth);
-  localparam int unsigned PTR_WIDTH = PTRV_W+1;
+    assign depth = 1'b0; //output is meaningless
 
-  logic [PTR_WIDTH-1:0] fifo_wptr, fifo_rptr;
-  logic                 fifo_incr_wptr, fifo_incr_rptr, fifo_empty;
+    // device facing
+    assign rvalid = wvalid;
+    assign rdata = wdata;
 
-  // create the write and read pointers
-  logic  full, empty;
-  logic  wptr_msb;
-  logic  rptr_msb;
-  logic  [PTRV_W-1:0] wptr_value;
-  logic  [PTRV_W-1:0] rptr_value;
+    // host facing
+    assign wready = rready;
 
-  assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
-  assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
-  assign wptr_value = fifo_wptr[0+:PTRV_W];
-  assign rptr_value = fifo_rptr[0+:PTRV_W];
-  assign depth = (full)                 ? DepthW'(Depth) :
-                 (wptr_msb == rptr_msb) ? DepthW'(wptr_value) - DepthW'(rptr_value) :
-                 (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_value)) ;
+  // Normal FIFO construction
+  end else begin : gen_normal_fifo
+    `ASSERT_INIT(paramCheckDepthW, DepthW == $clog2(Depth+1))
 
-  assign fifo_incr_wptr = wvalid & wready;
-  assign fifo_incr_rptr = rvalid & rready;
+    // consider Depth == 1 case when $clog2(1) == 0
+    localparam int unsigned PTRV_W    = $clog2(Depth) + ~|$clog2(Depth);
+    localparam int unsigned PTR_WIDTH = PTRV_W+1;
 
-  assign wready = ~full;
-  assign rvalid = ~empty;
+    logic [PTR_WIDTH-1:0] fifo_wptr, fifo_rptr;
+    logic                 fifo_incr_wptr, fifo_incr_rptr, fifo_empty;
 
-  always_ff @(posedge clk_i or negedge rst_ni)
-    if (!rst_ni) begin
-      fifo_wptr <= {(PTR_WIDTH){1'b0}};
-    end else if (fifo_incr_wptr) begin
-      if (fifo_wptr[PTR_WIDTH-2:0] == (Depth-1)) begin
-        fifo_wptr <= {~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
-      end else begin
-        fifo_wptr <= fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
-    end
-  end
+    // create the write and read pointers
+    logic  full, empty;
+    logic  wptr_msb;
+    logic  rptr_msb;
+    logic  [PTRV_W-1:0] wptr_value;
+    logic  [PTRV_W-1:0] rptr_value;
 
-  always_ff @(posedge clk_i or negedge rst_ni)
-    if (!rst_ni) begin
-      fifo_rptr <= {(PTR_WIDTH){1'b0}};
-    end else if (fifo_incr_rptr) begin
-      if (fifo_rptr[PTR_WIDTH-2:0] == (Depth-1)) begin
-        fifo_rptr <= {~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
-      end else begin
-        fifo_rptr <= fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
-    end
-  end
+    assign wptr_msb = fifo_wptr[PTR_WIDTH-1];
+    assign rptr_msb = fifo_rptr[PTR_WIDTH-1];
+    assign wptr_value = fifo_wptr[0+:PTRV_W];
+    assign rptr_value = fifo_rptr[0+:PTRV_W];
+    assign depth = (full)                 ? DepthW'(Depth) :
+                   (wptr_msb == rptr_msb) ? DepthW'(wptr_value) - DepthW'(rptr_value) :
+                   (DepthW'(Depth) - DepthW'(rptr_value) + DepthW'(wptr_value)) ;
 
-  assign  full       = (fifo_wptr == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
-  assign  fifo_empty = (fifo_wptr ==  fifo_rptr);
+    assign fifo_incr_wptr = wvalid & wready;
+    assign fifo_incr_rptr = rvalid & rready;
 
-  logic [Width-1:0] storage [0:Depth-1];
+    assign wready = ~full;
+    assign rvalid = ~empty;
 
-  always_ff @(posedge clk_i)
-    if (fifo_incr_wptr) begin
-      storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata;
+    always_ff @(posedge clk_i or negedge rst_ni)
+      if (!rst_ni) begin
+        fifo_wptr <= {(PTR_WIDTH){1'b0}};
+      end else if (fifo_incr_wptr) begin
+        if (fifo_wptr[PTR_WIDTH-2:0] == (Depth-1)) begin
+          fifo_wptr <= {~fifo_wptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+        end else begin
+          fifo_wptr <= fifo_wptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+      end
     end
 
-  if (Pass == 1'b1) begin : gen_pass
-    assign empty = fifo_empty & ~wvalid;
-    assign rdata = (fifo_empty && wvalid) ? wdata : storage[fifo_rptr[PTR_WIDTH-2:0]];
-  end else begin : gen_nopass
-    assign empty = fifo_empty;
-    assign rdata = storage[fifo_rptr[PTR_WIDTH-2:0]];
-  end
+    always_ff @(posedge clk_i or negedge rst_ni)
+      if (!rst_ni) begin
+        fifo_rptr <= {(PTR_WIDTH){1'b0}};
+      end else if (fifo_incr_rptr) begin
+        if (fifo_rptr[PTR_WIDTH-2:0] == (Depth-1)) begin
+          fifo_rptr <= {~fifo_rptr[PTR_WIDTH-1],{(PTR_WIDTH-1){1'b0}}};
+        end else begin
+          fifo_rptr <= fifo_rptr + {{(PTR_WIDTH-1){1'b0}},1'b1};
+      end
+    end
 
-  `ASSERT(depthShallNotExceedParamDepth, !empty |-> depth <= DepthW'(Depth), clk_i, !rst_ni)
+    assign  full       = (fifo_wptr == (fifo_rptr ^ {1'b1,{(PTR_WIDTH-1){1'b0}}}));
+    assign  fifo_empty = (fifo_wptr ==  fifo_rptr);
+
+    logic [Width-1:0] storage [0:Depth-1];
+
+    always_ff @(posedge clk_i)
+      if (fifo_incr_wptr) begin
+        storage[fifo_wptr[PTR_WIDTH-2:0]] <= wdata;
+      end
+
+    if (Pass == 1'b1) begin : gen_pass
+      assign empty = fifo_empty & ~wvalid;
+      assign rdata = (fifo_empty && wvalid) ? wdata : storage[fifo_rptr[PTR_WIDTH-2:0]];
+    end else begin : gen_nopass
+      assign empty = fifo_empty;
+      assign rdata = storage[fifo_rptr[PTR_WIDTH-2:0]];
+    end
+
+    `ASSERT(depthShallNotExceedParamDepth, !empty |-> depth <= DepthW'(Depth), clk_i, !rst_ni)
+  end // block: gen_normal_fifo
+
 
 endmodule
diff --git a/hw/ip/rv_core_ibex/rtl/rv_core_ibex.sv b/hw/ip/rv_core_ibex/rtl/rv_core_ibex.sv
index d6aed4a..7e97113 100644
--- a/hw/ip/rv_core_ibex/rtl/rv_core_ibex.sv
+++ b/hw/ip/rv_core_ibex/rtl/rv_core_ibex.sv
@@ -171,8 +171,8 @@
     if (!rst_ni) begin
       {tl_i_source, tl_d_source} <= '0;
     end else begin
-      if (instr_req_o) tl_i_source <= !tl_i_source;
-      if (data_req_o)  tl_d_source <= !tl_d_source;
+      if (instr_req_o && instr_gnt_i) tl_i_source <= !tl_i_source;
+      if (data_req_o && data_gnt_i)  tl_d_source <= !tl_d_source;
     end
   end
 
diff --git a/hw/ip/tlul/rtl/tlul_socket_1n.sv b/hw/ip/tlul/rtl/tlul_socket_1n.sv
index a26e1c5..9532e26 100644
--- a/hw/ip/tlul/rtl/tlul_socket_1n.sv
+++ b/hw/ip/tlul/rtl/tlul_socket_1n.sv
@@ -147,7 +147,8 @@
   always_comb begin
     hfifo_reqready = tl_u_i[N].a_ready; // default to error
     for (int idx = 0 ; idx < N ; idx++) begin
-      if (dev_select_outstanding == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
+      //if (dev_select_outstanding == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
+      if (dev_select_t == NWD'(idx)) hfifo_reqready = tl_u_i[idx].a_ready;
     end
     if (hold_all_requests) hfifo_reqready = 1'b0;
   end
diff --git a/hw/ip/tlul/rtl/tlul_socket_m1.sv b/hw/ip/tlul/rtl/tlul_socket_m1.sv
index fb2c796..5642a62 100644
--- a/hw/ip/tlul/rtl/tlul_socket_m1.sv
+++ b/hw/ip/tlul/rtl/tlul_socket_m1.sv
@@ -209,8 +209,8 @@
     drsp_fifo_o.d_source[IDW-1:STIDW]
   };
   for (genvar i = 0 ; i < M ; i++) begin : gen_idrouting
-    assign hfifo_rspvalid[i] = drsp_fifo_o.d_valid    && (drsp_fifo_o.d_source[0+:STIDW] == i);
-    assign dfifo_rspready[i] = hreq_fifo_o[i].d_ready && (drsp_fifo_o.d_source[0+:STIDW] == i);
+    assign hfifo_rspvalid[i] = drsp_fifo_o.d_valid    & (drsp_fifo_o.d_source[0+:STIDW] == i);
+    assign dfifo_rspready[i] = hreq_fifo_o[i].d_ready & (drsp_fifo_o.d_source[0+:STIDW] == i) & drsp_fifo_o.d_valid;
 
     assign hrsp_fifo_i[i] = '{
       d_valid:  hfifo_rspvalid[i],
diff --git a/hw/top_earlgrey/doc/top_earlgrey.gen.hjson b/hw/top_earlgrey/doc/top_earlgrey.gen.hjson
index 3a2fb3b..0c32870 100644
--- a/hw/top_earlgrey/doc/top_earlgrey.gen.hjson
+++ b/hw/top_earlgrey/doc/top_earlgrey.gen.hjson
@@ -330,102 +330,125 @@
           name: corei
           type: host
           clock: main
-          pipeline: "true"
+          pipeline: "false"
           inst_type: rv_core_ibex
+          pipeline_byp: "true"
         }
         {
           name: cored
           type: host
           clock: main
-          pipeline: "true"
+          pipeline: "false"
           inst_type: rv_core_ibex
+          pipeline_byp: "true"
         }
         {
           name: dm_sba
           type: host
           clock: main
-          pipeline: "true"
+          pipeline_byp: "false"
           inst_type: rv_dm
+          pipeline: "true"
         }
         {
           name: rom
           type: device
           clock: main
+          pipeline: "false"
           inst_type: ram_1p
           base_addr: 0x00008000
           size_byte: 0x2000
+          pipeline_byp: "true"
         }
         {
           name: debug_mem
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: rv_dm
           base_addr: 0x1A110000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: ram_main
           type: device
           clock: main
+          pipeline: "false"
           inst_type: ram_1p
           base_addr: 0x10000000
           size_byte: 0x10000
+          pipeline_byp: "true"
         }
         {
           name: eflash
           type: device
           clock: main
+          pipeline: "false"
           inst_type: eflash
           base_addr: 0x20000000
           size_byte: 0x80000
+          pipeline_byp: "true"
         }
         {
           name: uart
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: uart
           base_addr: 0x40000000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: gpio
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: gpio
           base_addr: 0x40010000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: spi_device
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: spi_device
           base_addr: 0x40020000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: flash_ctrl
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: flash_ctrl
           base_addr: 0x40030000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: rv_timer
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: rv_timer
           base_addr: 0x40080000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: hmac
           type: device
           clock: main
+          pipeline_byp: "false"
           inst_type: hmac
           base_addr: 0x40120000
           size_byte: 0x1000
+          pipeline: "true"
         }
         {
           name: rv_plic
@@ -434,6 +457,8 @@
           inst_type: rv_plic
           base_addr: 0x40090000
           size_byte: 0x1000
+          pipeline_byp: "false"
+          pipeline: "true"
         }
       ]
     }
diff --git a/hw/top_earlgrey/doc/xbar_main.hjson b/hw/top_earlgrey/doc/xbar_main.hjson
index b2ece63..6694c7c 100644
--- a/hw/top_earlgrey/doc/xbar_main.hjson
+++ b/hw/top_earlgrey/doc/xbar_main.hjson
@@ -6,59 +6,72 @@
       type:  "host",
       clock: "main",
 
-      pipeline: "true",
+      pipeline: "false"
+
     },
     { name:  "cored",
       type:  "host",
       clock: "main",
 
-      pipeline: "true",
+      pipeline: "false"
+
     },
     { name:  "dm_sba", // DM
       type:  "host",
       clock: "main",
 
-      pipeline: "true",
+      pipeline_byp: "false"
+
     },
     { name:      "rom",
       type:      "device",
       clock:     "main",
+      pipeline:  "false",
     },
     { name:      "debug_mem",
       type:      "device",
       clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "ram_main",
       type:      "device",
       clock:     "main",
+      pipeline:  "false",
     },
     { name:      "eflash",
       type:      "device",
       clock:     "main",
+      pipeline:  "false",
     },
     { name:      "uart",
       type:      "device",
       clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "gpio",
       type:      "device",
       clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "spi_device",
       type:      "device",
       clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "flash_ctrl",
       type:      "device",
-      clock:     "main"
+      clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "rv_timer",
       type:      "device",
-      clock:     "main"
+      clock:     "main",
+      pipeline_byp: "false"
     },
     { name:      "hmac",
       type:      "device",
       clock:     "main"
+      pipeline_byp: "false"
     },
     { name:      "rv_plic",
       type:      "device",
@@ -66,6 +79,7 @@
       inst_type: "rv_plic",
       base_addr: "0x40090000",
       size_byte: "0x1000",
+      pipeline_byp: "false"
     },
   ],
   connections: {
diff --git a/hw/top_earlgrey/rtl/xbar_main.sv b/hw/top_earlgrey/rtl/xbar_main.sv
index ae2d3da..5287824 100644
--- a/hw/top_earlgrey/rtl/xbar_main.sv
+++ b/hw/top_earlgrey/rtl/xbar_main.sv
@@ -402,9 +402,11 @@
 
   // Instantiation phase
   tlul_socket_1n #(
-    .HReqPass (1'b0),
-    .HRspPass (1'b0),
-    .N        (4)
+    .HReqDepth (4'h0),
+    .HRspDepth (4'h0),
+    .DReqDepth ({4{4'h0}}),
+    .DRspDepth ({4{4'h0}}),
+    .N         (4)
   ) u_s1n_14 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -415,7 +417,11 @@
     .dev_select   (dev_sel_s1n_14)
   );
   tlul_socket_m1 #(
-    .M            (3)
+    .HReqDepth ({3{4'h0}}),
+    .HRspDepth ({3{4'h0}}),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (3)
   ) u_sm1_15 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -425,7 +431,11 @@
     .tl_d_i       (tl_sm1_15_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_16 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -435,7 +445,11 @@
     .tl_d_i       (tl_sm1_16_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (3)
+    .HReqDepth ({3{4'h0}}),
+    .HRspDepth ({3{4'h0}}),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (3)
   ) u_sm1_17 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -445,7 +459,11 @@
     .tl_d_i       (tl_sm1_17_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (3)
+    .HReqDepth ({3{4'h0}}),
+    .HRspDepth ({3{4'h0}}),
+    .DReqDepth (4'h0),
+    .DRspDepth (4'h0),
+    .M         (3)
   ) u_sm1_18 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -455,9 +473,11 @@
     .tl_d_i       (tl_sm1_18_ds_d2h)
   );
   tlul_socket_1n #(
-    .HReqPass (1'b0),
-    .HRspPass (1'b0),
-    .N        (11)
+    .HReqDepth (4'h0),
+    .HRspDepth (4'h0),
+    .DReqDepth ({11{4'h0}}),
+    .DRspDepth ({11{4'h0}}),
+    .N         (11)
   ) u_s1n_19 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -468,7 +488,11 @@
     .dev_select   (dev_sel_s1n_19)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_20 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -478,7 +502,11 @@
     .tl_d_i       (tl_sm1_20_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_21 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -488,7 +516,11 @@
     .tl_d_i       (tl_sm1_21_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_22 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -498,7 +530,11 @@
     .tl_d_i       (tl_sm1_22_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_23 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -508,7 +544,11 @@
     .tl_d_i       (tl_sm1_23_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_24 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -518,7 +558,11 @@
     .tl_d_i       (tl_sm1_24_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_25 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -528,7 +572,11 @@
     .tl_d_i       (tl_sm1_25_ds_d2h)
   );
   tlul_socket_m1 #(
-    .M            (2)
+    .HReqPass  (2'h0),
+    .HRspPass  (2'h0),
+    .DReqPass  (1'b0),
+    .DRspPass  (1'b0),
+    .M         (2)
   ) u_sm1_26 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
@@ -538,9 +586,11 @@
     .tl_d_i       (tl_sm1_26_ds_d2h)
   );
   tlul_socket_1n #(
-    .HReqPass (1'b0),
-    .HRspPass (1'b0),
-    .N        (10)
+    .HReqPass  (1'b0),
+    .HRspPass  (1'b0),
+    .DReqPass  (10'h0),
+    .DRspPass  (10'h0),
+    .N         (10)
   ) u_s1n_27 (
     .clk_i        (clk_main_i),
     .rst_ni       (rst_main_ni),
diff --git a/util/tlgen/elaborate.py b/util/tlgen/elaborate.py
index 2927fe7..71e95e1 100644
--- a/util/tlgen/elaborate.py
+++ b/util/tlgen/elaborate.py
@@ -82,7 +82,9 @@
         new_node = Node(name="sm1_" + str(len(xbar.nodes)),
                         node_type=NodeType.SOCKET_M1,
                         clock=xbar.clock)
+        new_node.hdepth = 2
         new_node.hpass = 2**len(node.us) - 1
+        new_node.ddepth = 2
         new_node.dpass = 1
         xbar.insert_node(new_node, node)
         process_node(new_node, xbar)
@@ -93,7 +95,9 @@
         new_node = Node(name="s1n_" + str(len(xbar.nodes)),
                         node_type=NodeType.SOCKET_1N,
                         clock=xbar.clock)
+        new_node.hdepth = 2
         new_node.hpass = 1
+        new_node.ddepth = 2
         new_node.dpass = 2**len(node.ds) - 1
         xbar.insert_node(new_node, node)
 
@@ -105,42 +109,62 @@
 
 
 def process_pipeline(xbar):
-    """Check if HOST, DEVICE has pipeline key and is True, then propagate it to end
+    """Check if HOST, DEVICE has settings different from default, then propagate it to end
     """
     for host in xbar.hosts:
-        # go downstream and set the HReqPass at the first instance.
+        # go downstream and change the HReqPass/Depth at the first instance.
         # If it is async, skip.
-        # If Socket 1N, set hpass to 1 and skip
-        # If Socket M1, find position of the host and set 1 of the bit in hpass skip
+        # If Socket 1N,
+        #    if pipeline True and bypass false, set hpass to 0
+        #    if pipeline is False, set depth to 0
+        # If Socket M1, find position of the host and follow procedure above
         # If it is device, it means host and device are directly connected. Ignore now.
 
         # After process node is done, always only one downstream exists in any host node
-        if host.pipeline == False:
-            # No need to process, default is Pass the req/rsp
+        if host.pipeline == True and host.pipeline_byp == True:
+            # No need to process, same as default
             continue
 
+        no_bypass = (host.pipeline == True and host.pipeline_byp == False)
         dnode = host.ds[0].ds
         if dnode.node_type == NodeType.SOCKET_1N:
-            dnode.hpass = 0
+            dnode.hpass = 0 if no_bypass else dnode.hpass
+
         elif dnode.node_type == NodeType.SOCKET_M1:
             idx = dnode.us.index(host.ds)
-            dnode.hpass = dnode.hpass ^ (1 << idx)
+            dnode.hpass = dnode.hpass ^ (
+                1 << idx) if no_bypass else dnode.hpass
+
+        # keep variables separate in case we ever need to differentiate
+        dnode.dpass = 0 if no_bypass else dnode.dpass
+        dnode.hdepth = 0 if host.pipeline == False else dnode.hdepth
+        dnode.ddepth = dnode.hdepth
 
     for device in xbar.devices:
         # go upstream and set DReq/RspPass at the first instance.
         # If it is async, skip
-        # If Socket 1N, set dpass to the bit position and skip
-        # If Socket M1, set dpass to 1 and skip
+        # If Socket M1
+        #    If pipeline True and bypass False, set dpass to 0
+        #    If pipeline False, set depth to 0
+        # If Socket 1N, find position of the device and follow procedure above
         # If it is host, ignore
 
-        if device.pipeline == False:
+        if device.pipeline == True and device.pipeline_byp == True:
             continue
 
+        no_bypass = (device.pipeline == True and device.pipeline_byp == False)
         unode = device.us[0].us
         if unode.node_type == NodeType.SOCKET_1N:
             idx = unode.ds.index(device.us)
-            unode.dpass = unode.dpass ^ (1 << idx)
+            unode.dpass = unode.dpass ^ (
+                1 << idx) if no_bypass else unode.dpass
+
         elif unode.node_type == NodeType.SOCKET_M1:
-            unode.dpass = 0
+            unode.dpass = 0 if no_bypass else unode.dpass
+
+        # keep variables separate in case we ever need to differentiate
+        unode.hpass = 0 if no_bypass else unode.hpass
+        unode.ddepth = 0 if device.pipeline == False else unode.ddepth
+        unode.hdepth = unode.ddepth
 
     return xbar
diff --git a/util/tlgen/item.py b/util/tlgen/item.py
index 116e275..b26b54e 100644
--- a/util/tlgen/item.py
+++ b/util/tlgen/item.py
@@ -54,9 +54,15 @@
     # 1 for Host, Device, 2 for Async FIFO, N for Sockets
     ds = []  # Edges
 
-    # Req/Rsp Pass. default False
+    # Req/Rsp FIFO. default False
+    # when False, FIFO fully passthrough, no storage element
+    # when True, FIFO present with default depth, "pipeline_byp"
+    # controls passthrough option
     pipeline = False
 
+    # FIFO passthru option. default True
+    pipeline_byp = True
+
     def __init__(self, name, node_type, clock):
         self.name = name
         self.node_type = node_type
diff --git a/util/tlgen/validate.py b/util/tlgen/validate.py
index 653b81f..e573a67 100644
--- a/util/tlgen/validate.py
+++ b/util/tlgen/validate.py
@@ -78,6 +78,9 @@
             node.pipeline = True if nodeobj["pipeline"].lower() in [
                 "true", "1"
             ] else False
+            node.pipeline_byp = True if nodeobj["pipeline_byp"].lower() in [
+                "true", "1"
+            ] else False
         xbar.nodes.append(node)
 
     # Edge
diff --git a/util/tlgen/xbar.rtl.tpl.sv b/util/tlgen/xbar.rtl.tpl.sv
index 0debad5..90685b6 100644
--- a/util/tlgen/xbar.rtl.tpl.sv
+++ b/util/tlgen/xbar.rtl.tpl.sv
@@ -184,18 +184,22 @@
   % elif block.node_type.name == "SOCKET_1N":
   tlul_socket_1n #(
     % if block.hpass != 1:
-    .HReqPass (1'b${block.hpass}),
-    .HRspPass (1'b${block.hpass}),
+    .HReqPass  (1'b${block.hpass}),
+    .HRspPass  (1'b${block.hpass}),
+    % endif
+    % if block.hdepth != 2:
+    .HReqDepth (4'h${block.hdepth}),
+    .HRspDepth (4'h${block.hdepth}),
     % endif
     % if block.dpass != 2**(len(block.ds)) -1:
-    .DReqPass (${len(block.ds)}'h ${"%x" % block.dpass}),
-    .DRspPass (${len(block.ds)}'h ${"%x" % block.dpass}),
+    .DReqPass  (${len(block.ds)}'h${"%x" % block.dpass}),
+    .DRspPass  (${len(block.ds)}'h${"%x" % block.dpass}),
     % endif
-    ## //.HReqDepth(),
-    ## //.HRspDepth(),
-    ## //.DReqDepth(),
-    ## //.DRspDepth(),
-    .N        (${len(block.ds)})
+    % if block.hdepth != 2:
+    .DReqDepth ({${len(block.ds)}{4'h${block.ddepth}}}),
+    .DRspDepth ({${len(block.ds)}{4'h${block.ddepth}}}),
+    % endif
+    .N         (${len(block.ds)})
   ) u_${block.name} (
     .clk_i        (clk_${xbar.clock}_i),
     .rst_ni       (rst_${xbar.clock}_ni),
@@ -207,19 +211,23 @@
   );
   % elif block.node_type.name == "SOCKET_M1":
   tlul_socket_m1 #(
-    % if block.hpass != 2**(len(block.us)) -1:
-    .HReqPass     (${len(block.us)}'h ${"%x" % block.hpass}),
-    .HRspPass     (${len(block.us)}'h ${"%x" % block.hpass}),
+    % if block.hpass != 2**(len(block.us)) - 1:
+    .HReqPass  (${len(block.us)}'h${"%x" % block.hpass}),
+    .HRspPass  (${len(block.us)}'h${"%x" % block.hpass}),
     % endif
-    ## //.HReqDepth    (),
-    ## //.HRspDepth    (),
+    % if block.hdepth != 2:
+    .HReqDepth ({${len(block.us)}{4'h${block.hdepth}}}),
+    .HRspDepth ({${len(block.us)}{4'h${block.hdepth}}}),
+    % endif
+    % if block.ddepth != 2:
+    .DReqDepth (4'h${block.ddepth}),
+    .DRspDepth (4'h${block.ddepth}),
+    % endif
     % if block.dpass != 1:
-    .DReqPass     (1'b${block.dpass}),
-    .DRspPass     (1'b${block.dpass}),
+    .DReqPass  (1'b${block.dpass}),
+    .DRspPass  (1'b${block.dpass}),
     % endif
-    ## //.DReqDepth    (),
-    ## //.DRspDepth    (),
-    .M            (${len(block.us)})
+    .M         (${len(block.us)})
   ) u_${block.name} (
     .clk_i        (clk_${xbar.clock}_i),
     .rst_ni       (rst_${xbar.clock}_ni),
diff --git a/util/topgen/merge.py b/util/topgen/merge.py
index e7bfb95..fcfc3e8 100644
--- a/util/topgen/merge.py
+++ b/util/topgen/merge.py
@@ -162,7 +162,10 @@
             "clock": xbar["clock"],
             "type": "host",
             "inst_type": "",
-            "pipeline": "false"
+            # The default matches RTL default
+            # pipeline_byp is don't care if pipeline is false
+            "pipeline": "true",
+            "pipeline_byp": "true"
         }
         topxbar["nodes"].append(obj)
     else:
@@ -170,8 +173,16 @@
         obj[0]["inst_type"] = predefined_modules[
             host] if host in predefined_modules else ""
         obj[0]["pipeline"] = obj[0]["pipeline"] if "pipeline" in obj[
-            0] else "false"
+            0] else "true"
+        obj[0]["pipeline_byp"] = obj[0]["pipeline_byp"] if obj[0]["pipeline"] == "true" and "pipeline_byp" in obj[0] else "true"
 
+def process_pipeline_var(node):
+    """Add device nodes pipeline / pipeline_byp information
+
+    - Supply a default of true / true if not defined by xbar
+    """
+    node["pipeline"] = node["pipeline"] if "pipeline" in node else "true"
+    node["pipeline_byp"] = node["pipeline_byp"] if "pipeline_byp" in node else "true"
 
 def xbar_adddevice(top, xbar, device):
     """Add device nodes information
@@ -209,7 +220,9 @@
                         "clock": "main",
                         "inst_type": predefined_modules["debug_mem"],
                         "base_addr": top["debug_mem_base_addr"],
-                        "size_byte": "0x1000"
+                        "size_byte": "0x1000",
+                        "pipeline" : "true",
+                        "pipeline_byp" : "true"
                     }) # yapf: disable
                 else:
                     # Update if exists
@@ -217,6 +230,7 @@
                     node["inst_type"] = predefined_modules["debug_mem"]
                     node["base_addr"] = top["debug_mem_base_addr"]
                     node["size_byte"] = "0x1000"
+                    process_pipeline_var(node)
             else:
                 log.error("device %s shouldn't be host type" % device)
                 return
@@ -236,7 +250,9 @@
             "clock" : deviceobj[0]["clock"],
             "inst_type" : deviceobj[0]["type"],
             "base_addr" : deviceobj[0]["base_addr"],
-            "size_byte": deviceobj[0]["size"]
+            "size_byte": deviceobj[0]["size"],
+            "pipeline" : "true",
+            "pipeline_byp" : "true"
         }) # yapf: disable
 
     else:
@@ -245,6 +261,7 @@
         node["inst_type"] = deviceobj[0]["type"]
         node["base_addr"] = deviceobj[0]["base_addr"]
         node["size_byte"] = deviceobj[0]["size"]
+        process_pipeline_var(node)
 
 
 def amend_xbar(top, xbar):