The option is controlled through xbar_main.hjson
By default all xbar components are marked with
'pipeline' = true
'pipeline_byp' = true
This means all elements (socket1n/socketm1) have both incoming and outgoing
FIFOs that are bypassable if the FIFO is empty
Setting `pipeline_byp` to false makes it a non-bypassable FIFO, which incurs
extra latency but gives better timing
Setting `pipeline` to false makes the FIFO a full passthrough (no storage
element) and connects input directly to output
The current scheme does the following:
remove pipelines for coreI/coreD to minimize process latency
remove pipelines for ROM / RAM / FLASH to minimize instruction latency
keep pipelines to all other peripherals and memories
diff --git a/util/tlgen/elaborate.py b/util/tlgen/elaborate.py
index 2927fe7..71e95e1 100644
--- a/util/tlgen/elaborate.py
+++ b/util/tlgen/elaborate.py
@@ -82,7 +82,9 @@
new_node = Node(name="sm1_" + str(len(xbar.nodes)),
node_type=NodeType.SOCKET_M1,
clock=xbar.clock)
+ new_node.hdepth = 2
new_node.hpass = 2**len(node.us) - 1
+ new_node.ddepth = 2
new_node.dpass = 1
xbar.insert_node(new_node, node)
process_node(new_node, xbar)
@@ -93,7 +95,9 @@
new_node = Node(name="s1n_" + str(len(xbar.nodes)),
node_type=NodeType.SOCKET_1N,
clock=xbar.clock)
+ new_node.hdepth = 2
new_node.hpass = 1
+ new_node.ddepth = 2
new_node.dpass = 2**len(node.ds) - 1
xbar.insert_node(new_node, node)
@@ -105,42 +109,62 @@
def process_pipeline(xbar):
- """Check if HOST, DEVICE has pipeline key and is True, then propagate it to end
+ """Check if HOST, DEVICE has settings different from default, then propagate it to end
"""
for host in xbar.hosts:
- # go downstream and set the HReqPass at the first instance.
+ # go downstream and change the HReqPass/Depth at the first instance.
# If it is async, skip.
- # If Socket 1N, set hpass to 1 and skip
- # If Socket M1, find position of the host and set 1 of the bit in hpass skip
+ # If Socket 1N,
+ # if pipeline True and bypass false, set hpass to 0
+ # if pipeline is False, set depth to 0
+ # If Socket M1, find position of the host and follow procedure above
# If it is device, it means host and device are directly connected. Ignore now.
# After process node is done, always only one downstream exists in any host node
- if host.pipeline == False:
- # No need to process, default is Pass the req/rsp
+ if host.pipeline == True and host.pipeline_byp == True:
+ # No need to process, same as default
continue
+ no_bypass = (host.pipeline == True and host.pipeline_byp == False)
dnode = host.ds[0].ds
if dnode.node_type == NodeType.SOCKET_1N:
- dnode.hpass = 0
+ dnode.hpass = 0 if no_bypass else dnode.hpass
+
elif dnode.node_type == NodeType.SOCKET_M1:
idx = dnode.us.index(host.ds)
- dnode.hpass = dnode.hpass ^ (1 << idx)
+ dnode.hpass = dnode.hpass ^ (
+ 1 << idx) if no_bypass else dnode.hpass
+
+ # keep variables separate in case we ever need to differentiate
+ dnode.dpass = 0 if no_bypass else dnode.dpass
+ dnode.hdepth = 0 if host.pipeline == False else dnode.hdepth
+ dnode.ddepth = dnode.hdepth
for device in xbar.devices:
# go upstream and set DReq/RspPass at the first instance.
# If it is async, skip
- # If Socket 1N, set dpass to the bit position and skip
- # If Socket M1, set dpass to 1 and skip
+ # If Socket M1
+ # If pipeline True and bypass False, set dpass to 0
+ # If pipeline False, set depth to 0
+ # If Socket 1N, find position of the device and follow procedure above
# If it is host, ignore
- if device.pipeline == False:
+ if device.pipeline == True and device.pipeline_byp == True:
continue
+ no_bypass = (device.pipeline == True and device.pipeline_byp == False)
unode = device.us[0].us
if unode.node_type == NodeType.SOCKET_1N:
idx = unode.ds.index(device.us)
- unode.dpass = unode.dpass ^ (1 << idx)
+ unode.dpass = unode.dpass ^ (
+ 1 << idx) if no_bypass else unode.dpass
+
elif unode.node_type == NodeType.SOCKET_M1:
- unode.dpass = 0
+ unode.dpass = 0 if no_bypass else unode.dpass
+
+ # keep variables separate in case we ever need to differentiate
+ unode.hpass = 0 if no_bypass else unode.hpass
+ unode.ddepth = 0 if device.pipeline == False else unode.ddepth
+ unode.hdepth = unode.ddepth
return xbar
diff --git a/util/tlgen/item.py b/util/tlgen/item.py
index 116e275..b26b54e 100644
--- a/util/tlgen/item.py
+++ b/util/tlgen/item.py
@@ -54,9 +54,15 @@
# 1 for Host, Device, 2 for Async FIFO, N for Sockets
ds = [] # Edges
- # Req/Rsp Pass. default False
+ # Req/Rsp FIFO. default False
+ # when False, FIFO fully passthrough, no storage element
+ # when True, FIFO present with default depth, "pipeline_byp"
+ # controls passthrough option
pipeline = False
+ # FIFO passthrough (bypass) option. default True
+ pipeline_byp = True
+
def __init__(self, name, node_type, clock):
self.name = name
self.node_type = node_type
diff --git a/util/tlgen/validate.py b/util/tlgen/validate.py
index 653b81f..e573a67 100644
--- a/util/tlgen/validate.py
+++ b/util/tlgen/validate.py
@@ -78,6 +78,9 @@
node.pipeline = True if nodeobj["pipeline"].lower() in [
"true", "1"
] else False
+ node.pipeline_byp = True if nodeobj["pipeline_byp"].lower() in [
+ "true", "1"
+ ] else False
xbar.nodes.append(node)
# Edge
diff --git a/util/tlgen/xbar.rtl.tpl.sv b/util/tlgen/xbar.rtl.tpl.sv
index 0debad5..90685b6 100644
--- a/util/tlgen/xbar.rtl.tpl.sv
+++ b/util/tlgen/xbar.rtl.tpl.sv
@@ -184,18 +184,22 @@
% elif block.node_type.name == "SOCKET_1N":
tlul_socket_1n #(
% if block.hpass != 1:
- .HReqPass (1'b${block.hpass}),
- .HRspPass (1'b${block.hpass}),
+ .HReqPass (1'b${block.hpass}),
+ .HRspPass (1'b${block.hpass}),
+ % endif
+ % if block.hdepth != 2:
+ .HReqDepth (4'h${block.hdepth}),
+ .HRspDepth (4'h${block.hdepth}),
% endif
% if block.dpass != 2**(len(block.ds)) -1:
- .DReqPass (${len(block.ds)}'h ${"%x" % block.dpass}),
- .DRspPass (${len(block.ds)}'h ${"%x" % block.dpass}),
+ .DReqPass (${len(block.ds)}'h${"%x" % block.dpass}),
+ .DRspPass (${len(block.ds)}'h${"%x" % block.dpass}),
% endif
- ## //.HReqDepth(),
- ## //.HRspDepth(),
- ## //.DReqDepth(),
- ## //.DRspDepth(),
- .N (${len(block.ds)})
+ % if block.hdepth != 2:
+ .DReqDepth ({${len(block.ds)}{4'h${block.ddepth}}}),
+ .DRspDepth ({${len(block.ds)}{4'h${block.ddepth}}}),
+ % endif
+ .N (${len(block.ds)})
) u_${block.name} (
.clk_i (clk_${xbar.clock}_i),
.rst_ni (rst_${xbar.clock}_ni),
@@ -207,19 +211,23 @@
);
% elif block.node_type.name == "SOCKET_M1":
tlul_socket_m1 #(
- % if block.hpass != 2**(len(block.us)) -1:
- .HReqPass (${len(block.us)}'h ${"%x" % block.hpass}),
- .HRspPass (${len(block.us)}'h ${"%x" % block.hpass}),
+ % if block.hpass != 2**(len(block.us)) - 1:
+ .HReqPass (${len(block.us)}'h${"%x" % block.hpass}),
+ .HRspPass (${len(block.us)}'h${"%x" % block.hpass}),
% endif
- ## //.HReqDepth (),
- ## //.HRspDepth (),
+ % if block.hdepth != 2:
+ .HReqDepth ({${len(block.us)}{4'h${block.hdepth}}}),
+ .HRspDepth ({${len(block.us)}{4'h${block.hdepth}}}),
+ % endif
+ % if block.ddepth != 2:
+ .DReqDepth (4'h${block.ddepth}),
+ .DRspDepth (4'h${block.ddepth}),
+ % endif
% if block.dpass != 1:
- .DReqPass (1'b${block.dpass}),
- .DRspPass (1'b${block.dpass}),
+ .DReqPass (1'b${block.dpass}),
+ .DRspPass (1'b${block.dpass}),
% endif
- ## //.DReqDepth (),
- ## //.DRspDepth (),
- .M (${len(block.us)})
+ .M (${len(block.us)})
) u_${block.name} (
.clk_i (clk_${xbar.clock}_i),
.rst_ni (rst_${xbar.clock}_ni),