refactor(soc): Introduce unified KelvinChiselSubsystem

This commit introduces a major architectural refactoring by creating a unified `KelvinChiselSubsystem`. This new top-level module programmatically instantiates and connects all Chisel-based components of the SoC, including the RV core, crossbar, and peripherals like the Spi2TLUL bridge.

Key changes:
- **`SoCChiselConfig.scala`**: A new central configuration file that defines the modules to be instantiated, their parameters, and their connections. This allows for a more flexible and maintainable SoC architecture.
- **`KelvinChiselSubsystem.scala`**: The new top-level module that reads the configuration and builds the hardware graph, connecting modules to the crossbar and exposing external ports.
- **`CrossbarConfig.scala`**: Updated to be more dynamic. It now sources its configuration from `SoCChiselConfig` and supports a test harness mode. The Ibex core hosts have been removed and replaced by the `spi2tlul` host and a generic 32-bit test host.
- **Tests**:
    - The existing crossbar tests (`kelvin_xbar_test.py`) have been updated to use the new test harness and reflect the new port mapping.
    - A new subsystem-level test suite (`test_subsystem.py`) has been added to verify the integrated subsystem. These tests demonstrate loading and executing an ELF file on the core via both a TL-UL test host and the SPI bridge.

This refactoring simplifies the top-level Verilog (`kelvin_soc.sv`) and provides a more scalable and configurable way to build the SoC.

Change-Id: Ib8249023d3df3da32a62b00006e103b5f8236f4f
diff --git a/fpga/ip/sram/Sram.sv b/fpga/ip/sram/Sram.sv
index 6cc1e3f..1cd1f3d 100644
--- a/fpga/ip/sram/Sram.sv
+++ b/fpga/ip/sram/Sram.sv
@@ -37,4 +37,4 @@
 
   localparam MemInitFile = "";
 `include "prim_util_memload.svh"
-endmodule
\ No newline at end of file
+endmodule
diff --git a/fpga/rtl/kelvin_soc.sv b/fpga/rtl/kelvin_soc.sv
index 479f887..37de707 100644
--- a/fpga/rtl/kelvin_soc.sv
+++ b/fpga/rtl/kelvin_soc.sv
@@ -58,10 +58,6 @@
   KelvinXbar i_xbar(
     .io_clk_i(clk_i),
     .io_rst_ni(rst_ni),
-    .io_async_ports_devices_0_clock(spi_clk_i),
-    .io_async_ports_devices_0_reset(rst_ni),
-    .io_async_ports_hosts_0_clock(ibex_clk_i),
-    .io_async_ports_hosts_0_reset(ibex_rst_ni),
 
     // Host connections
     .io_hosts_0_a_valid(tl_kelvin_core_i.a_valid),
@@ -77,32 +73,7 @@
     .io_hosts_0_a_bits_user_cmd_intg(tl_kelvin_core_i.a_user.cmd_intg),
     .io_hosts_0_a_bits_user_data_intg(tl_kelvin_core_i.a_user.data_intg),
     .io_hosts_0_d_ready(tl_kelvin_core_i.d_ready),
-    .io_hosts_1_a_valid(tl_ibex_core_i_o_32.a_valid),
-    .io_hosts_1_a_bits_opcode(tl_ibex_core_i_o_32.a_opcode),
-    .io_hosts_1_a_bits_param(tl_ibex_core_i_o_32.a_param),
-    .io_hosts_1_a_bits_size(tl_ibex_core_i_o_32.a_size),
-    .io_hosts_1_a_bits_source(tl_ibex_core_i_o_32.a_source),
-    .io_hosts_1_a_bits_address(tl_ibex_core_i_o_32.a_address),
-    .io_hosts_1_a_bits_mask(tl_ibex_core_i_o_32.a_mask),
-    .io_hosts_1_a_bits_data(tl_ibex_core_i_o_32.a_data),
-    .io_hosts_1_a_bits_user_rsvd(tl_ibex_core_i_o_32.a_user.rsvd),
-    .io_hosts_1_a_bits_user_instr_type(tl_ibex_core_i_o_32.a_user.instr_type),
-    .io_hosts_1_a_bits_user_cmd_intg(tl_ibex_core_i_o_32.a_user.cmd_intg),
-    .io_hosts_1_a_bits_user_data_intg(tl_ibex_core_i_o_32.a_user.data_intg),
-    .io_hosts_1_d_ready(tl_ibex_core_i_o_32.d_ready),
-    .io_hosts_2_a_valid(tl_ibex_core_d_o_32.a_valid),
-    .io_hosts_2_a_bits_opcode(tl_ibex_core_d_o_32.a_opcode),
-    .io_hosts_2_a_bits_param(tl_ibex_core_d_o_32.a_param),
-    .io_hosts_2_a_bits_size(tl_ibex_core_d_o_32.a_size),
-    .io_hosts_2_a_bits_source(tl_ibex_core_d_o_32.a_source),
-    .io_hosts_2_a_bits_address(tl_ibex_core_d_o_32.a_address),
-    .io_hosts_2_a_bits_mask(tl_ibex_core_d_o_32.a_mask),
-    .io_hosts_2_a_bits_data(tl_ibex_core_d_o_32.a_data),
-    .io_hosts_2_a_bits_user_rsvd(tl_ibex_core_d_o_32.a_user.rsvd),
-    .io_hosts_2_a_bits_user_instr_type(tl_ibex_core_d_o_32.a_user.instr_type),
-    .io_hosts_2_a_bits_user_cmd_intg(tl_ibex_core_d_o_32.a_user.cmd_intg),
-    .io_hosts_2_a_bits_user_data_intg(tl_ibex_core_d_o_32.a_user.data_intg),
-    .io_hosts_2_d_ready(tl_ibex_core_d_o_32.d_ready),
+    .io_hosts_1_a_valid(1'b0),
 
     // Host response connections
     .io_hosts_0_a_ready(tl_kelvin_core_o.a_ready),
@@ -116,28 +87,7 @@
     .io_hosts_0_d_bits_error(tl_kelvin_core_o.d_error),
     .io_hosts_0_d_bits_user_rsp_intg(tl_kelvin_core_o.d_user.rsp_intg),
     .io_hosts_0_d_bits_user_data_intg(tl_kelvin_core_o.d_user.data_intg),
-    .io_hosts_1_a_ready(tl_ibex_core_i_i_32.a_ready),
-    .io_hosts_1_d_valid(tl_ibex_core_i_i_32.d_valid),
-    .io_hosts_1_d_bits_opcode(tl_ibex_core_i_i_32.d_opcode),
-    .io_hosts_1_d_bits_param(tl_ibex_core_i_i_32.d_param),
-    .io_hosts_1_d_bits_size(tl_ibex_core_i_i_32.d_size),
-    .io_hosts_1_d_bits_source(tl_ibex_core_i_i_32.d_source),
-    .io_hosts_1_d_bits_sink(tl_ibex_core_i_i_32.d_sink),
-    .io_hosts_1_d_bits_data(tl_ibex_core_i_i_32.d_data),
-    .io_hosts_1_d_bits_error(tl_ibex_core_i_i_32.d_error),
-    .io_hosts_1_d_bits_user_rsp_intg(tl_ibex_core_i_i_32.d_user.rsp_intg),
-    .io_hosts_1_d_bits_user_data_intg(tl_ibex_core_i_i_32.d_user.data_intg),
-    .io_hosts_2_a_ready(tl_ibex_core_d_i_32.a_ready),
-    .io_hosts_2_d_valid(tl_ibex_core_d_i_32.d_valid),
-    .io_hosts_2_d_bits_opcode(tl_ibex_core_d_i_32.d_opcode),
-    .io_hosts_2_d_bits_param(tl_ibex_core_d_i_32.d_param),
-    .io_hosts_2_d_bits_size(tl_ibex_core_d_i_32.d_size),
-    .io_hosts_2_d_bits_source(tl_ibex_core_d_i_32.d_source),
-    .io_hosts_2_d_bits_sink(tl_ibex_core_d_i_32.d_sink),
-    .io_hosts_2_d_bits_data(tl_ibex_core_d_i_32.d_data),
-    .io_hosts_2_d_bits_error(tl_ibex_core_d_i_32.d_error),
-    .io_hosts_2_d_bits_user_rsp_intg(tl_ibex_core_d_i_32.d_user.rsp_intg),
-    .io_hosts_2_d_bits_user_data_intg(tl_ibex_core_d_i_32.d_user.data_intg),
+    .io_hosts_1_d_ready(1'b0),
 
     // Device connections
     .io_devices_0_a_ready(tl_kelvin_device_i.a_ready),
@@ -195,17 +145,6 @@
     .io_devices_4_d_bits_error(tl_uart1_i.d_error),
     .io_devices_4_d_bits_user_rsp_intg(tl_uart1_i.d_user.rsp_intg),
     .io_devices_4_d_bits_user_data_intg(tl_uart1_i.d_user.data_intg),
-    .io_devices_5_a_ready(tl_spi0_i.a_ready),
-    .io_devices_5_d_valid(tl_spi0_i.d_valid),
-    .io_devices_5_d_bits_opcode(tl_spi0_i.d_opcode),
-    .io_devices_5_d_bits_param(tl_spi0_i.d_param),
-    .io_devices_5_d_bits_size(tl_spi0_i.d_size),
-    .io_devices_5_d_bits_source(tl_spi0_i.d_source),
-    .io_devices_5_d_bits_sink(tl_spi0_i.d_sink),
-    .io_devices_5_d_bits_data(tl_spi0_i.d_data),
-    .io_devices_5_d_bits_error(tl_spi0_i.d_error),
-    .io_devices_5_d_bits_user_rsp_intg(tl_spi0_i.d_user.rsp_intg),
-    .io_devices_5_d_bits_user_data_intg(tl_spi0_i.d_user.data_intg),
 
     // Device response connections
     .io_devices_0_a_valid(tl_kelvin_device_o.a_valid),
@@ -272,20 +211,7 @@
     .io_devices_4_a_bits_user_instr_type(tl_uart1_o.a_user.instr_type),
     .io_devices_4_a_bits_user_cmd_intg(tl_uart1_o.a_user.cmd_intg),
     .io_devices_4_a_bits_user_data_intg(tl_uart1_o.a_user.data_intg),
-    .io_devices_4_d_ready(tl_uart1_o.d_ready),
-    .io_devices_5_a_valid(tl_spi0_o.a_valid),
-    .io_devices_5_a_bits_opcode(tl_spi0_o.a_opcode),
-    .io_devices_5_a_bits_param(tl_spi0_o.a_param),
-    .io_devices_5_a_bits_size(tl_spi0_o.a_size),
-    .io_devices_5_a_bits_source(tl_spi0_o.a_source),
-    .io_devices_5_a_bits_address(tl_spi0_o.a_address),
-    .io_devices_5_a_bits_mask(tl_spi0_o.a_mask),
-    .io_devices_5_a_bits_data(tl_spi0_o.a_data),
-    .io_devices_5_a_bits_user_rsvd(tl_spi0_o.a_user.rsvd),
-    .io_devices_5_a_bits_user_instr_type(tl_spi0_o.a_user.instr_type),
-    .io_devices_5_a_bits_user_cmd_intg(tl_spi0_o.a_user.cmd_intg),
-    .io_devices_5_a_bits_user_data_intg(tl_spi0_o.a_user.data_intg),
-    .io_devices_5_d_ready(tl_spi0_o.d_ready)
+    .io_devices_4_d_ready(tl_uart1_o.d_ready)
   );
 
   uart i_uart0(.clk_i(clk_i),
@@ -417,7 +343,7 @@
                      .write_pending_i(1'b0));
 
   Sram #(.Width(32),
-         .Depth(4096))
+         .Depth(1048576))
       i_sram(.clk_i(clk_i),
              .req_i(sram_req),
              .we_i(sram_we),
diff --git a/hdl/chisel/src/soc/BUILD b/hdl/chisel/src/soc/BUILD
index f01f1e7..ee0b62d 100644
--- a/hdl/chisel/src/soc/BUILD
+++ b/hdl/chisel/src/soc/BUILD
@@ -13,12 +13,24 @@
 # limitations under the License.
 
 load("@kelvin_hw//rules:chisel.bzl", "chisel_binary", "chisel_cc_library", "chisel_library")
+load("@kelvin_hw//rules:coco_tb.bzl", "verilator_cocotb_model")
 
 package(default_visibility = ["//visibility:public"])
 
+load(
+    "//tests/cocotb:build_defs.bzl",
+    "VERILATOR_BUILD_ARGS",
+)
+
 chisel_library(
     name = "crossbar_config_lib",
-    srcs = ["CrossbarConfig.scala"],
+    srcs = [
+        "CrossbarConfig.scala",
+        "SoCChiselConfig.scala",
+    ],
+    deps = [
+        "//hdl/chisel/src/kelvin:kelvin_params",
+    ],
 )
 
 chisel_binary(
@@ -44,3 +56,63 @@
     emit_class = "kelvin.soc.KelvinXbarEmitter",
     module_name = "KelvinXbar",
 )
+
+chisel_cc_library(
+    name = "kelvin_xbar_testharness_cc_library",
+    chisel_lib = ":kelvin_xbar_lib",
+    emit_class = "kelvin.soc.KelvinXbarEmitter",
+    module_name = "KelvinXbarTestHarness",
+    gen_flags = ["--enableTestHarness"],
+)
+
+verilator_cocotb_model(
+    name = "kelvin_xbar_testharness_model",
+    cflags = VERILATOR_BUILD_ARGS,
+    hdl_toplevel = "KelvinXbarTestHarness",
+    trace = True,
+    verilog_source = ":kelvin_xbar_testharness_cc_library_emit_verilog",
+)
+
+chisel_library(
+    name = "kelvin_chisel_subsystem_lib",
+    srcs = ["KelvinChiselSubsystem.scala"],
+    deps = [
+        ":crossbar_config_lib",
+        ":kelvin_xbar_lib",
+        "//hdl/chisel/src/bus",
+        "//hdl/chisel/src/kelvin",
+        "//hdl/chisel/src/kelvin:kelvin_params",
+    ],
+)
+
+chisel_cc_library(
+    name = "kelvin_chisel_subsystem_cc_library",
+    chisel_lib = ":kelvin_chisel_subsystem_lib",
+    emit_class = "kelvin.soc.KelvinChiselSubsystemEmitter",
+    module_name = "KelvinChiselSubsystem",
+)
+
+
+verilator_cocotb_model(
+    name = "kelvin_chisel_subsystem_model",
+    cflags = VERILATOR_BUILD_ARGS,
+    hdl_toplevel = "KelvinChiselSubsystem",
+    trace = True,
+    verilog_source = "//hdl/chisel/src/soc:KelvinChiselSubsystem.sv",
+)
+
+chisel_cc_library(
+    name = "kelvin_chisel_subsystem_testharness_cc_library",
+    chisel_lib = ":kelvin_chisel_subsystem_lib",
+    emit_class = "kelvin.soc.KelvinChiselSubsystemEmitter",
+    module_name = "KelvinChiselSubsystemTestHarness",
+    gen_flags = ["--enableTestHarness"],
+)
+
+verilator_cocotb_model(
+    name = "kelvin_chisel_subsystem_testharness_model",
+    cflags = VERILATOR_BUILD_ARGS,
+    hdl_toplevel = "KelvinChiselSubsystemTestHarness",
+    trace = True,
+    verilog_source = ":kelvin_chisel_subsystem_testharness_cc_library_emit_verilog",
+)
diff --git a/hdl/chisel/src/soc/CrossbarConfig.scala b/hdl/chisel/src/soc/CrossbarConfig.scala
index cc5f74e..bde5c30 100644
--- a/hdl/chisel/src/soc/CrossbarConfig.scala
+++ b/hdl/chisel/src/soc/CrossbarConfig.scala
@@ -60,11 +60,17 @@
  */
 object CrossbarConfig {
   // List of all host (master) interfaces.
-  val hosts = Seq(
-    HostConfig("kelvin_core", width = 128),
-    HostConfig("ibex_core_i", width = 32, clockDomain = "ibex"),
-    HostConfig("ibex_core_d", width = 32, clockDomain = "ibex")
-  )
+  def hosts(enableTestHarness: Boolean): Seq[HostConfig] = {
+    val baseHosts = Seq(
+      HostConfig("kelvin_core", width = 128),
+      HostConfig("spi2tlul", width = 128)
+    )
+    if (enableTestHarness) {
+      baseHosts :+ HostConfig("test_host_32", width = 32, clockDomain = "test")
+    } else {
+      baseHosts
+    }
+  }
 
   // List of all device (slave) interfaces with their address maps.
   val devices = Seq(
@@ -76,20 +82,21 @@
     DeviceConfig("rom",  Seq(AddressRange(0x10000000, 0x8000))),      // 32kB
     DeviceConfig("sram", Seq(AddressRange(0x20000000, 0x400000))),    // 4MB
     DeviceConfig("uart0", Seq(AddressRange(0x40000000, 0x1000))),
-    DeviceConfig("uart1", Seq(AddressRange(0x40010000, 0x1000))),
-    DeviceConfig(
-      name = "spi0",
-      addr = Seq(AddressRange(0x40020000, 0x1000)),
-      clockDomain = "spi" // This device is on a separate clock domain
-    )
+    DeviceConfig("uart1", Seq(AddressRange(0x40010000, 0x1000)))
   )
 
   // A map defining which hosts are allowed to connect to which devices.
-  val connections = Map(
-    "kelvin_core" -> Seq("sram", "uart1", "spi0", "kelvin_device"),
-    "ibex_core_i" -> Seq("rom", "sram"),
-    "ibex_core_d" -> Seq("rom", "sram", "uart0", "kelvin_device")
-  )
+  def connections(enableTestHarness: Boolean): Map[String, Seq[String]] = {
+    val baseConnections = Map(
+      "kelvin_core" -> Seq("sram", "uart1", "kelvin_device", "rom", "uart0"),
+      "spi2tlul" -> Seq("kelvin_device", "sram")
+    )
+    if (enableTestHarness) {
+      baseConnections + ("test_host_32" -> Seq("rom", "sram", "uart0", "kelvin_device"))
+    } else {
+      baseConnections
+    }
+  }
 }
 
 /**
@@ -125,7 +132,7 @@
                  |FATAL: Address range collision detected!
                  |  Device 1: ${dev1.name} -> Range [0x${start1.toString(16)}, 0x${(end1 - 1).toString(16)}]
                  |  Device 2: ${dev2.name} -> Range [0x${start2.toString(16)}, 0x${(end2 - 1).toString(16)}]
-               """.stripMargin
+               """
             System.err.println(errorMsg)
             throw new Exception("Crossbar configuration validation failed.")
           }
@@ -136,25 +143,29 @@
 
   println("Validation successful: No address range collisions found.")
 
-  // Pretty-print the configuration
-  println("\n--- Crossbar Configuration ---")
-  println("Hosts:")
-  CrossbarConfig.hosts.foreach(h => println(s"  - ${h.name}"))
+  def printConfig(enableTestHarness: Boolean): Unit = {
+    println(s"\n--- Crossbar Configuration (TestHarness: $enableTestHarness) ---")
+    println("Hosts:")
+    CrossbarConfig.hosts(enableTestHarness).foreach(h => println(s"  - ${h.name}"))
 
-  println("\nDevices:")
-  CrossbarConfig.devices.foreach {
-    d =>
-      println(s"  - ${d.name} (${d.clockDomain} clock domain)")
-      d.addr.foreach {
-        a =>
-          println(f"    - 0x${a.base}%08x - 0x${a.base + a.size - 1}%08x (Size: ${a.size / 1024}kB)")
-      }
+    println("\nDevices:")
+    CrossbarConfig.devices.foreach {
+      d =>
+        println(s"  - ${d.name} (${d.clockDomain} clock domain)")
+        d.addr.foreach {
+          a =>
+            println(f"    - 0x${a.base}%08x - 0x${a.base + a.size - 1}%08x (Size: ${a.size / 1024}kB)")
+        }
+    }
+
+    println("\nConnections:")
+    CrossbarConfig.connections(enableTestHarness).foreach {
+      case (host, devices) =>
+        println(s"  - ${host} -> [${devices.mkString(", ")}]")
+    }
+    println("\n--------------------------")
   }
 
-  println("\nConnections:")
-  CrossbarConfig.connections.foreach {
-    case (host, devices) =>
-      println(s"  - ${host} -> [${devices.mkString(", ")}]")
-  }
-  println("\n--------------------------")
+  printConfig(false)
+  printConfig(true)
 }
diff --git a/hdl/chisel/src/soc/KelvinChiselSubsystem.scala b/hdl/chisel/src/soc/KelvinChiselSubsystem.scala
new file mode 100644
index 0000000..0e52e4b
--- /dev/null
+++ b/hdl/chisel/src/soc/KelvinChiselSubsystem.scala
@@ -0,0 +1,248 @@
+package kelvin.soc
+
+import chisel3._
+import chisel3.util.MixedVec
+import bus._
+import kelvin.Parameters
+import kelvin.CoreTlul
+
+/**
+ * This is the IO bundle for the unified Chisel subsystem.
+ */
+class KelvinChiselSubsystemIO(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters], val enableTestHarness: Boolean) extends Bundle {
+  val cfg = SoCChiselConfig.crossbar
+
+  // --- Clocks and Resets ---
+  val clk_i = Input(Clock())
+  val rst_ni = Input(AsyncReset())
+
+  // --- Dynamic Asynchronous Clock/Reset Ports ---
+  val asyncHostDomains = cfg.hosts(enableTestHarness).map(_.clockDomain).distinct.filter(_ != "main")
+  val async_ports_hosts = new Bundle {
+    val clocks = Input(Vec(asyncHostDomains.length, Clock()))
+    val resets = Input(Vec(asyncHostDomains.length, AsyncReset()))
+  }
+
+  val asyncDeviceDomains = cfg.devices.map(_.clockDomain).distinct.filter(_ != "main")
+  val async_ports_devices = new Bundle {
+    val clocks = Input(Vec(asyncDeviceDomains.length, Clock()))
+    val resets = Input(Vec(asyncDeviceDomains.length, AsyncReset()))
+  }
+
+  // --- Identify Internal vs. External Connections ---
+  val internalHosts = SoCChiselConfig.modules.flatMap(_.hostConnections.values).toSet
+  val internalDevices = SoCChiselConfig.modules.flatMap(_.deviceConnections.values).toSet
+
+  val externalHostPorts = cfg.hosts(enableTestHarness).filterNot(h => internalHosts.contains(h.name))
+  val externalDevicePorts = cfg.devices.filterNot(d => internalDevices.contains(d.name))
+
+  // --- Create External TileLink Ports ---
+  val external_hosts = Flipped(new Bundle {
+    val ports = MixedVec(externalHostPorts.map { h =>
+      new OpenTitanTileLink.Host2Device(hostParams(cfg.hosts(enableTestHarness).indexWhere(_.name == h.name)))
+    })
+  })
+
+  val external_devices = new Bundle {
+    val ports = MixedVec(externalDevicePorts.map { d =>
+      new OpenTitanTileLink.Host2Device(deviceParams(cfg.devices.indexWhere(_.name == d.name)))
+    })
+  }
+
+  // --- Manually define peripheral ports for now ---
+  val allExternalPortsConfig = SoCChiselConfig.modules.flatMap(_.externalPorts)
+  val external_ports = MixedVec(allExternalPortsConfig.map { p =>
+    val port = p.portType match {
+      case kelvin.soc.Clk  => Clock()
+      case kelvin.soc.Bool => Bool()
+    }
+    if (p.direction == kelvin.soc.In) Input(port) else Output(port)
+  })
+}
+
+import chisel3.experimental.BaseModule
+import chisel3.reflect.DataMirror
+import scala.collection.mutable
+
+/**
+ * A generator for the entire Chisel-based subsystem of the Kelvin SoC.
+ */
+class KelvinChiselSubsystem(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters], val enableTestHarness: Boolean) extends RawModule {
+  override val desiredName = if (enableTestHarness) "KelvinChiselSubsystemTestHarness" else "KelvinChiselSubsystem"
+  val io = IO(new KelvinChiselSubsystemIO(hostParams, deviceParams, enableTestHarness))
+  val cfg = SoCChiselConfig.crossbar
+
+  /**
+   * A helper function to recursively traverse a Chisel Bundle and populate a
+   * map with the full hierarchical path to every port and sub-port.
+   */
+  def populatePorts(prefix: String, data: Data, map: mutable.Map[String, Data]): Unit = {
+    map(prefix) = data
+    data match {
+      case b: Record =>
+        b.elements.foreach { case (name, child) =>
+          populatePorts(s"$prefix.$name", child, map)
+        }
+      case v: Vec[_] =>
+        v.zipWithIndex.foreach { case (child, i) =>
+          populatePorts(s"$prefix($i)", child, map)
+        }
+      case _ => // Leaf element
+    }
+  }
+
+  withClockAndReset(io.clk_i, (!io.rst_ni.asBool).asAsyncReset) {
+    // --- Instantiate Core Chisel Components ---
+    val xbar = Module(new KelvinXbar(hostParams, deviceParams, enableTestHarness))
+
+    // --- Dynamic Module Instantiation ---
+    def instantiateModule(config: ChiselModuleConfig): BaseModule = {
+      config.params match {
+        case p: CoreTlulParameters =>
+          val core_p = new Parameters
+          core_p.m = p.memoryRegions
+          core_p.lsuDataBits = p.lsuDataBits
+          core_p.enableRvv = p.enableRvv
+          core_p.enableFetchL0 = p.enableFetchL0
+          core_p.fetchDataBits = p.fetchDataBits
+          core_p.enableVector = p.enableVector
+          core_p.enableFloat = p.enableFloat
+          Module(new CoreTlul(core_p, config.name))
+
+        case p: Spi2TlulParameters =>
+          val spi_p = new Parameters
+          spi_p.lsuDataBits = p.lsuDataBits
+          Module(new Spi2TLUL(spi_p))
+      }
+    }
+
+    val instantiatedModules = SoCChiselConfig.modules.map {
+      config =>
+      config.name -> instantiateModule(config)
+    }.toMap
+
+    // --- Dynamic Wiring ---
+    val hostMap = cfg.hosts(enableTestHarness).map(_.name).zipWithIndex.toMap
+    val deviceMap = cfg.devices.map(_.name).zipWithIndex.toMap
+    val externalPortsMap = io.allExternalPortsConfig.map(_.name).zip(io.external_ports).toMap
+
+    // Create a map of all ports on all instantiated modules for easy lookup.
+    val modulePorts = mutable.Map[String, Data]()
+    instantiatedModules.foreach { case (moduleName, module) =>
+      DataMirror.modulePorts(module).foreach { case (portName, port) =>
+        populatePorts(s"$moduleName.$portName", port, modulePorts)
+      }
+    }
+
+    // --- Clock & Reset Connections ---
+    xbar.io.clk_i := io.clk_i
+    xbar.io.rst_ni := io.rst_ni
+    instantiatedModules.foreach { case (name, module) =>
+      modulePorts.get(s"$name.io.clk").foreach(_ := io.clk_i)
+      modulePorts.get(s"$name.io.clock").foreach(_ := io.clk_i)
+      modulePorts.get(s"$name.io.rst_ni").foreach(_ := io.rst_ni)
+      modulePorts.get(s"$name.io.reset").foreach(_ := io.rst_ni)
+    }
+
+    // Connect all modules based on the configuration.
+    SoCChiselConfig.modules.foreach {
+      config =>
+      config.hostConnections.foreach { case (modulePort, xbarPort) =>
+        modulePorts(s"${config.name}.$modulePort") <> xbar.io.hosts(hostMap(xbarPort))
+      }
+      config.deviceConnections.foreach { case (modulePort, xbarPort) =>
+        xbar.io.devices(deviceMap(xbarPort)) <> modulePorts(s"${config.name}.$modulePort")
+      }
+      config.externalPorts.foreach {
+        extPort =>
+        val moduleIo = modulePorts(s"${config.name}.${extPort.modulePort}")
+        val topIo = externalPortsMap(extPort.name)
+        if (extPort.direction == In) moduleIo := topIo else topIo := moduleIo
+      }
+    }
+
+    // Connect external-facing TileLink ports
+    io.externalHostPorts.map(_.name).zip(io.external_hosts.ports).foreach { case (name, port) =>
+      xbar.io.hosts(hostMap(name)) <> port
+    }
+    io.externalDevicePorts.map(_.name).zip(io.external_devices.ports).foreach { case (name, port) =>
+      port <> xbar.io.devices(deviceMap(name))
+    }
+
+    // Connect async clocks
+    val asyncHostDomainMap = io.asyncHostDomains.zipWithIndex.toMap
+    asyncHostDomainMap.foreach {
+      case (domainName, index) =>
+      val xbarPort = xbar.io.async_ports_hosts
+      val ioPort = io.async_ports_hosts
+      if (index < xbarPort.length) {
+        xbarPort(index).clock := ioPort.clocks(index)
+        xbarPort(index).reset := ioPort.resets(index)
+      }
+    }
+
+    val asyncDeviceDomainMap = io.asyncDeviceDomains.zipWithIndex.toMap
+    asyncDeviceDomainMap.foreach {
+      case (domainName, index) =>
+      val xbarPort = xbar.io.async_ports_devices
+      val ioPort = io.async_ports_devices
+      if (index < xbarPort.length) {
+        xbarPort(index).clock := ioPort.clocks(index)
+        xbarPort(index).reset := ioPort.resets(index)
+      }
+    }
+  }
+}
+
+import _root_.circt.stage.ChiselStage
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths, StandardOpenOption}
+
+object KelvinChiselSubsystemEmitter extends App {
+  val enableTestHarness = args.contains("--enableTestHarness")
+  val chiselArgs = args.filterNot(a => a.startsWith("--enableTestHarness") || a.startsWith("--target-dir="))
+
+  val hostParams = SoCChiselConfig.crossbar.hosts(enableTestHarness).map {
+    host =>
+    val p = new Parameters
+    p.lsuDataBits = host.width
+    new bus.TLULParameters(p)
+  }
+  val deviceParams = SoCChiselConfig.crossbar.devices.map {
+    device =>
+    val p = new Parameters
+    p.lsuDataBits = device.width
+    new bus.TLULParameters(p)
+  }
+
+  // Manually parse arguments to find the target directory.
+  var targetDir: Option[String] = None
+  args.foreach {
+    case s if s.startsWith("--target-dir=") => targetDir = Some(s.stripPrefix("--target-dir="))
+    case "--enableTestHarness" => // Already handled by filterNot
+    case _ => // Ignore other arguments
+  }
+
+  // The subsystem module must be created in the ChiselStage context.
+  lazy val subsystem = new KelvinChiselSubsystem(hostParams, deviceParams, enableTestHarness)
+
+  val firtoolOpts = Array("-enable-layers=Verification")
+  val systemVerilogSource = ChiselStage.emitSystemVerilog(
+    subsystem, chiselArgs.toArray, firtoolOpts)
+
+  // CIRCT adds extra data to the end of the file. Remove it.
+  val resourcesSeparator =
+      "// ----- 8< ----- FILE \"firrtl_black_box_resource_files.f\" ----- 8< -----"
+  val strippedVerilogSource = systemVerilogSource.split(resourcesSeparator)(0)
+
+  // Write the stripped Verilog to the target directory.
+  targetDir.foreach {
+    dir =>
+      Files.write(
+        Paths.get(dir, subsystem.name + ".sv"),
+        strippedVerilogSource.getBytes(StandardCharsets.UTF_8),
+        StandardOpenOption.CREATE,
+        StandardOpenOption.TRUNCATE_EXISTING)
+  }
+}
+
diff --git a/hdl/chisel/src/soc/KelvinXbar.scala b/hdl/chisel/src/soc/KelvinXbar.scala
index ed81b5f..2d3a497 100644
--- a/hdl/chisel/src/soc/KelvinXbar.scala
+++ b/hdl/chisel/src/soc/KelvinXbar.scala
@@ -28,7 +28,7 @@
  * @param hostParams A sequence of TileLink parameters, one for each host.
  * @param deviceParams A sequence of TileLink parameters, one for each device.
  */
-class KelvinXbarIO(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters]) extends Bundle {
+class KelvinXbarIO(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters], val enableTestHarness: Boolean) extends Bundle {
   val cfg = CrossbarConfig
 
   // --- Primary Clock and Reset ---
@@ -44,7 +44,7 @@
   // --- Dynamic Asynchronous Clock/Reset Ports ---
   // Find all unique clock domains from the config, excluding the main one.
   val asyncDeviceDomains = cfg.devices.map(_.clockDomain).distinct.filter(_ != "main")
-  val asyncHostDomains = cfg.hosts.map(_.clockDomain).distinct.filter(_ != "main")
+  val asyncHostDomains = cfg.hosts(enableTestHarness).map(_.clockDomain).distinct.filter(_ != "main")
 
   // Create a Vec of Bundles for clock and reset inputs for each async domain.
   val async_ports_devices = Input(Vec(asyncDeviceDomains.length, new Bundle {
@@ -67,27 +67,28 @@
  *
  * @param p The TileLink UL parameters for the bus.
  */
-class KelvinXbar(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters]) extends RawModule {
+class KelvinXbar(val hostParams: Seq[bus.TLULParameters], val deviceParams: Seq[bus.TLULParameters], val enableTestHarness: Boolean) extends RawModule {
+  override val desiredName = if (enableTestHarness) "KelvinXbarTestHarness" else "KelvinXbar"
   // Load the single source of truth for the crossbar configuration.
   val cfg = CrossbarConfig
 
   // Create simple maps from name to index for easy port access.
-  val hostMap = cfg.hosts.map(_.name).zipWithIndex.toMap
+  val hostMap = cfg.hosts(enableTestHarness).map(_.name).zipWithIndex.toMap
   val deviceMap = cfg.devices.map(_.name).zipWithIndex.toMap
 
   // Instantiate the dynamically generated IO bundle.
-  val io = IO(new KelvinXbarIO(hostParams, deviceParams))
+  val io = IO(new KelvinXbarIO(hostParams, deviceParams, enableTestHarness))
 
   // Find all unique clock domains from the config, excluding the main one.
   val asyncDeviceDomains = cfg.devices.map(_.clockDomain).distinct.filter(_ != "main")
-  val asyncHostDomains = cfg.hosts.map(_.clockDomain).distinct.filter(_ != "main")
+  val asyncHostDomains = cfg.hosts(enableTestHarness).map(_.clockDomain).distinct.filter(_ != "main")
 
   // --- 1. Graph Analysis ---
   // Analyze the configuration to understand the connection topology. This will be
   // used to determine the size of sockets and how to wire them up.
-  val hostConnections = cfg.connections
+  val hostConnections = cfg.connections(enableTestHarness)
   val deviceFanIn = cfg.devices.map { device =>
-    device.name -> cfg.hosts.filter(h => hostConnections(h.name).contains(device.name))
+    device.name -> cfg.hosts(enableTestHarness).filter(h => hostConnections(h.name).contains(device.name))
   }.toMap
 
   // --- 2. Programmatic Instantiation (within the main clock domain) ---
@@ -115,7 +116,7 @@
     }.toMap
 
     // Create an asynchronous FIFO for each host in a different clock domain.
-    val asyncHostFifo = cfg.hosts.filter(_.clockDomain != "main").map { host =>
+    val asyncHostFifo = cfg.hosts(enableTestHarness).filter(_.clockDomain != "main").map { host =>
       val hostId = hostMap(host.name)
       host.name -> Module(new TlulFifoAsync(hostParams(hostId)))
     }.toMap
@@ -173,7 +174,7 @@
 
   // Connect top-level host IOs to the host-side of the 1-to-N sockets.
   for ((hostName, socket) <- hostSockets) {
-    val hostConfig = cfg.hosts.find(_.name == hostName).get
+    val hostConfig = cfg.hosts(enableTestHarness).find(_.name == hostName).get
     if (hostConfig.clockDomain != "main") {
       asyncHostFifos(hostName).io.tl_h <> io.hosts(hostMap(hostName))
       socket.io.tl_h <> asyncHostFifos(hostName).io.tl_d
@@ -193,7 +194,7 @@
   }
 
   for ((hostName, fifo) <- asyncHostFifos) {
-    val hostConfig = cfg.hosts.find(_.name == hostName).get
+    val hostConfig = cfg.hosts(enableTestHarness).find(_.name == hostName).get
     val domainIndex = asyncHostDomainMap(hostConfig.clockDomain)
     fifo.io.clk_h_i := io.async_ports_hosts(domainIndex).clock
     fifo.io.rst_h_i := !io.async_ports_hosts(domainIndex).reset.asBool
@@ -263,8 +264,12 @@
  */
 @nowarn
 object KelvinXbarEmitter extends App {
+  // Basic argument parsing for --enableTestHarness
+  val enableTestHarness = args.contains("--enableTestHarness")
+  val chiselArgs = args.filterNot(_ == "--enableTestHarness")
+
   // Create a sequence of TLULParameters for hosts and devices based on the config.
-  val hostParams = CrossbarConfig.hosts.map { host =>
+  val hostParams = CrossbarConfig.hosts(enableTestHarness).map { host =>
     val p = new Parameters
     p.lsuDataBits = host.width
     new bus.TLULParameters(p)
@@ -277,10 +282,10 @@
 
   // Use ChiselStage to generate the Verilog.
   (new ChiselStage).execute(
-    Array("--target", "systemverilog") ++ args,
+    Array("--target", "systemverilog") ++ chiselArgs,
     Seq(
       ChiselGeneratorAnnotation(() =>
-        new KelvinXbar(hostParams, deviceParams)
+        new KelvinXbar(hostParams, deviceParams, enableTestHarness)
       )
     ) ++ Seq(FirtoolOption("-enable-layers=Verification"))
   )
diff --git a/hdl/chisel/src/soc/SoCChiselConfig.scala b/hdl/chisel/src/soc/SoCChiselConfig.scala
new file mode 100644
index 0000000..d7a106f
--- /dev/null
+++ b/hdl/chisel/src/soc/SoCChiselConfig.scala
@@ -0,0 +1,121 @@
+package kelvin.soc
+
+import kelvin.{MemoryRegion, MemoryRegionType}
+
+// --- External Port Definitions ---
+
+/** A simple enumeration for port directions. */
+sealed trait PortDirection
+case object In extends PortDirection
+case object Out extends PortDirection
+
+/** A simple enumeration for basic port types. */
+sealed trait PortType
+case object Clk extends PortType
+case object Bool extends PortType
+
+/**
+ * Defines a non-TileLink port to be exposed at the subsystem boundary.
+ *
+ * @param name The name of the port on the subsystem's IO bundle.
+ * @param portType The Chisel type of the port (e.g., Clock, Bool).
+ * @param direction The direction of the port (In or Out).
+ * @param modulePort The full path to the port on the instantiated module
+ *                   (e.g., "io.halted", "io.spi.csb").
+ */
+case class ExternalPort(
+  name: String,
+  portType: PortType,
+  direction: PortDirection,
+  modulePort: String
+)
+
+// --- Type-Safe Module Parameter Definitions ---
+
+/** A trait representing the parameters for any configurable Chisel module. */
+sealed trait ModuleParameters
+
+/** Parameters for the CoreTlul module. */
+case class CoreTlulParameters(
+  lsuDataBits: Int,
+  enableRvv: Boolean,
+  enableFetchL0: Boolean,
+  fetchDataBits: Int,
+  enableVector: Boolean,
+  enableFloat: Boolean,
+  memoryRegions: Seq[MemoryRegion]
+) extends ModuleParameters
+
+/** Parameters for the Spi2TLUL module. */
+case class Spi2TlulParameters(
+  lsuDataBits: Int
+) extends ModuleParameters
+
+
+/**
+ * Defines the parameters for a Chisel module to be instantiated within the subsystem.
+ *
+ * @param name A unique instance name for the module.
+ * @param moduleClass The fully qualified Scala class name of the Chisel Module to instantiate.
+ * @param hostConnections A map where keys are port names on the module that are TileLink hosts,
+ *                        and values are the names of the host ports on the crossbar to connect to.
+ * @param deviceConnections A map where keys are port names on the module that are TileLink devices,
+ *                          and values are the names of the device ports on the crossbar to connect to.
+ * @param externalPorts A sequence of non-TileLink ports that need to be wired to the subsystem's top-level IO.
+ */
+case class ChiselModuleConfig(
+  name: String,
+  moduleClass: String,
+  params: ModuleParameters,
+  hostConnections: Map[String, String] = Map.empty,
+  deviceConnections: Map[String, String] = Map.empty,
+  externalPorts: Seq[ExternalPort] = Seq.empty
+)
+
+/**
+ * The single source of truth for the entire Chisel-based portion of the SoC.
+ */
+object SoCChiselConfig {
+  val crossbar = CrossbarConfig
+
+  val modules = Seq(
+    ChiselModuleConfig(
+      name = "rvv_core",
+      moduleClass = "kelvin.CoreTlul",
+      params = CoreTlulParameters(
+        lsuDataBits = 128,
+        enableRvv = true,
+        enableFetchL0 = false,
+        fetchDataBits = 128,
+        enableVector = false,
+        enableFloat = true,
+        memoryRegions = Seq(
+          new MemoryRegion(0x0000, 0x2000, MemoryRegionType.IMEM),
+          new MemoryRegion(0x10000, 0x8000, MemoryRegionType.DMEM),
+          new MemoryRegion(0x30000, 0x2000, MemoryRegionType.Peripheral)
+        )
+      ),
+      hostConnections = Map("io.tl_host" -> "kelvin_core"),
+      deviceConnections = Map("io.tl_device" -> "kelvin_device"),
+      externalPorts = Seq(
+        ExternalPort("halted", Bool, Out, "io.halted"),
+        ExternalPort("fault",  Bool, Out, "io.fault"),
+        ExternalPort("wfi",    Bool, Out, "io.wfi"),
+        ExternalPort("irq",    Bool, In,  "io.irq"),
+        ExternalPort("te",     Bool, In,  "io.te")
+      )
+    ),
+    ChiselModuleConfig(
+      name = "spi2tlul",
+      moduleClass = "bus.Spi2TLUL",
+      params = Spi2TlulParameters(lsuDataBits = 128),
+      hostConnections = Map("io.tl" -> "spi2tlul"),
+      externalPorts = Seq(
+        ExternalPort("spi_clk",  Clk,  In,  "io.spi.clk"),
+        ExternalPort("spi_csb",  Bool, In,  "io.spi.csb"),
+        ExternalPort("spi_mosi", Bool, In,  "io.spi.mosi"),
+        ExternalPort("spi_miso", Bool, Out, "io.spi.miso")
+      )
+    )
+  )
+}
diff --git a/kelvin_test_utils/TileLinkULInterface.py b/kelvin_test_utils/TileLinkULInterface.py
index 57c1cb5..ac54a67 100644
--- a/kelvin_test_utils/TileLinkULInterface.py
+++ b/kelvin_test_utils/TileLinkULInterface.py
@@ -155,6 +155,7 @@
         """Monitors the host D channel and puts transactions into host_d_fifo."""
         d_valid = getattr(self.dut, f"{prefix}_d_valid")
         d_ready = getattr(self.dut, f"{prefix}_d_ready")
+        x_count = 0
 
         d_ready.value = 1
         while True:
@@ -173,14 +174,17 @@
 
                     await self.host_d_fifo.put(txn)
             except Exception as e:
-                print('X seen in _host_d_monitor: ' + str(e) + ' ' + prefix)
-                # raise e
+                x_count += 1
+                self.dut._log.warning(f"X seen in _host_d_monitor ({prefix}): {e} ({x_count}/3)")
+                if x_count >= 3:
+                    assert False, f"Too many 'X' values detected in _host_d_monitor on {prefix}"
 
     # master_aragent
     async def _device_a_monitor(self, prefix):
         """Monitors the device A channel and puts transactions into device_a_fifo."""
         a_valid = getattr(self.dut, f"{prefix}_a_valid")
         a_ready = getattr(self.dut, f"{prefix}_a_ready")
+        x_count = 0
 
         a_ready.value = 1
         while True:
@@ -198,7 +202,10 @@
                                                          signal_name).value
                     await self.device_a_fifo.put(txn)
             except Exception as e:
-                print('X seen in _device_a_monitor: ' + str(e) + ' ' + prefix)
+                x_count += 1
+                self.dut._log.warning(f"X seen in _device_a_monitor ({prefix}): {e} ({x_count}/3)")
+                if x_count >= 3:
+                    assert False, f"Too many 'X' values detected in _device_a_monitor on {prefix}"
 
     # master_bagent
     async def _device_d_driver(self, prefix, timeout=4096):
diff --git a/tests/cocotb/tlul/BUILD b/tests/cocotb/tlul/BUILD
index 2044827..33e8d82 100644
--- a/tests/cocotb/tlul/BUILD
+++ b/tests/cocotb/tlul/BUILD
@@ -315,12 +315,12 @@
 # BEGIN_TESTCASES_FOR_kelvin_xbar_cocotb
 KELVIN_XBAR_TESTCASES = [
     "test_kelvin_core_to_sram",
-    "test_ibex_d_to_invalid_addr",
+    "test_kelvin_core_to_invalid_addr",
     "test_kelvin_core_to_uart1",
-    "test_ibex_d_to_kelvin_device",
+    "test_test_host_32_to_kelvin_device",
     "test_kelvin_core_to_kelvin_device",
-    "test_ibex_d_to_kelvin_device_csr_read",
-    "test_ibex_d_to_kelvin_device_specific_addr",
+    "test_test_host_32_to_kelvin_device_csr_read",
+    "test_test_host_32_to_kelvin_device_specific_addr",
     "test_wide_to_narrow_integrity",
 ]
 # END_TESTCASES_FOR_kelvin_xbar_cocotb
@@ -331,7 +331,7 @@
     testcases = KELVIN_XBAR_TESTCASES,
     testcases_vname = "KELVIN_XBAR_TESTCASES",
     tests_kwargs = {
-        "hdl_toplevel": "KelvinXbar",
+        "hdl_toplevel": "KelvinXbarTestHarness",
         "waves": True,
         "seed": "42",
         "test_module": ["kelvin_xbar_test.py"],
@@ -340,8 +340,8 @@
             "//kelvin_test_utils:secded_golden",
         ],
     },
-    verilator_model = ":kelvin_xbar_model",
-    vcs_verilog_sources = ["//hdl/chisel/src/soc:kelvin_xbar_cc_library_verilog"],
+    verilator_model = "//hdl/chisel/src/soc:kelvin_xbar_testharness_model",
+    vcs_verilog_sources = ["//hdl/chisel/src/soc:kelvin_xbar_testharness_cc_library_emit_verilog"],
     vcs_build_args = VCS_BUILD_ARGS,
     vcs_test_args = VCS_TEST_ARGS,
     vcs_defines = VCS_DEFINES,
@@ -386,3 +386,41 @@
     vcs_test_args = VCS_TEST_ARGS,
     vcs_defines = VCS_DEFINES,
 )
+
+# BEGIN_TESTCASES_FOR_kelvin_chisel_subsystem_cocotb
+KELVIN_CHISEL_SUBSYSTEM_TESTCASES = [
+    "test_tlul_passthrough",
+    "test_program_execution_via_host",
+    "test_program_execution_via_spi",
+]
+# END_TESTCASES_FOR_kelvin_chisel_subsystem_cocotb
+
+cocotb_test_suite(
+    name = "kelvin_chisel_subsystem_cocotb",
+    simulators = ["verilator", "vcs"],
+    testcases = KELVIN_CHISEL_SUBSYSTEM_TESTCASES,
+    testcases_vname = "KELVIN_CHISEL_SUBSYSTEM_TESTCASES",
+    tests_kwargs = {
+        "hdl_toplevel": "KelvinChiselSubsystemTestHarness",
+        "waves": True,
+        "seed": "42",
+        "test_module": ["test_subsystem.py"],
+        "deps": [
+            "//kelvin_test_utils:TileLinkULInterface",
+            "//kelvin_test_utils:spi_master",
+            requirement("pyelftools"),
+            "@bazel_tools//tools/python/runfiles",
+        ],
+        "data": [
+            "//tests/cocotb/rvv/arithmetics:rvv_add_int32_m1.elf",
+        ],
+    },
+    verilator_model = "//hdl/chisel/src/soc:kelvin_chisel_subsystem_testharness_model",
+    vcs_verilog_sources = ["//hdl/chisel/src/soc:kelvin_chisel_subsystem_testharness_cc_library_emit_verilog"],
+    vcs_data = [
+        "//tests/cocotb/rvv/arithmetics:rvv_add_int32_m1.elf",
+    ],
+    vcs_build_args = VCS_BUILD_ARGS,
+    vcs_test_args = VCS_TEST_ARGS,
+    vcs_defines = VCS_DEFINES,
+)
diff --git a/tests/cocotb/tlul/kelvin_xbar_test.py b/tests/cocotb/tlul/kelvin_xbar_test.py
index 0e62adb..2bece48 100644
--- a/tests/cocotb/tlul/kelvin_xbar_test.py
+++ b/tests/cocotb/tlul/kelvin_xbar_test.py
@@ -12,18 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# BEGIN_TESTCASES_FOR_kelvin_xbar_cocotb
-KELVIN_XBAR_TESTCASES = [
-    "test_kelvin_core_to_sram",
-    "test_ibex_d_to_invalid_addr",
-    "test_kelvin_core_to_uart1",
-    "test_ibex_d_to_kelvin_device",
-    "test_kelvin_core_to_kelvin_device",
-    "test_ibex_d_to_kelvin_device_specific_addr",
-    "test_wide_to_narrow_integrity",
-]
-# END_TESTCASES_FOR_kelvin_xbar_cocotb
-
 import cocotb
 from cocotb.clock import Clock
 from cocotb.triggers import RisingEdge, ClockCycles, with_timeout
@@ -33,19 +21,17 @@
 
 # --- Configuration Constants ---
 # These constants are derived from CrossbarConfig.scala to make tests readable.
-HOST_MAP = {"kelvin_core": 0, "ibex_core_i": 1, "ibex_core_d": 2}
+HOST_MAP = {"kelvin_core": 0, "spi2tlul": 1, "test_host_32": 2}
 DEVICE_MAP = {
     "kelvin_device": 0,
     "rom": 1,
     "sram": 2,
     "uart0": 3,
     "uart1": 4,
-    "spi0": 5,
 }
 SRAM_BASE = 0x20000000
 UART1_BASE = 0x40010000
 KELVIN_DEVICE_BASE = 0x00000000
-SPI0_BASE = 0x40020000
 INVALID_ADDR = 0xDEADBEEF
 TIMEOUT_CYCLES = 500
 
@@ -57,38 +43,28 @@
     clock = Clock(dut.io_clk_i, 5)
     cocotb.start_soon(clock.start())
 
-    # Start the asynchronous SPI clock
-    spi_clock = Clock(dut.io_async_ports_devices_0_clock, 20)
-    cocotb.start_soon(spi_clock.start())
-
-    # Start the asynchronous Ibex clock
-    ibex_clock = Clock(dut.io_async_ports_hosts_0_clock, 10)
-    cocotb.start_soon(ibex_clock.start())
+    # Start the asynchronous test clock
+    test_clock = Clock(dut.io_async_ports_hosts_0_clock, 10)
+    cocotb.start_soon(test_clock.start())
 
     # Create a dictionary of TileLink interfaces for all hosts and devices
-    host_widths = {"kelvin_core": 128, "ibex_core_i": 32, "ibex_core_d": 32}
+    host_widths = {"kelvin_core": 128, "spi2tlul": 128, "test_host_32": 32}
     device_widths = {
         "kelvin_device": 128,
         "rom": 32,
         "sram": 32,
         "uart0": 32,
         "uart1": 32,
-        "spi0": 32
     }
 
     interfaces = {
         "hosts": [
             TileLinkULInterface(dut,
                                 host_if_name=f"io_hosts_{i}",
-                                clock_name="io_clk_i",
-                                reset_name="io_rst_ni",
+                                clock_name="io_clk_i" if name != "test_host_32" else "io_async_ports_hosts_0_clock",
+                                reset_name="io_rst_ni" if name != "test_host_32" else "io_async_ports_hosts_0_reset",
                                 width=host_widths[name])
-            if name == "kelvin_core" else TileLinkULInterface(
-                dut,
-                host_if_name=f"io_hosts_{i}",
-                clock_name="io_async_ports_hosts_0_clock",
-                reset_name="io_async_ports_hosts_0_reset",
-                width=host_widths[name]) for name, i in HOST_MAP.items()
+            for name, i in HOST_MAP.items()
         ],
         "devices": [
             TileLinkULInterface(dut,
@@ -99,27 +75,19 @@
             for name, i in DEVICE_MAP.items()
         ],
     }
-    # Special case for the async SPI port
-    interfaces["devices"][DEVICE_MAP["spi0"]] = TileLinkULInterface(
-        dut,
-        device_if_name=f"io_devices_{DEVICE_MAP['spi0']}",
-        clock_name="io_async_ports_devices_0_clock",
-        reset_name="io_async_ports_devices_0_reset",
-        width=32)
 
     # Reset the DUT
     dut.io_rst_ni.value = 0
-    dut.io_async_ports_devices_0_reset.value = 0
     dut.io_async_ports_hosts_0_reset.value = 0
     await ClockCycles(dut.io_clk_i, 5)
     dut.io_rst_ni.value = 1
-    dut.io_async_ports_devices_0_reset.value = 1
     dut.io_async_ports_hosts_0_reset.value = 1
     await ClockCycles(dut.io_clk_i, 5)
 
     return interfaces, clock
 
 
+
 # --- Test Cases ---
 
 
@@ -170,10 +138,10 @@
 
 
 @cocotb.test(timeout_time=10, timeout_unit="us")
-async def test_ibex_d_to_invalid_addr(dut):
+async def test_kelvin_core_to_invalid_addr(dut):
     """Verify that a request to an unmapped address gets an error response."""
     interfaces, clock = await setup_dut(dut)
-    host_if = interfaces["hosts"][HOST_MAP["ibex_core_d"]]
+    host_if = interfaces["hosts"][HOST_MAP["kelvin_core"]]
     timeout_ns = TIMEOUT_CYCLES * clock.period
 
     # Send a write request to an invalid address
@@ -242,10 +210,10 @@
 
 
 @cocotb.test(timeout_time=10, timeout_unit="us")
-async def test_ibex_d_to_kelvin_device(dut):
+async def test_test_host_32_to_kelvin_device(dut):
     """Verify a 32-bit to 128-bit write transaction."""
     interfaces, clock = await setup_dut(dut)
-    host_if = interfaces["hosts"][HOST_MAP["ibex_core_d"]]
+    host_if = interfaces["hosts"][HOST_MAP["test_host_32"]]
     device_if = interfaces["devices"][DEVICE_MAP["kelvin_device"]]
     timeout_ns = TIMEOUT_CYCLES * clock.period
 
@@ -310,13 +278,13 @@
 
 
 @cocotb.test(timeout_time=10, timeout_unit="us")
-async def test_ibex_d_to_kelvin_device_csr_read(dut):
-    """Verify that Ibex can correctly read a CSR from the Kelvin device.
+async def test_test_host_32_to_kelvin_device_csr_read(dut):
+    """Verify that test_host_32 can correctly read a CSR from the Kelvin device.
 
     This test specifically checks the return path through the width bridge.
     """
     interfaces, clock = await setup_dut(dut)
-    host_if = interfaces["hosts"][HOST_MAP["ibex_core_d"]]
+    host_if = interfaces["hosts"][HOST_MAP["test_host_32"]]
     device_if = interfaces["devices"][DEVICE_MAP["kelvin_device"]]
     timeout_ns = TIMEOUT_CYCLES * clock.period
     csr_addr = KELVIN_DEVICE_BASE + 0x8  # Match the CSR address
@@ -374,10 +342,10 @@
 
 
 @cocotb.test(timeout_time=10, timeout_unit="us")
-async def test_ibex_d_to_kelvin_device_specific_addr(dut):
+async def test_test_host_32_to_kelvin_device_specific_addr(dut):
     """Verify a write to a specific address in the kelvin_device range."""
     interfaces, clock = await setup_dut(dut)
-    host_if = interfaces["hosts"][HOST_MAP["ibex_core_d"]]
+    host_if = interfaces["hosts"][HOST_MAP["test_host_32"]]
     device_if = interfaces["devices"][DEVICE_MAP["kelvin_device"]]
     timeout_ns = TIMEOUT_CYCLES * clock.period
 
diff --git a/tests/cocotb/tlul/test_subsystem.py b/tests/cocotb/tlul/test_subsystem.py
new file mode 100644
index 0000000..ac7c023
--- /dev/null
+++ b/tests/cocotb/tlul/test_subsystem.py
@@ -0,0 +1,406 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cocotb
+from cocotb.clock import Clock
+from cocotb.triggers import ClockCycles
+from elftools.elf.elffile import ELFFile
+from bazel_tools.tools.python.runfiles import runfiles
+
+from kelvin_test_utils.TileLinkULInterface import TileLinkULInterface, create_a_channel_req
+from kelvin_test_utils.spi_master import SPIMaster
+from kelvin_test_utils.spi_constants import SpiRegAddress, SpiCommand, TlStatus
+
+# --- Constants ---
+BUS_WIDTH_BITS = 128
+BUS_WIDTH_BYTES = 16
+
+async def setup_dut(dut):
+    """Common setup logic for all tests."""
+    # Default all TL-UL input signals to a safe state
+    for i in range(4): # 4 external device ports
+        getattr(dut, f"io_external_devices_ports_{i}_d_valid").value = 0
+
+    # Start the main clock
+    clock = Clock(dut.io_clk_i, 10)
+    cocotb.start_soon(clock.start())
+
+    # Start the asynchronous test clock
+    test_clock = Clock(dut.io_async_ports_hosts_clocks_0, 20)
+    cocotb.start_soon(test_clock.start())
+
+    # Reset the DUT
+    dut.io_rst_ni.value = 0
+    dut.io_async_ports_hosts_resets_0.value = 0
+    await ClockCycles(dut.io_clk_i, 5)
+    dut.io_rst_ni.value = 1
+    dut.io_async_ports_hosts_resets_0.value = 1
+    await ClockCycles(dut.io_clk_i, 5)
+
+    # Add a final delay to ensure all reset synchronizers have settled
+    await ClockCycles(dut.io_clk_i, 10)
+
+    return clock
+
+async def load_elf(dut, elf_file, host_if):
+    """Parses an ELF file and loads its segments into memory via TileLink."""
+    elf = ELFFile(elf_file)
+    entry_point = elf.header.e_entry
+
+    for segment in elf.iter_segments():
+        if segment.header.p_type == 'PT_LOAD':
+            paddr = segment.header.p_paddr
+            data = segment.data()
+            dut._log.info(f"Loading segment at 0x{paddr:08x}, size {len(data)} bytes")
+
+            # Write segment data word by word (32 bits)
+            for i in range(0, len(data), 4):
+                word_addr = paddr + i
+                # Handle potentially short final word
+                word_data = data[i:i+4]
+                while len(word_data) < 4:
+                    word_data += b'\x00'
+
+                # Convert bytes to integer for the transaction
+                int_data = int.from_bytes(word_data, byteorder='little')
+
+                # Create and send the write transaction
+                write_txn = create_a_channel_req(
+                    address=word_addr,
+                    data=int_data,
+                    mask=0xF,  # Full 32-bit mask
+                    width=host_if.width
+                )
+                await host_if.host_put(write_txn)
+
+                # Wait for the acknowledgment
+                resp = await host_if.host_get_response()
+                assert resp["error"] == 0, f"Received error response while writing to 0x{word_addr:08x}"
+
+    return entry_point
+
+async def load_elf_via_spi(dut, elf_file, spi_master):
+    """Parses an ELF file and loads its segments into memory via SPI."""
+    elf = ELFFile(elf_file)
+    entry_point = elf.header.e_entry
+
+    for segment in elf.iter_segments():
+        if segment.header.p_type == 'PT_LOAD':
+            paddr = segment.header.p_paddr
+            data = segment.data()
+            dut._log.info(f"Loading segment at 0x{paddr:08x}, size {len(data)} bytes via SPI")
+
+            # Load data line by line
+            for i in range(0, len(data), BUS_WIDTH_BYTES):
+                line_addr = paddr + i
+                line_data = data[i:i+BUS_WIDTH_BYTES]
+                while len(line_data) < BUS_WIDTH_BYTES:
+                    line_data += b'\x00'
+                int_data = int.from_bytes(line_data, byteorder='little')
+                dut._log.info(f"Loading line at 0x{line_addr:08x}")
+                await write_line_via_spi(spi_master, line_addr, int_data)
+
+    return entry_point
+
+
+async def read_line_via_spi(spi_master, address):
+    """Reads a full 128-bit bus line from a given address via the SPI bridge."""
+    assert address % BUS_WIDTH_BYTES == 0, f"Address 0x{address:X} is not aligned to the bus width of {BUS_WIDTH_BYTES} bytes"
+
+    # 1. Configure the TileLink read via SPI
+    # Write address (32 bits) byte by byte
+    for j in range(4):
+        addr_byte = (address >> (j * 8)) & 0xFF
+        await spi_master.write_reg(SpiRegAddress.TL_ADDR_REG_0 + j, addr_byte)
+
+    # Write length (0 means 1 beat of 128 bits)
+    await spi_master.write_reg_16b(SpiRegAddress.TL_LEN_REG_L, 0)
+
+    # 2. Issue the read command
+    await spi_master.write_reg(SpiRegAddress.TL_CMD_REG, SpiCommand.CMD_READ_START, wait_cycles=0)
+
+    # 3. Poll the status register until the transaction is done
+    assert await spi_master.poll_reg_for_value(SpiRegAddress.TL_STATUS_REG, TlStatus.DONE), \
+        f"Timed out waiting for SPI read from 0x{address:08x} to complete"
+
+    # 4. Read the data from the buffer port
+    read_data = await spi_master.bulk_read(BUS_WIDTH_BYTES)
+
+    # 5. Clear the status to return FSM to Idle
+    await spi_master.write_reg(SpiRegAddress.TL_CMD_REG, SpiCommand.CMD_NULL)
+
+    return int.from_bytes(bytes(read_data), byteorder='little')
+
+
+async def update_line_via_spi(spi_master, address, data, mask):
+    """Performs a read-modify-write to update a 128-bit line via SPI."""
+    assert address % BUS_WIDTH_BYTES == 0, f"Address 0x{address:X} is not aligned to the bus width of {BUS_WIDTH_BYTES} bytes"
+    # Read the current line from memory
+    line_data = await read_line_via_spi(spi_master, address)
+
+    # Apply the masked data update
+    # The mask is a bitmask where each bit corresponds to a byte.
+    updated_data = 0
+    for i in range(BUS_WIDTH_BYTES):
+        byte_mask = (mask >> i) & 1
+        if byte_mask:
+            updated_data |= ((data >> (i * 8)) & 0xFF) << (i * 8)
+        else:
+            updated_data |= ((line_data >> (i * 8)) & 0xFF) << (i * 8)
+
+    # Write the modified line back to memory
+    await write_line_via_spi(spi_master, address, updated_data)
+
+
+async def write_line_via_spi(spi_master, address, data):
+    """Writes a 128-bit bus line to a given address via the SPI bridge."""
+    assert address % BUS_WIDTH_BYTES == 0, f"Address 0x{address:X} is not aligned to the bus width of {BUS_WIDTH_BYTES} bytes"
+
+    # Emit a full transaction for the line.
+    await spi_master.packed_write_transaction(target_addr=address, data=[data])
+
+    # Poll status register until the transaction is done.
+    assert await spi_master.poll_reg_for_value(SpiRegAddress.TL_WRITE_STATUS_REG, TlStatus.DONE), \
+        f"Timed out waiting for SPI write to 0x{address:08x} to complete"
+
+    # Clear the status to return FSM to Idle.
+    await spi_master.write_reg(SpiRegAddress.TL_CMD_REG, SpiCommand.CMD_NULL)
+
+
+async def write_word_via_spi(spi_master, address, data):
+    """Writes a 32-bit value to a specific address using the SPI bridge.
+
+    Note: This function performs a read-modify-write operation on the underlying
+    128-bit bus. It is not suitable for writing to memory-mapped registers
+    where the read operation has side effects.
+    """
+    line_addr = (address // BUS_WIDTH_BYTES) * BUS_WIDTH_BYTES
+    offset = address % BUS_WIDTH_BYTES
+    mask = 0xF << offset  # 4-byte mask at the correct offset
+    shifted_data = data << (offset * 8)
+    await update_line_via_spi(spi_master, line_addr, shifted_data, mask)
+
+@cocotb.test()
+async def test_tlul_passthrough(dut):
+    """Drives a TL-UL transaction through an external host and device port."""
+    clock = await setup_dut(dut)
+
+    # Instantiate a TL-UL host to drive the first external host port (ibex_core_i)
+    host_if = TileLinkULInterface(
+        dut,
+        host_if_name="io_external_hosts_ports_0",
+        clock_name="io_async_ports_hosts_clocks_0",
+        reset_name="io_async_ports_hosts_resets_0",
+        width=32)
+
+    # Instantiate a TL-UL device to act as the first external device (rom)
+    device_if = TileLinkULInterface(
+        dut,
+        device_if_name="io_external_devices_ports_0",
+        clock_name="io_clk_i",
+        reset_name="io_rst_ni",
+        width=32)
+
+    # Initialize the interfaces
+    await host_if.init()
+    await device_if.init()
+
+    # --- Device Responder Task ---
+    # This task mimics the behavior of the external ROM device.
+    ROM_BASE_ADDR = 0x10000000
+    TEST_SOURCE_ID = 5
+    TEST_DATA = 0xCAFED00D
+
+    async def device_responder():
+        """A mock responder for the external ROM."""
+        req = await device_if.device_get_request()
+
+        # Verify the incoming request
+        assert req["opcode"] == 0, f"Expected PutFullData opcode (0), got {req['opcode']}"
+        assert req["address"] == ROM_BASE_ADDR, f"Expected address {ROM_BASE_ADDR:X}, got {req['address']:X}"
+        assert req["data"] == TEST_DATA, f"Expected data {TEST_DATA:X}, got {req['data']:X}"
+
+        # Send an AccessAck response
+        await device_if.device_respond(
+            opcode=0,  # AccessAck
+            param=0,
+            size=req["size"],
+            source=req["source"],
+            error=0
+        )
+
+    # Start the device responder coroutine
+    responder_task = cocotb.start_soon(device_responder())
+
+    # --- Host Stimulus ---
+    # Create and send a 'PutFullData' request from the host.
+    write_txn = create_a_channel_req(
+        address=ROM_BASE_ADDR,
+        source=TEST_SOURCE_ID,
+        data=TEST_DATA,
+        mask=0xF, # Full mask for 32 bits
+        width=host_if.width
+    )
+    await host_if.host_put(write_txn)
+
+    # Wait for and verify the response.
+    resp = await host_if.host_get_response()
+    assert resp["error"] == 0, "Response indicated an error"
+    assert resp["source"] == TEST_SOURCE_ID, f"Expected source ID {TEST_SOURCE_ID}, got {resp['source']}"
+    assert resp["opcode"] == 0, f"Expected AccessAck opcode (0), got {resp['opcode']}"
+
+    # Ensure the responder task finished cleanly.
+    await responder_task
+
+@cocotb.test()
+async def test_program_execution_via_host(dut):
+    """Loads and executes a program via an external host port."""
+    clock = await setup_dut(dut)
+
+    # Instantiate a TL-UL host to drive the 0-th external host port (test_host_32)
+    host_if = TileLinkULInterface(
+        dut,
+        host_if_name="io_external_hosts_ports_0",
+        clock_name="io_async_ports_hosts_clocks_0",
+        reset_name="io_async_ports_hosts_resets_0",
+        width=32)
+
+    # Initialize the interface
+    await host_if.init()
+
+    # Find and load the ELF file
+    r = runfiles.Create()
+    elf_path = r.Rlocation("kelvin_hw/tests/cocotb/rvv/arithmetics/rvv_add_int32_m1.elf")
+    assert elf_path, "Could not find ELF file"
+
+    with open(elf_path, "rb") as f:
+        entry_point = await load_elf(dut, f, host_if)
+
+    dut._log.info(f"Program loaded. Entry point: 0x{entry_point:08x}")
+
+    # --- Execute Program ---
+    # From the integration guide:
+    # 1. Program the start PC
+    # 2. Release clock gate
+    # 3. Release reset
+
+    kelvin_pc_csr_addr = 0x30004
+    kelvin_reset_csr_addr = 0x30000
+
+    # Program the start PC
+    dut._log.info(f"Programming start PC to 0x{entry_point:08x}")
+    write_txn = create_a_channel_req(
+        address=kelvin_pc_csr_addr,
+        data=entry_point,
+        mask=0xF,
+        width=host_if.width
+    )
+    await host_if.host_put(write_txn)
+    resp = await host_if.host_get_response()
+    assert resp["error"] == 0
+
+    # Release clock gate
+    dut._log.info("Releasing clock gate...")
+    write_txn = create_a_channel_req(
+        address=kelvin_reset_csr_addr,
+        data=1,
+        mask=0xF,
+        width=host_if.width
+    )
+    await host_if.host_put(write_txn)
+    resp = await host_if.host_get_response()
+    assert resp["error"] == 0
+
+    await ClockCycles(dut.io_clk_i, 1)
+
+    # Release reset
+    dut._log.info("Releasing reset...")
+    write_txn = create_a_channel_req(
+        address=kelvin_reset_csr_addr,
+        data=0,
+        mask=0xF,
+        width=host_if.width
+    )
+    await host_if.host_put(write_txn)
+    resp = await host_if.host_get_response()
+    assert resp["error"] == 0
+
+    # --- Wait for Completion ---
+    dut._log.info("Waiting for program to halt...")
+    timeout_cycles = 100000
+    for i in range(timeout_cycles):
+        if dut.io_external_ports_0.value == 1:  # halted is port 0
+            break
+        await ClockCycles(dut.io_clk_i, 1)
+    else:  # This else belongs to the for loop, executed if the loop finishes without break
+        assert False, f"Timeout: Program did not halt within {timeout_cycles} cycles."
+
+    dut._log.info("Program halted.")
+    assert dut.io_external_ports_1.value == 0, "Program halted with fault!"
+
+@cocotb.test()
+async def test_program_execution_via_spi(dut):
+    """Loads and executes a program via the SPI to TL-UL bridge."""
+    clock = await setup_dut(dut)
+
+    spi_master = SPIMaster(
+        clk=dut.io_external_ports_5,
+        csb=dut.io_external_ports_6,
+        mosi=dut.io_external_ports_7,
+        miso=dut.io_external_ports_8,
+        main_clk=dut.io_clk_i,
+        log=dut._log
+    )
+    await spi_master.idle_clocking(20)
+
+    # Find and load the ELF file
+    r = runfiles.Create()
+    elf_path = r.Rlocation("kelvin_hw/tests/cocotb/rvv/arithmetics/rvv_add_int32_m1.elf")
+    assert elf_path, "Could not find ELF file"
+
+    with open(elf_path, "rb") as f:
+        entry_point = await load_elf_via_spi(dut, f, spi_master)
+
+    dut._log.info(f"Program loaded via SPI. Entry point: 0x{entry_point:08x}")
+
+    # --- Execute Program ---
+    kelvin_pc_csr_addr = 0x30004
+    kelvin_reset_csr_addr = 0x30000
+
+    # Program the start PC
+    dut._log.info(f"Programming start PC to 0x{entry_point:08x}")
+    await write_word_via_spi(spi_master, kelvin_pc_csr_addr, entry_point)
+
+    # Release clock gate
+    dut._log.info("Releasing clock gate...")
+    await write_word_via_spi(spi_master, kelvin_reset_csr_addr, 1)
+
+    await ClockCycles(dut.io_clk_i, 1)
+
+    # Release reset
+    dut._log.info("Releasing reset...")
+    await write_word_via_spi(spi_master, kelvin_reset_csr_addr, 0)
+
+    # --- Wait for Completion ---
+    dut._log.info("Waiting for program to halt...")
+    timeout_cycles = 100000
+    for i in range(timeout_cycles):
+        if dut.io_external_ports_0.value == 1:  # halted is port 0
+            break
+        await ClockCycles(dut.io_clk_i, 1)
+    else:  # This else belongs to the for loop, executed if the loop finishes without break
+        assert False, f"Timeout: Program did not halt within {timeout_cycles} cycles."
+
+    dut._log.info("Program halted.")
+    assert dut.io_external_ports_1.value == 0, "Program halted with fault!"