[fpga, sw] Enable SCA on ChipWhisperer CW310 FPGA board

This involves the following main changes:
- Two new outputs are defined on the CW310 chip-level to provide the
  target clock and capture trigger to the capture board.
- The capture trigger can now be selected at run time through software
  and GPIO. This allows for taking SCA measurements for different IP
  cores (KMAC, OTBN) without having to re-generate a different
  bitstream.
- The capture trigger is synchronized to the target clock (100 MHz).
  This is needed to prevent metastability issues in the scope depending
  on place and route on the target FPGA.
- UART1 is enabled and muxed to pins IOC9/IOC8 using the pinmux. This
  is needed for the simple serial communication between target and
  capture board, while UART0 remains available for debug prints.
- The simple serial library is adjusted to use dif_uart_* only instead
  of a mix of dif_uart_* and base_printf() (this required the UART used
  for simple serial and stdout to be the same device previously).

Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
diff --git a/hw/top_earlgrey/data/autogen/top_earlgrey.gen.hjson b/hw/top_earlgrey/data/autogen/top_earlgrey.gen.hjson
index 6f8ec32..13b520b 100644
--- a/hw/top_earlgrey/data/autogen/top_earlgrey.gen.hjson
+++ b/hw/top_earlgrey/data/autogen/top_earlgrey.gen.hjson
@@ -11320,6 +11320,20 @@
             connection: manual
             desc: Manual USB UPHY signal for FPGA target
+          {
+            name: IO_CLKOUT
+            type: BidirStd
+            bank: VCC
+            connection: manual
+            desc: Manual clock output for SCA setup
+          }
+          {
+            name: IO_TRIGGER
+            type: BidirStd
+            bank: VCC
+            connection: manual
+            desc: Manual trigger output for SCA setup
+          }
diff --git a/hw/top_earlgrey/data/clocks.xdc b/hw/top_earlgrey/data/clocks.xdc
index c82c5cf..e020776 100644
--- a/hw/top_earlgrey/data/clocks.xdc
+++ b/hw/top_earlgrey/data/clocks.xdc
@@ -27,5 +27,5 @@
 create_clock -add -name clk_spi_in  -period 100.00 -waveform {0 5} [get_pin top_*/u_spi_device/u_clk_spi_in_buf/gen_xilinx.u_impl_xilinx/bufg_i/O]
 create_clock -add -name clk_spi_out -period 100.00 -waveform {0 5} [get_pin top_*/u_spi_device/u_clk_spi_out_buf/gen_xilinx.u_impl_xilinx/bufg_i/O]
-set_clock_groups -group ${clks_10_unbuf} -group ${clks_48_unbuf} -group ${clks_aon_unbuf} -group clk_io_div2 -group clk_io_div4 -group lc_jtag_tck -group rv_jtag_tck -group clk_spi_in -group clk_spi_out -asynchronous
+set_clock_groups -group ${clks_10_unbuf} -group ${clks_48_unbuf} -group ${clks_aon_unbuf} -group clk_io_div2 -group clk_io_div4 -group lc_jtag_tck -group rv_jtag_tck -group clk_spi_in -group clk_spi_out -group sys_clk_pin -asynchronous
diff --git a/hw/top_earlgrey/data/pins_cw310.xdc b/hw/top_earlgrey/data/pins_cw310.xdc
index fabacc7..6a2f58a 100644
--- a/hw/top_earlgrey/data/pins_cw310.xdc
+++ b/hw/top_earlgrey/data/pins_cw310.xdc
@@ -60,18 +60,14 @@
 set_property -dict { PACKAGE_PIN E10 IOSTANDARD LVCMOS33 } [get_ports { IOC5 }]; #USERIOB-14
 set_property -dict { PACKAGE_PIN D8  IOSTANDARD LVCMOS33 } [get_ports { IOC6 }]; #USERIOB-16
 set_property -dict { PACKAGE_PIN D9  IOSTANDARD LVCMOS33 } [get_ports { IOC7 }]; #USERIOB-18
-set_property -dict { PACKAGE_PIN C9  IOSTANDARD LVCMOS33 } [get_ports { IOC8 }]; #USERIOB-24
-set_property -dict { PACKAGE_PIN D10 IOSTANDARD LVCMOS33 } [get_ports { IOC9 }]; #USERIOB-26
+#set_property -dict { PACKAGE_PIN C9  IOSTANDARD LVCMOS33 } [get_ports { IOC8 }]; #USERIOB-24
+#set_property -dict { PACKAGE_PIN D10 IOSTANDARD LVCMOS33 } [get_ports { IOC9 }]; #USERIOB-26
 ## ChipWhisperer 20-Pin Connector (J14)
-## TODO: This needs to be adapted to enable captures on the CW310. In particular,
-## - a precise capture trigger and the target clock need to be output, and
-## - a separate UART should be used for the simpleserial communication with the capture board.
-## See also chiplevel.sv.tpl
-#set_property -dict { PACKAGE_PIN AE25 IOSTANDARD LVCMOS33 } [get_ports { IOC11 }];      #J14 PIN 10 CWIO_IO1 (UART)
-#set_property -dict { PACKAGE_PIN AF25 IOSTANDARD LVCMOS33 } [get_ports { IOC10 }];      #J14 PIN 12 CWIO_IO2 (UART)
-#set_property -dict { PACKAGE_PIN AF24 IOSTANDARD LVCMOS33 } [get_ports { IOB6 }];       #J14 PIN 16 CWIO_IO4 (Trigger)
-#set_property -dict { PACKAGE_PIN AB21 IOSTANDARD LVCMOS33 } [get_ports { TIO_CLKOUT }]; #J14 PIN  4 CWIO_HS1 (Target clock)
+set_property -dict { PACKAGE_PIN AF25 IOSTANDARD LVCMOS33 } [get_ports { IOC9 }];       #J14 PIN 12 CWIO_IO2 - OpenTitan UART1 TX
+set_property -dict { PACKAGE_PIN AE25 IOSTANDARD LVCMOS33 } [get_ports { IOC8 }];       #J14 PIN 10 CWIO_IO1 - OpenTitan UART1 RX
+set_property -dict { PACKAGE_PIN AF24 IOSTANDARD LVCMOS33 } [get_ports { IO_TRIGGER }]; #J14 PIN 16 CWIO_IO4 - Capture Trigger
+set_property -dict { PACKAGE_PIN AB21 IOSTANDARD LVCMOS33 } [get_ports { IO_CLKOUT }];  #J14 PIN  4 CWIO_HS1 - Target clock
 ## TI TUSB1106 USB PHY usbdev testing
 set_property -dict { PACKAGE_PIN AF19  IOSTANDARD LVCMOS18 } [get_ports { IO_UPHY_DP_TX }]; #USRUSB_VPO
@@ -93,8 +89,8 @@
 set_property -dict { PACKAGE_PIN A13   IOSTANDARD LVCMOS33 } [get_ports { IO_USB_DPPULLUP0 }]; #USERIOB-27
 ## UART
-set_property -dict { PACKAGE_PIN AA22 IOSTANDARD LVCMOS33 } [get_ports { IOC11 }]; #UART1RXD
-set_property -dict { PACKAGE_PIN W24  IOSTANDARD LVCMOS33 } [get_ports { IOC10 }]; #UART1TXD
+set_property -dict { PACKAGE_PIN AA22 IOSTANDARD LVCMOS33 } [get_ports { IOC11 }]; #UART1RXD - OpenTitan UART0 TX
+set_property -dict { PACKAGE_PIN W24  IOSTANDARD LVCMOS33 } [get_ports { IOC10 }]; #UART1TXD - OpenTitan UART0 RX
 ## Configuration options, can be used for all designs
 set_property CONFIG_VOLTAGE 3.3 [current_design]
diff --git a/hw/top_earlgrey/data/top_earlgrey.hjson b/hw/top_earlgrey/data/top_earlgrey.hjson
index 6c9df7a..6df1b83 100644
--- a/hw/top_earlgrey/data/top_earlgrey.hjson
+++ b/hw/top_earlgrey/data/top_earlgrey.hjson
@@ -1396,6 +1396,9 @@
           { name: 'IO_UPHY_OE_N',     type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual USB UPHY signal for FPGA target'}
           { name: 'IO_UPHY_SENSE',    type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual USB UPHY signal for FPGA target'}
           { name: 'IO_UPHY_DPPULLUP', type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual USB UPHY signal for FPGA target'}
+          // ChipWhisperer IO
+          { name: 'IO_CLKOUT',        type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual clock output for SCA setup'}
+          { name: 'IO_TRIGGER',       type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual trigger output for SCA setup'}
diff --git a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_asic.sv b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_asic.sv
index 0ff33a3..4fa0959 100644
--- a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_asic.sv
+++ b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_asic.sv
@@ -1159,5 +1159,4 @@
 endmodule : chip_earlgrey_asic
diff --git a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_cw310.sv b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_cw310.sv
index c29e049..4870180 100644
--- a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_cw310.sv
+++ b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_cw310.sv
@@ -38,6 +38,8 @@
   inout IO_UPHY_OE_N, // Manual Pad
   inout IO_UPHY_SENSE, // Manual Pad
   inout IO_UPHY_DPPULLUP, // Manual Pad
+  inout IO_CLKOUT, // Manual Pad
+  inout IO_TRIGGER, // Manual Pad
   // Muxed Pads
   inout IOA0, // MIO Pad 0
@@ -216,6 +218,8 @@
   logic manual_in_io_uphy_oe_n, manual_out_io_uphy_oe_n, manual_oe_io_uphy_oe_n;
   logic manual_in_io_uphy_sense, manual_out_io_uphy_sense, manual_oe_io_uphy_sense;
   logic manual_in_io_uphy_dppullup, manual_out_io_uphy_dppullup, manual_oe_io_uphy_dppullup;
+  logic manual_in_io_clkout, manual_out_io_clkout, manual_oe_io_clkout;
+  logic manual_in_io_trigger, manual_out_io_trigger, manual_oe_io_trigger;
   pad_attr_t manual_attr_por_n;
   pad_attr_t manual_attr_usb_p;
@@ -233,6 +237,8 @@
   pad_attr_t manual_attr_io_uphy_oe_n;
   pad_attr_t manual_attr_io_uphy_sense;
   pad_attr_t manual_attr_io_uphy_dppullup;
+  pad_attr_t manual_attr_io_clkout;
+  pad_attr_t manual_attr_io_trigger;
   // Stubbed pad tie-off //
@@ -323,9 +329,11 @@
   padring #(
     // Padring specific counts may differ from pinmux config due
     // to custom, stubbed or added pads.
-    .NDioPads(20),
+    .NDioPads(22),
     .DioPadType ({
+      BidirStd, // IO_TRIGGER
+      BidirStd, // IO_CLKOUT
       BidirStd, // IO_UPHY_DPPULLUP
       BidirStd, // IO_UPHY_SENSE
       BidirStd, // IO_UPHY_OE_N
@@ -385,6 +393,8 @@
     .dio_in_raw_o ( ),
     // Chip IOs
     .dio_pad_io ({
+      IO_TRIGGER,
+      IO_CLKOUT,
@@ -441,6 +451,8 @@
     // Core-facing
     .dio_in_o ({
+        manual_in_io_trigger,
+        manual_in_io_clkout,
@@ -463,6 +475,8 @@
     .dio_out_i ({
+        manual_out_io_trigger,
+        manual_out_io_clkout,
@@ -485,6 +499,8 @@
     .dio_oe_i ({
+        manual_oe_io_trigger,
+        manual_oe_io_clkout,
@@ -507,6 +523,8 @@
     .dio_attr_i ({
+        manual_attr_io_trigger,
+        manual_attr_io_clkout,
@@ -724,7 +742,6 @@
   // for verilator purposes, make these two the same.
   lc_ctrl_pkg::lc_tx_t lc_clk_bypass;
 // TODO: align this with ASIC version to minimize the duplication.
 // Also need to add AST simulation and FPGA emulation models for things like entropy source -
 // otherwise Verilator / FPGA will hang.
@@ -803,5 +820,59 @@
+  /////////////////////////////////////////////////////
+  // ChipWhisperer CW310/305 Capture Board Interface //
+  /////////////////////////////////////////////////////
+  // This is used to interface OpenTitan as a target with a capture board trough the ChipWhisperer
+  // 20-pin connector. This is used for SCA/FI experiments only.
+  logic unused_inputs;
+  assign unused_inputs = manual_in_io_clkout ^ manual_in_io_trigger;
+  // Synchronous clock output to capture board.
+  assign manual_out_io_clkout = manual_in_io_clk;
+  assign manual_oe_io_clkout = 1'b1;
+  // Capture trigger.
+  // We use the clkmgr_aon_idle signal of the IP of interest to form a precise capture trigger.
+  // GPIO[11:9] is used for selecting the IP of interest. The encoding is as follows (see
+  // hint_names_e enum in clkmgr_pkg.sv for details).
+  //
+  // IP              - GPIO[11:9] - Index for clkmgr_aon_idle
+  // ------------------------------------------------------------
+  //  AES            -   000      -  0
+  //  HMAC           -   001      -  1
+  //  KMAC           -   010      -  2 - not implemented on CW305
+  //  OTBN (IO_DIV4) -   011      -  3 - not implemented on CW305
+  //  OTBN           -   100      -  4 - not implemented on CW305
+  //
+  // In addition, GPIO8 is used for gating the capture trigger in software.
+  // Note that GPIO[11:8] are connected to LED[3:0] on the CW310.
+  // On the CW305, GPIO[9,8] are connected to LED[5,7].
+  clkmgr_pkg::hint_names_e trigger_sel;
+  always_comb begin : trigger_sel_mux
+    unique case ({mio_out[MioOutGpioGpio11], mio_out[MioOutGpioGpio10], mio_out[MioOutGpioGpio9]})
+      3'b000:  trigger_sel = clkmgr_pkg::HintMainAes;
+      3'b001:  trigger_sel = clkmgr_pkg::HintMainHmac;
+      3'b010:  trigger_sel = clkmgr_pkg::HintMainKmac;
+      3'b011:  trigger_sel = clkmgr_pkg::HintIoDiv4Otbn;
+      3'b100:  trigger_sel = clkmgr_pkg::HintMainOtbn;
+      default: trigger_sel = clkmgr_pkg::HintMainAes;
+    endcase;
+  end
+  logic trigger, trigger_oe;
+  assign trigger = mio_out[MioOutGpioGpio8] & ~top_earlgrey.clkmgr_aon_idle[trigger_sel];
+  assign trigger_oe = mio_oe[MioOutGpioGpio8];
+  // Synchronize trigger to manual_in_io_clk.
+  prim_flop_2sync #(
+    .Width ( 2 )
+  ) u_sync_trigger (
+    .clk_i  ( manual_in_io_clk                              ),
+    .rst_ni ( manual_in_por_n                               ),
+    .d_i    ( {trigger,               trigger_oe}           ),
+    .q_o    ( {manual_out_io_trigger, manual_oe_io_trigger} )
+  );
 endmodule : chip_earlgrey_cw310
diff --git a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_nexysvideo.sv b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_nexysvideo.sv
index 6090110..1950931 100644
--- a/hw/top_earlgrey/rtl/autogen/chip_earlgrey_nexysvideo.sv
+++ b/hw/top_earlgrey/rtl/autogen/chip_earlgrey_nexysvideo.sv
@@ -724,7 +724,6 @@
   // for verilator purposes, make these two the same.
   lc_ctrl_pkg::lc_tx_t lc_clk_bypass;
 // TODO: align this with ASIC version to minimize the duplication.
 // Also need to add AST simulation and FPGA emulation models for things like entropy source -
 // otherwise Verilator / FPGA will hang.
@@ -808,5 +807,4 @@
 endmodule : chip_earlgrey_nexysvideo
diff --git a/hw/top_englishbreakfast/data/pins_cw305.xdc b/hw/top_englishbreakfast/data/pins_cw305.xdc
index ad5bcd2..1aec16d 100644
--- a/hw/top_englishbreakfast/data/pins_cw305.xdc
+++ b/hw/top_englishbreakfast/data/pins_cw305.xdc
@@ -40,6 +40,7 @@
 set_property -dict { PACKAGE_PIN F15 IOSTANDARD LVCMOS33 } [get_ports { IOB3 }]; #JP3.F15
 set_property -dict { PACKAGE_PIN E11 IOSTANDARD LVCMOS33 } [get_ports { IOB4 }]; #JP3.E11
 set_property -dict { PACKAGE_PIN F13 IOSTANDARD LVCMOS33 } [get_ports { IOB5 }]; #JP3.F13
+set_property -dict { PACKAGE_PIN A12 IOSTANDARD LVCMOS33 } [get_ports { IOB6 }]; #JP3.A12
 set_property -dict { PACKAGE_PIN C16 IOSTANDARD LVCMOS33 DRIVE 8 SLEW FAST } [get_ports { USB_P }]; #JP3.C16
 set_property -dict { PACKAGE_PIN D13 IOSTANDARD LVCMOS33 DRIVE 8 SLEW FAST } [get_ports { USB_N }]; #JP3.D13
@@ -49,14 +50,14 @@
 set_property -dict { PACKAGE_PIN G16 IOSTANDARD LVCMOS33 } [get_ports { IO_UTX_DEBUG }]; #JP3.G16 (UART) for debugging
-## Unused pins of JP3: A12, B12, F12
+## Unused pins of JP3: B12, F12
 ## 20-Pin Connector (JP1)
-set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 } [get_ports { IOC11 }];     #JP1 PIN 10 (UART)
-set_property -dict { PACKAGE_PIN P16 IOSTANDARD LVCMOS33 } [get_ports { IOC10 }];     #JP1 PIN 12 (UART)
-set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 } [get_ports { IOB6 }];    #JP1 PIN 16 TIO4 (Trigger)
-set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 } [get_ports { TIO_CLKOUT }]; #JP1 PIN 4 TIO_HS1. Clock sync capture board.
+set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 } [get_ports { IOC11 }];      #JP1 PIN 12 TIO2    - OpenTitan UART0 TX
+set_property -dict { PACKAGE_PIN P16 IOSTANDARD LVCMOS33 } [get_ports { IOC10 }];      #JP1 PIN 10 TIO1    - OpenTitan UART0 RX
+set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 } [get_ports { IO_TRIGGER }]; #JP1 PIN 16 TIO4    - Capture Trigger
+set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 } [get_ports { IO_CLKOUT }];  #JP1 PIN  4 TIO_HS1 - Target clock
 ## USB Connector
diff --git a/hw/top_englishbreakfast/data/top_englishbreakfast.hjson b/hw/top_englishbreakfast/data/top_englishbreakfast.hjson
index 5da4319..9c4c8d1 100644
--- a/hw/top_englishbreakfast/data/top_englishbreakfast.hjson
+++ b/hw/top_englishbreakfast/data/top_englishbreakfast.hjson
@@ -981,8 +981,9 @@
           { name: 'IO_USB_DNPULLUP0', type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual USB signal for FPGA target'}
           { name: 'IO_USB_DPPULLUP0', type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual USB signal for FPGA target'}
           // ChipWhisperer IO
-          { name: 'TIO_CLKOUT',       type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual clock output for SCA setup'}
-          { name: 'IO_UTX_DEBUG',     type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual UART TX debug output'      }
+          { name: 'IO_CLKOUT',        type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual clock output for SCA setup'}
+          { name: 'IO_TRIGGER',       type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual trigger output for SCA setup'}
+          { name: 'IO_UTX_DEBUG',     type: 'BidirStd', bank: 'VCC', connection: 'manual', desc: 'Manual UART TX debug output'}
diff --git a/sw/device/lib/pinmux.c b/sw/device/lib/pinmux.c
index 476429a..8861f54 100644
--- a/sw/device/lib/pinmux.c
+++ b/sw/device/lib/pinmux.c
@@ -41,7 +41,7 @@
-  // Configure UART RX input to connect to MIO pad IOR2
+  // Configure UART0 RX input to connect to MIO pad IOC10
   mmio_region_t reg32 = mmio_region_from_addr(
   uint32_t reg_value = kTopEarlgreyPinmuxInselIoc10;
@@ -51,7 +51,7 @@
   uint32_t mask = PINMUX_MIO_PERIPH_INSEL_0_IN_0_MASK;
   mmio_region_write32(reg32, reg_offset, reg_value & mask);
-  // Configure UART TX output to connect to MIO pad IOR3
+  // Configure UART0 TX output to connect to MIO pad IOC11
   reg32 =
       mmio_region_from_addr(PINMUX0_BASE_ADDR + PINMUX_MIO_OUTSEL_0_REG_OFFSET);
   reg_value = kTopEarlgreyPinmuxOutselUart0Tx;
@@ -60,4 +60,24 @@
   reg_offset = kTopEarlgreyPinmuxMioOutIoc11 << 2;
   mmio_region_write32(reg32, reg_offset, reg_value & mask);
+  // Configure UART1 RX input to connect to MIO pad IOC8
+  reg32 = mmio_region_from_addr(PINMUX0_BASE_ADDR +
+                                PINMUX_MIO_PERIPH_INSEL_0_REG_OFFSET);
+  reg_value = kTopEarlgreyPinmuxInselIoc8;
+  // We've got one insel configuration field per register. Hence, we have to
+  // convert the enumeration index into a byte address using << 2.
+  reg_offset = kTopEarlgreyPinmuxPeripheralInUart1Rx << 2;
+  mmio_region_write32(reg32, reg_offset, reg_value & mask);
+  // Configure UART1 TX output to connect to MIO pad IOC9
+  reg32 =
+      mmio_region_from_addr(PINMUX0_BASE_ADDR + PINMUX_MIO_OUTSEL_0_REG_OFFSET);
+  reg_value = kTopEarlgreyPinmuxOutselUart1Tx;
+  // We've got one outsel configuration field per register. Hence, we have to
+  // convert the enumeration index into a byte address using << 2.
+  reg_offset = kTopEarlgreyPinmuxMioOutIoc9 << 2;
+  mmio_region_write32(reg32, reg_offset, reg_value & mask);
diff --git a/sw/device/sca/aes_serial.c b/sw/device/sca/aes_serial.c
index 372421e..a6cd8c0 100644
--- a/sw/device/sca/aes_serial.c
+++ b/sw/device/sca/aes_serial.c
@@ -198,18 +198,23 @@
  * UART.
 int main(void) {
-  const dif_uart_t *uart;
+  const dif_uart_t *uart1;
-  sca_get_uart(&uart);
+  sca_get_uart(&uart1);
-  simple_serial_init(uart);
+  LOG_INFO("Running AES serial");
+  LOG_INFO("Initializing simple serial interface to capture board.");
+  simple_serial_init(uart1);
   simple_serial_register_handler('k', aes_serial_set_key);
   simple_serial_register_handler('p', aes_serial_single_encrypt);
   simple_serial_register_handler('b', aes_serial_batch_encrypt);
+  LOG_INFO("Initializing AES unit.");
+  LOG_INFO("Starting simple serial packet handling.");
   while (true) {
diff --git a/sw/device/sca/lib/sca.c b/sw/device/sca/lib/sca.c
index 87c6147..758d330 100644
--- a/sw/device/sca/lib/sca.c
+++ b/sw/device/sca/lib/sca.c
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 #include "sw/device/sca/lib/sca.h"
 #include "sw/device/lib/arch/device.h"
 #include "sw/device/lib/dif/dif_gpio.h"
 #include "sw/device/lib/dif/dif_rv_timer.h"
@@ -27,8 +28,15 @@
 enum {
    * GPIO capture trigger values.
+   *
+   * GPIO[11:9]:   Trigger select, 000 for AES, see chiplevel.sv.tpl for
+   *               details.
+   * GPIO8:        Trigger enable
-  kGpioCaptureTriggerHigh = 0x08200,
+  kGpioCaptureTriggerSelMask = 0x00E00,
+  kGpioCaptureTriggerEnMask = 0x00100,
+  kGpioCaptureTriggerSel = 0x00000,
+  kGpioCaptureTriggerHigh = 0x00100,
   kGpioCaptureTriggerLow = 0x00000,
    * RV timer settings.
@@ -37,7 +45,8 @@
   kRvTimerHart = kTopEarlgreyPlicTargetIbex0,
-static dif_uart_t uart;
+static dif_uart_t uart0;
+static dif_uart_t uart1;
 static dif_gpio_t gpio;
 static dif_rv_timer_t timer;
@@ -47,19 +56,27 @@
  * Initializes the UART peripheral.
 static void sca_init_uart(void) {
+  const dif_uart_config_t uart_config = {
+      .baudrate = kUartBaudrate,
+      .clk_freq_hz = kClockFreqPeripheralHz,
+      .parity_enable = kDifUartToggleDisabled,
+      .parity = kDifUartParityEven,
+  };
           .base_addr = mmio_region_from_addr(TOP_EARLGREY_UART0_BASE_ADDR),
-      &uart));
-      dif_uart_configure(&uart, (dif_uart_config_t){
-                                    .baudrate = kUartBaudrate,
-                                    .clk_freq_hz = kClockFreqPeripheralHz,
-                                    .parity_enable = kDifUartToggleDisabled,
-                                    .parity = kDifUartParityEven,
-                                }));
-  base_uart_stdout(&uart);
+      &uart0));
+  IGNORE_RESULT(dif_uart_configure(&uart0, uart_config));
+  base_uart_stdout(&uart0);
+  IGNORE_RESULT(dif_uart_init(
+      (dif_uart_params_t){
+          .base_addr = mmio_region_from_addr(TOP_EARLGREY_UART1_BASE_ADDR),
+      },
+      &uart1));
+  IGNORE_RESULT(dif_uart_configure(&uart1, uart_config));
@@ -69,8 +86,10 @@
   dif_gpio_params_t gpio_params = {
       .base_addr = mmio_region_from_addr(TOP_EARLGREY_GPIO_BASE_ADDR)};
   IGNORE_RESULT(dif_gpio_init(gpio_params, &gpio));
-      dif_gpio_output_set_enabled_all(&gpio, kGpioCaptureTriggerHigh));
+  IGNORE_RESULT(dif_gpio_output_set_enabled_all(
+      &gpio, kGpioCaptureTriggerSelMask | kGpioCaptureTriggerEnMask));
+  IGNORE_RESULT(dif_gpio_write_masked(&gpio, kGpioCaptureTriggerSelMask,
+                                      kGpioCaptureTriggerSel));
@@ -112,14 +131,16 @@
-void sca_get_uart(const dif_uart_t **uart_out) { *uart_out = &uart; }
+void sca_get_uart(const dif_uart_t **uart_out) { *uart_out = &uart1; }
 void sca_set_trigger_high() {
-  IGNORE_RESULT(dif_gpio_write_all(&gpio, kGpioCaptureTriggerHigh));
+  IGNORE_RESULT(dif_gpio_write_masked(&gpio, kGpioCaptureTriggerEnMask,
+                                      kGpioCaptureTriggerHigh));
 void sca_set_trigger_low() {
-  IGNORE_RESULT(dif_gpio_write_all(&gpio, kGpioCaptureTriggerLow));
+  IGNORE_RESULT(dif_gpio_write_masked(&gpio, kGpioCaptureTriggerEnMask,
+                                      kGpioCaptureTriggerLow));
 void sca_call_and_sleep(sca_callee callee, uint32_t sleep_cycles) {
diff --git a/sw/device/sca/lib/simple_serial.c b/sw/device/sca/lib/simple_serial.c
index 2c4ebe5..c07f94f 100644
--- a/sw/device/sca/lib/simple_serial.c
+++ b/sw/device/sca/lib/simple_serial.c
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 #include "sw/device/sca/lib/simple_serial.h"
 #include "sw/device/lib/arch/device.h"
 #include "sw/device/lib/base/memory.h"
 #include "sw/device/lib/dif/dif_uart.h"
@@ -205,9 +206,12 @@
 void simple_serial_send_packet(const uint8_t cmd, const uint8_t *data,
                                size_t data_len) {
-  base_printf("%c", cmd);
+  char buf;
+  base_snprintf(&buf, 1, "%c", cmd);
+  IGNORE_RESULT(dif_uart_byte_send_polled(uart, buf));
   simple_serial_print_hex(data, data_len);
-  base_printf("\n");
+  base_snprintf(&buf, 1, "\n");
+  IGNORE_RESULT(dif_uart_byte_send_polled(uart, buf));
 void simple_serial_send_status(uint8_t res) {
@@ -215,7 +219,10 @@
 void simple_serial_print_hex(const uint8_t *data, size_t data_len) {
+  char buf[2];
   for (size_t i = 0; i < data_len; ++i) {
-    base_printf("%2x", data[i]);
+    base_snprintf(&buf[0], 2, "%2x", data[i]);
+    IGNORE_RESULT(dif_uart_byte_send_polled(uart, buf[0]));
+    IGNORE_RESULT(dif_uart_byte_send_polled(uart, buf[1]));
diff --git a/util/topgen/templates/chiplevel.sv.tpl b/util/topgen/templates/chiplevel.sv.tpl
index 2bd7596..5426c00 100644
--- a/util/topgen/templates/chiplevel.sv.tpl
+++ b/util/topgen/templates/chiplevel.sv.tpl
@@ -1060,11 +1060,6 @@
   // for verilator purposes, make these two the same.
   lc_ctrl_pkg::lc_tx_t lc_clk_bypass;
-% if target["name"] == "cw305":
-  // This is used for outputting the capture trigger
-  logic [pinmux_reg_pkg::NMioPads-1:0] mio_out_pre;
-% endif
 // TODO: align this with ASIC version to minimize the duplication.
 // Also need to add AST simulation and FPGA emulation models for things like entropy source -
 // otherwise Verilator / FPGA will hang.
@@ -1141,11 +1136,7 @@
     // Multiplexed I/O
     .mio_in_i        ( mio_in   ),
-% if target["name"] == "cw305":
-    .mio_out_o       ( mio_out_pre  ),
-% else:
     .mio_out_o       ( mio_out  ),
-% endif
     .mio_oe_o        ( mio_oe   ),
     // Dedicated I/O
@@ -1170,45 +1161,74 @@
 % endif
-## CW305 capture board interface                                 ##
+## CW310/305 capture board interface                             ##
-## TODO: This needs to be adapted to enable captures on the CW310. In particular,
-## - a precise capture trigger and the target clock need to be output, and
-## - a separate UART should be used for the simpleserial communication with the capture board.
-## See also pins_cw310.xdc
-% if target["name"] in ["cw305"]:
+% if target["name"] in ["cw310", "cw305"]:
-  //////////////////////////////////////
-  // Generate precise capture trigger //
-  //////////////////////////////////////
-  // TODO: make this a "manual" IO specific to the CW305 target
-  // such that we can decouple this from the MIO signals.
-  localparam int MioIdxTrigger = 15;
-  // To obtain a more precise capture trigger for side-channel analysis, we only forward the
-  // software-controlled capture trigger when the AES module is actually busy (performing
-  // either encryption/decryption or clearing internal registers).
-  // GPIO15 is used as capture trigger (mapped to IOB6 at the moment in pinmux.c).
-  always_comb begin : p_trigger
-    mio_out = mio_out_pre;
-    mio_out[MioIdxTrigger] = mio_out_pre[MioIdxTrigger] &
-                             ~top_${top["name"]}.clkmgr_aon_idle[clkmgr_pkg::HintMainAes];
-  end
-  //////////////////////
-  // ChipWhisperer IO //
-  //////////////////////
+  /////////////////////////////////////////////////////
+  // ChipWhisperer CW310/305 Capture Board Interface //
+  /////////////////////////////////////////////////////
+  // This is used to interface OpenTitan as a target with a capture board trough the ChipWhisperer
+  // 20-pin connector. This is used for SCA/FI experiments only.
   logic unused_inputs;
-  assign unused_inputs = manual_in_tio_clkout ^ manual_in_io_utx_debug;
+  % if target["name"] == "cw305":
+  assign unused_inputs = manual_in_io_clkout ^ manual_in_io_trigger ^ manual_in_io_utx_debug;
+  % else:
+  assign unused_inputs = manual_in_io_clkout ^ manual_in_io_trigger;
+  % endif
-  // Clock output to capture board.
-  assign manual_out_tio_clkout = manual_in_io_clk;
-  assign manual_oe_tio_clkout = 1'b1;
+  // Synchronous clock output to capture board.
+  assign manual_out_io_clkout = manual_in_io_clk;
+  assign manual_oe_io_clkout = 1'b1;
+  // Capture trigger.
+  // We use the clkmgr_aon_idle signal of the IP of interest to form a precise capture trigger.
+  // GPIO[11:9] is used for selecting the IP of interest. The encoding is as follows (see
+  // hint_names_e enum in clkmgr_pkg.sv for details).
+  //
+  // IP              - GPIO[11:9] - Index for clkmgr_aon_idle
+  // ------------------------------------------------------------
+  //  AES            -   000      -  0
+  //  HMAC           -   001      -  1
+  //  KMAC           -   010      -  2 - not implemented on CW305
+  //  OTBN (IO_DIV4) -   011      -  3 - not implemented on CW305
+  //  OTBN           -   100      -  4 - not implemented on CW305
+  //
+  // In addition, GPIO8 is used for gating the capture trigger in software.
+  // Note that GPIO[11:8] are connected to LED[3:0] on the CW310.
+  // On the CW305, GPIO[9,8] are connected to LED[5,7].
+  clkmgr_pkg::hint_names_e trigger_sel;
+  % if target["name"] == "cw305":
+  assign trigger_sel = mio_out[MioOutGpioGpio9] ? clkmgr_pkg::HintMainHmac :
+                                                  clkmgr_pkg::HintMainAes;
+  % else:
+  always_comb begin : trigger_sel_mux
+    unique case ({mio_out[MioOutGpioGpio11], mio_out[MioOutGpioGpio10], mio_out[MioOutGpioGpio9]})
+      3'b000:  trigger_sel = clkmgr_pkg::HintMainAes;
+      3'b001:  trigger_sel = clkmgr_pkg::HintMainHmac;
+      3'b010:  trigger_sel = clkmgr_pkg::HintMainKmac;
+      3'b011:  trigger_sel = clkmgr_pkg::HintIoDiv4Otbn;
+      3'b100:  trigger_sel = clkmgr_pkg::HintMainOtbn;
+      default: trigger_sel = clkmgr_pkg::HintMainAes;
+    endcase;
+  end
+  % endif
+  logic trigger, trigger_oe;
+  assign trigger = mio_out[MioOutGpioGpio8] & ~top_${top["name"]}.clkmgr_aon_idle[trigger_sel];
+  assign trigger_oe = mio_oe[MioOutGpioGpio8];
+  // Synchronize trigger to manual_in_io_clk.
+  prim_flop_2sync #(
+    .Width ( 2 )
+  ) u_sync_trigger (
+    .clk_i  ( manual_in_io_clk                              ),
+    .rst_ni ( manual_in_por_n                               ),
+    .d_i    ( {trigger,               trigger_oe}           ),
+    .q_o    ( {manual_out_io_trigger, manual_oe_io_trigger} )
+  );
 % endif
 ## This separate UART debugging output is needed for the CW305 only.
 % if target["name"] == "cw305":
@@ -1216,7 +1236,6 @@
   // UART Tx for debugging. The UART itself is connected to the capture board.
   assign manual_out_io_utx_debug = top_${top["name"]}.cio_uart0_tx_d2p;
   assign manual_oe_io_utx_debug = 1'b1;
 % endif
 endmodule : chip_${top["name"]}_${target["name"]}