[util, reggen] Support standardized cdc handling for regfile

- Support two asynchronous schemes on regfile
  - Fully asynchronous - a tlul_fifo_async directly instantiated in regfile
  - Mixed asynchronous - allow user to selectively mark registers as async
  - These two schemes are mutually exclusive

- Add an 'async' key for bus interfaces to indicate the reg block is fully asynchronous
- Add an 'async' key per register to indicate register clock domain
- Add extra clock/reset ports to reg module
- Add prim_subreg_cdc to handle register clock domain crossing

Signed-off-by: Timothy Chen <timothytim@google.com>

[aon_timer] Updates to aon_timer to experiment with new cdc scheme

Signed-off-by: Timothy Chen <timothytim@google.com>
diff --git a/util/reggen/bus_interfaces.py b/util/reggen/bus_interfaces.py
index 98ce995..32d3312 100644
--- a/util/reggen/bus_interfaces.py
+++ b/util/reggen/bus_interfaces.py
@@ -14,30 +14,36 @@
     def __init__(self,
                  has_unnamed_host: bool,
                  named_hosts: List[str],
+                 host_async: Dict[Optional[str], str],
                  has_unnamed_device: bool,
-                 named_devices: List[str]):
+                 named_devices: List[str],
+                 device_async: Dict[Optional[str], str]):
         assert has_unnamed_device or named_devices
         assert len(named_hosts) == len(set(named_hosts))
         assert len(named_devices) == len(set(named_devices))
 
         self.has_unnamed_host = has_unnamed_host
         self.named_hosts = named_hosts
+        self.host_async = host_async
         self.has_unnamed_device = has_unnamed_device
         self.named_devices = named_devices
+        self.device_async = device_async
 
     @staticmethod
     def from_raw(raw: object, where: str) -> 'BusInterfaces':
         has_unnamed_host = False
         named_hosts = []
+        host_async = {}
 
         has_unnamed_device = False
         named_devices = []
+        device_async = {}
 
         for idx, raw_entry in enumerate(check_list(raw, where)):
             entry_what = 'entry {} of {}'.format(idx + 1, where)
             ed = check_keys(raw_entry, entry_what,
                             ['protocol', 'direction'],
-                            ['name'])
+                            ['name', 'async'])
 
             protocol = check_str(ed['protocol'],
                                  'protocol field of ' + entry_what)
@@ -54,6 +60,9 @@
             name = check_optional_str(ed.get('name'),
                                       'name field of ' + entry_what)
 
+            async_clk = check_optional_str(ed.get('async'),
+                                           'async field of ' + entry_what)
+
             if direction == 'host':
                 if name is None:
                     if has_unnamed_host:
@@ -67,6 +76,9 @@
                                          'with name {!r} at {}'
                                          .format(name, where))
                     named_hosts.append(name)
+
+                if async_clk is not None:
+                    host_async[name] = async_clk
             else:
                 if name is None:
                     if has_unnamed_device:
@@ -81,11 +93,14 @@
                                          .format(name, where))
                     named_devices.append(name)
 
+                if async_clk is not None:
+                    device_async[name] = async_clk
+
         if not (has_unnamed_device or named_devices):
             raise ValueError('No device interface at ' + where)
 
-        return BusInterfaces(has_unnamed_host, named_hosts,
-                             has_unnamed_device, named_devices)
+        return BusInterfaces(has_unnamed_host, named_hosts, host_async,
+                             has_unnamed_device, named_devices, device_async)
 
     def has_host(self) -> bool:
         return bool(self.has_unnamed_host or self.named_hosts)
diff --git a/util/reggen/clocking.py b/util/reggen/clocking.py
index 41108fd..b2330c4 100644
--- a/util/reggen/clocking.py
+++ b/util/reggen/clocking.py
@@ -7,6 +7,7 @@
 from typing import Dict, List, Optional
 
 from .lib import check_keys, check_list, check_bool, check_optional_name
+import re
 
 
 class ClockingItem:
@@ -14,12 +15,14 @@
                  clock: Optional[str],
                  reset: Optional[str],
                  idle: Optional[str],
-                 primary: bool):
+                 primary: bool,
+                 clock_base_name: Optional[str]):
         if primary:
             assert clock is not None
             assert reset is not None
 
         self.clock = clock
+        self.clock_base_name = clock_base_name
         self.reset = reset
         self.primary = primary
         self.idle = idle
@@ -35,6 +38,15 @@
         primary = check_bool(rd.get('primary', only_item),
                              'primary field of ' + what)
 
+        match = re.match(r'^clk_([A-Za-z0-9_]+)_i', str(clock))
+        if not clock or clock in ['clk_i', 'scan_clk_i']:
+            clock_base_name = ""
+        elif match:
+            clock_base_name = match.group(1)
+        else:
+            raise ValueError(f'clock name must be of the form clk_*_i or clk_i. '
+                             f'{clock} is illegal.')
+
         if primary:
             if clock is None:
                 raise ValueError('No clock signal for primary '
@@ -43,7 +55,7 @@
                 raise ValueError('No reset signal for primary '
                                  f'clocking item at {what}.')
 
-        return ClockingItem(clock, reset, idle, primary)
+        return ClockingItem(clock, reset, idle, primary, clock_base_name)
 
     def _asdict(self) -> Dict[str, object]:
         ret = {}  # type: Dict[str, object]
@@ -100,3 +112,15 @@
 
     def reset_signals(self) -> List[str]:
         return [item.reset for item in self.items if item.reset is not None]
+
+    def get_by_clock(self, name: Optional[str]) -> ClockingItem:
+        ret = None
+        for item in self.items:
+            if name == item.clock:
+                 ret = item
+                 break
+
+        if ret is None:
+            raise ValueError(f'The requested clock {name} does not exist.')
+        else:
+            return ret
diff --git a/util/reggen/ip_block.py b/util/reggen/ip_block.py
index 06d473c..9a59d3c 100644
--- a/util/reggen/ip_block.py
+++ b/util/reggen/ip_block.py
@@ -202,8 +202,6 @@
 
         scan = check_bool(rd.get('scan', False), 'scan field of ' + what)
 
-        reg_blocks = RegBlock.build_blocks(init_block, rd['registers'])
-
         r_inter_signals = check_list(rd.get('inter_signal_list', []),
                                      'inter_signal_list field')
         inter_signals = [
@@ -224,6 +222,10 @@
         clocking = Clocking.from_raw(rd['clocking'],
                                      'clocking field of ' + what)
 
+        reg_blocks = RegBlock.build_blocks(init_block, rd['registers'],
+                                           bus_interfaces,
+                                           clocking)
+
         xputs = (
             Signal.from_raw_list('available_inout_list for block ' + name,
                                  rd.get('available_inout_list', [])),
diff --git a/util/reggen/multi_register.py b/util/reggen/multi_register.py
index 1963bd4..004bd26 100644
--- a/util/reggen/multi_register.py
+++ b/util/reggen/multi_register.py
@@ -5,6 +5,7 @@
 from typing import Dict, List
 
 from reggen import register
+from .clocking import Clocking
 from .field import Field
 from .lib import check_keys, check_str, check_name, check_bool
 from .params import ReggenParams
@@ -38,7 +39,13 @@
     'compact': [
         'pb', "If true, allow multireg compacting."
         "If false, do not compact."
-    ]
+    ],
+    'cdc': [
+        's',
+        "indicates the register must cross to a different "
+        "clock domain before use.  The value shown here "
+        "should correspond to one of the module's clocks."
+    ],
 })
 
 
@@ -48,7 +55,8 @@
                  addrsep: int,
                  reg_width: int,
                  params: ReggenParams,
-                 raw: object):
+                 raw: object,
+                 clocks: Clocking):
         super().__init__(offset)
 
         rd = check_keys(raw, 'multireg',
@@ -64,7 +72,12 @@
         reg_rd = {key: value
                   for key, value in rd.items()
                   if key in reg_allowed_keys}
-        self.reg = Register.from_raw(reg_width, offset, params, reg_rd)
+        self.reg = Register.from_raw(reg_width, offset, params, reg_rd, clocks)
+
+        # The entire multi-reg block is always on the same clock
+        # This is guaranteed by design
+        self.async_name = self.reg.async_name
+        self.async_clk = self.reg.async_clk
 
         self.cname = check_name(rd['cname'],
                                 'cname field of multireg {}'
diff --git a/util/reggen/reg_block.py b/util/reggen/reg_block.py
index fa9d3c4..b75415d 100644
--- a/util/reggen/reg_block.py
+++ b/util/reggen/reg_block.py
@@ -9,6 +9,8 @@
 
 from .alert import Alert
 from .access import SWAccess, HWAccess
+from .bus_interfaces import BusInterfaces
+from .clocking import Clocking, ClockingItem
 from .field import Field
 from .signal import Signal
 from .lib import check_int, check_list, check_str_dict, check_str
@@ -25,6 +27,8 @@
         self._reg_width = reg_width
         self._params = params
 
+        self.name = ""  # type: str
+        self.clocks = {}  # type: Dict[str, ClockingItem]
         self.offset = 0
         self.multiregs = []  # type: List[MultiRegister]
         self.registers = []  # type: List[Register]
@@ -57,9 +61,14 @@
         # A list of all write enable names
         self.wennames = []  # type: List[str]
 
+        # Boolean indication that the block is fully asynchronous
+        self.async_if = False
+
     @staticmethod
     def build_blocks(block: 'RegBlock',
-                     raw: object) -> Dict[Optional[str], 'RegBlock']:
+                     raw: object,
+                     bus: BusInterfaces,
+                     clocks: Clocking) -> Dict[Optional[str], 'RegBlock']:
         '''Build a dictionary of blocks for a 'registers' field in the hjson
 
         There are two different syntaxes we might see here. The simple syntax
@@ -76,7 +85,10 @@
         '''
         if isinstance(raw, list):
             # This is the simple syntax
-            block.add_raw_registers(raw, 'registers field at top-level')
+            block.add_raw_registers(raw,
+                                    'registers field at top-level',
+                                    clocks,
+                                    bus.device_async.get(None))
             return {None: block}
 
         # This is the more complicated syntax
@@ -101,22 +113,38 @@
             block.add_raw_registers(rb_val,
                                     'item {} of the registers '
                                     'dictionary at top-level'
-                                    .format(idx + 1))
+                                    .format(idx + 1),
+                                    clocks,
+                                    bus.device_async.get(r_key))
             block.validate()
 
             assert rb_key not in ret
+            block.name = rb_key
             ret[rb_key] = block
 
         return ret
 
-    def add_raw_registers(self, raw: object, what: str) -> None:
+    def add_raw_registers(self,
+                          raw: object,
+                          what: str,
+                          clocks: Clocking,
+                          async_if: Optional[str]) -> None:
+
+        # the interface is fully asynchronous
+        if async_if:
+            self.async_if = True
+            self.clocks[async_if] = clocks.get_by_clock(async_if)
+
         rl = check_list(raw, 'registers field at top-level')
         for entry_idx, entry_raw in enumerate(rl):
             where = ('entry {} of the top-level registers field'
                      .format(entry_idx + 1))
-            self.add_raw(where, entry_raw)
+            self.add_raw(where, entry_raw, clocks)
 
-    def add_raw(self, where: str, raw: object) -> None:
+    def add_raw(self,
+                where: str,
+                raw: object,
+                clocks: Clocking) -> None:
         entry = check_str_dict(raw, where)
 
         handlers = {
@@ -151,14 +179,49 @@
         entry_where = ('At offset {:#x}, {}, type {!r}'
                        .format(self.offset, where, entry_type))
 
-        handlers[entry_type](entry_where, entry_body)
+        handlers[entry_type](entry_where, entry_body, clocks)
 
-    def _handle_register(self, where: str, body: object) -> None:
+    def _validate_async(self, name: Optional[str], clk: object) -> None:
+        '''Check for async definition consistency
+
+        If a reg block is marked fully asynchronous through its bus interface,
+        its register definition cannot also mark individual registers with
+        asynchronous designations.
+
+        The two asynchronous regfile schemes are mutually exclusive.
+        '''
+
+        if self.name:
+            block_name = self.name
+        else:
+            block_name = "Default"
+
+        if self.async_if and name:
+            raise ValueError(f'''
+            {block_name} register block has incompatible async definitions.
+            The corresponding device interface is marked fully async, however
+            there are individual registers that also contain the async_clk
+            designation, this is not allowed.
+
+            Either remove all register async_clk designations, or remove
+            async designation of the bus interface.
+            ''')
+
+        # If there is an asynchronous clock defined, then the clock must be a
+        # valid clocking item
+        if name:
+            assert isinstance(clk, ClockingItem)
+            self.clocks[name] = clk
+
+    def _handle_register(self, where: str, body: object, clocks: Clocking) -> None:
         reg = Register.from_raw(self._reg_width,
-                                self.offset, self._params, body)
+                                self.offset, self._params, body, clocks)
+
+        self._validate_async(reg.async_name, reg.async_clk)
+
         self.add_register(reg)
 
-    def _handle_reserved(self, where: str, body: object) -> None:
+    def _handle_reserved(self, where: str, body: object, clocks: Optional[Clocking]) -> None:
         nreserved = check_int(body, 'body of ' + where)
         if nreserved <= 0:
             raise ValueError('Reserved count in {} is {}, '
@@ -167,7 +230,7 @@
 
         self.offset += self._addrsep * nreserved
 
-    def _handle_skipto(self, where: str, body: object) -> None:
+    def _handle_skipto(self, where: str, body: object, clocks: Optional[Clocking]) -> None:
         skipto = check_int(body, 'body of ' + where)
         if skipto < self.offset:
             raise ValueError('Destination of skipto in {} is {:#x}, '
@@ -179,7 +242,7 @@
                              .format(where, skipto, self._addrsep))
         self.offset = skipto
 
-    def _handle_window(self, where: str, body: object) -> None:
+    def _handle_window(self, where: str, body: object, clocks: Optional[Clocking]) -> None:
         window = Window.from_raw(self.offset,
                                  self._reg_width, self._params, body)
         if window.name is not None:
@@ -191,9 +254,14 @@
                                          self.name_to_offset[lname]))
         self.add_window(window)
 
-    def _handle_multireg(self, where: str, body: object) -> None:
+    def _handle_multireg(self, where: str, body: object, clocks: Clocking) -> None:
         mr = MultiRegister(self.offset,
-                           self._addrsep, self._reg_width, self._params, body)
+                           self._addrsep, self._reg_width, self._params, body,
+                           clocks)
+
+        # validate async schemes
+        self._validate_async(mr.async_name, mr.async_clk)
+
         for reg in mr.regs:
             lname = reg.name.lower()
             if lname in self.name_to_offset:
@@ -344,6 +412,8 @@
         reg = Register(self.offset,
                        reg_name,
                        reg_desc,
+                       async_name="",
+                       async_clk=None,
                        hwext=is_testreg,
                        hwqe=is_testreg,
                        hwre=False,
diff --git a/util/reggen/reg_top.sv.tpl b/util/reggen/reg_top.sv.tpl
index aa29048..b9d9d34 100644
--- a/util/reggen/reg_top.sv.tpl
+++ b/util/reggen/reg_top.sv.tpl
@@ -9,6 +9,7 @@
   from reggen.lib import get_basename
   from reggen.register import Register
   from reggen.multi_register import MultiRegister
+  from reggen.bits import Bits
 
   num_wins = len(rb.windows)
   num_wins_width = ((num_wins+1).bit_length()) - 1
@@ -42,12 +43,28 @@
   common_data_intg_gen = 0 if rb.has_data_intg_passthru else 1
   adapt_data_intg_gen = 1 if rb.has_data_intg_passthru else 0
   assert common_data_intg_gen != adapt_data_intg_gen
+
+  # declare a fully asynchronous interface
+  reg_clk_expr = "clk_i"
+  reg_rst_expr = "rst_ni"
+  tl_h2d_expr = "tl_i"
+  tl_d2h_expr = "tl_o"
+  if rb.async_if:
+    tl_h2d_expr = "tl_async_h2d"
+    tl_d2h_expr = "tl_async_d2h"
+    for clock in rb.clocks.values():
+      reg_clk_expr = clock.clock
+      reg_rst_expr = clock.reset
 %>
 `include "prim_assert.sv"
 
 module ${mod_name} (
   input clk_i,
   input rst_ni,
+% for clock in rb.clocks.values():
+  input ${clock.clock},
+  input ${clock.reset},
+% endfor
 
   input  tlul_pkg::tl_h2d_t tl_i,
   output tlul_pkg::tl_d2h_t tl_o,
@@ -94,21 +111,40 @@
   logic          addrmiss, wr_err;
 
   logic [DW-1:0] reg_rdata_next;
+  logic reg_busy;
 
   tlul_pkg::tl_h2d_t tl_reg_h2d;
   tlul_pkg::tl_d2h_t tl_reg_d2h;
 % endif
 
+  % if rb.async_if:
+  tlul_pkg::tl_h2d_t tl_async_h2d;
+  tlul_pkg::tl_d2h_t tl_async_d2h;
+  tlul_fifo_async #(
+    .ReqDepth(2),
+    .RspDepth(2)
+  ) u_if_sync (
+    .clk_h_i(clk_i),
+    .rst_h_ni(rst_ni),
+    .clk_d_i(${reg_clk_expr}),
+    .rst_d_ni(${reg_rst_expr}),
+    .tl_h_i(tl_i),
+    .tl_h_o(tl_o),
+    .tl_d_o(${tl_h2d_expr}),
+    .tl_d_i(${tl_d2h_expr})
+  );
+  % endif
+
   // incoming payload check
   logic intg_err;
   tlul_cmd_intg_chk u_chk (
-    .tl_i,
+    .tl_i(${tl_h2d_expr}),
     .err_o(intg_err)
   );
 
   logic intg_err_q;
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
+  always_ff @(posedge ${reg_clk_expr} or negedge ${reg_rst_expr}) begin
+    if (!${reg_rst_expr}) begin
       intg_err_q <= '0;
     end else if (intg_err) begin
       intg_err_q <= 1'b1;
@@ -126,17 +162,17 @@
     .EnableDataIntgGen(${common_data_intg_gen})
   ) u_rsp_intg_gen (
     .tl_i(tl_o_pre),
-    .tl_o
+    .tl_o(${tl_d2h_expr})
   );
 
 % if num_dsp == 1:
   ## Either no windows (and just registers) or no registers and only
   ## one window.
   % if num_wins == 0:
-  assign tl_reg_h2d = tl_i;
+  assign tl_reg_h2d = ${tl_h2d_expr};
   assign tl_o_pre   = tl_reg_d2h;
   % else:
-  assign tl_win_o = tl_i;
+  assign tl_win_o = ${tl_h2d_expr};
   assign tl_o_pre = tl_win_i;
   % endif
 % else:
@@ -183,9 +219,9 @@
     .DReqDepth  ({${num_dsp}{4'h0}}),
     .DRspDepth  ({${num_dsp}{4'h0}})
   ) u_socket (
-    .clk_i,
-    .rst_ni,
-    .tl_h_i (tl_i),
+    .clk_i  (${reg_clk_expr}),
+    .rst_ni (${reg_rst_expr}),
+    .tl_h_i (${tl_h2d_expr}),
     .tl_h_o (tl_o_pre),
     .tl_d_o (tl_socket_h2d),
     .tl_d_i (tl_socket_d2h),
@@ -202,12 +238,12 @@
       base_addr = w.offset
       limit_addr = w.offset + w.size_in_bytes
 
-      hi_check = 'tl_i.a_address[AW-1:0] < {}'.format(limit_addr)
+      hi_check = f'{tl_h2d_expr}.a_address[AW-1:0] < {limit_addr}'
       addr_checks = []
       if base_addr > 0:
-        addr_checks.append('tl_i.a_address[AW-1:0] >= {}'.format(base_addr))
+        addr_checks.append(f'{tl_h2d_expr}.a_address[AW-1:0] >= {base_addr}')
       if limit_addr < 2**addr_width:
-        addr_checks.append('tl_i.a_address[AW-1:0] < {}'.format(limit_addr))
+        addr_checks.append(f'{tl_h2d_expr}.a_address[AW-1:0] < {limit_addr}')
 
       addr_test = ' && '.join(addr_checks)
 %>\
@@ -231,8 +267,8 @@
     .RegDw(DW),
     .EnableDataIntgGen(${adapt_data_intg_gen})
   ) u_reg_if (
-    .clk_i,
-    .rst_ni,
+    .clk_i  (${reg_clk_expr}),
+    .rst_ni (${reg_rst_expr}),
 
     .tl_i (tl_reg_h2d),
     .tl_o (tl_reg_d2h),
@@ -242,10 +278,32 @@
     .addr_o  (reg_addr),
     .wdata_o (reg_wdata),
     .be_o    (reg_be),
+    .busy_i  (reg_busy),
     .rdata_i (reg_rdata),
     .error_i (reg_error)
   );
 
+% if not rb.async_if:
+  // cdc oversampling signals
+  % for clock in rb.clocks.values():
+  <%
+    clk_name = clock.clock_base_name
+    tgl_expr = clk_name + "_tgl"
+    cname = clock.clock
+    rname = clock.reset
+  %>\
+  logic sync_${clk_name}_update;
+  prim_pulse_sync u_${tgl_expr} (
+    .clk_src_i(${cname}),
+    .rst_src_ni(${rname}),
+    .src_pulse_i(1'b1),
+    .clk_dst_i(${reg_clk_expr}),
+    .rst_dst_ni(${reg_rst_expr}),
+    .dst_pulse_o(sync_${clk_name}_update)
+  );
+  % endfor
+% endif
+
   % if block.expose_reg_if:
   assign reg2hw.reg_if.reg_we    = reg_we;
   assign reg2hw.reg_if.reg_re    = reg_re;
@@ -267,7 +325,7 @@
         fld_suff = '_' + f.name.lower() if len(r.fields) > 1 else ''
         sig_name = r.name.lower() + fld_suff
 %>\
-${field_sig_decl(f, sig_name, r.hwext, r.shadowed)}\
+${field_sig_decl(f, sig_name, r.hwext, r.shadowed, r.async_clk)}\
     % endfor
   % endfor
 
@@ -401,6 +459,35 @@
       end
     endcase
   end
+
+  // register busy
+  % if rb.async_if:
+  assign reg_busy = '0;
+  % else:
+  always_comb begin
+    reg_busy = '0;
+    unique case (1'b1)
+      % for i, r in enumerate(regs_flat):
+        % if r.async_clk and len(r.fields) == 1:
+      addr_hit[${i}]: begin
+        reg_busy = ${r.name.lower() + "_busy"};
+      end
+        % elif r.async_clk:
+      addr_hit[${i}]: begin
+        reg_busy =
+          % for f in r.fields:
+          ${r.name.lower() + "_" + f.name.lower() + "_busy"}${";" if loop.last else " |"}
+          % endfor
+      end
+        % endif
+      % endfor
+      default: begin
+        reg_busy  = '0;
+      end
+    endcase
+  end
+  % endif
+
 % endif
 
   // Unused signal tieoff
@@ -420,12 +507,12 @@
 % if rb.all_regs:
 
   // Assertions for Register Interface
-  `ASSERT_PULSE(wePulse, reg_we)
-  `ASSERT_PULSE(rePulse, reg_re)
+  `ASSERT_PULSE(wePulse, reg_we, ${reg_clk_expr}, !${reg_rst_expr})
+  `ASSERT_PULSE(rePulse, reg_re, ${reg_clk_expr}, !${reg_rst_expr})
 
-  `ASSERT(reAfterRv, $rose(reg_re || reg_we) |=> tl_o.d_valid)
+  `ASSERT(reAfterRv, $rose(reg_re || reg_we) |=> tl_o_pre.d_valid, ${reg_clk_expr}, !${reg_rst_expr})
 
-  `ASSERT(en2addrHit, (reg_we || reg_re) |-> $onehot0(addr_hit))
+  `ASSERT(en2addrHit, (reg_we || reg_re) |-> $onehot0(addr_hit), ${reg_clk_expr}, !${reg_rst_expr})
 
   // this is formulated as an assumption such that the FPV testbenches do disprove this
   // property by mistake
@@ -453,13 +540,16 @@
   logic ${reg.name.lower()}_we;
   % endif
 </%def>\
-<%def name="field_sig_decl(field, sig_name, hwext, shadowed)">\
+<%def name="field_sig_decl(field, sig_name, hwext, shadowed, async_clk)">\
   % if field.swaccess.allows_read():
   logic ${str_arr_sv(field.bits)}${sig_name}_qs;
   % endif
   % if field.swaccess.allows_write():
   logic ${str_arr_sv(field.bits)}${sig_name}_wd;
   % endif
+  % if async_clk:
+  logic ${str_arr_sv(Bits(0,0))}${sig_name}_busy;
+  % endif
 </%def>\
 <%def name="finst_gen(reg, field, finst_name, fsig_name)">\
 <%
@@ -498,10 +588,27 @@
 
     qs_expr = f'{finst_name}_qs' if field.swaccess.allows_read() else ''
 %>\
+<%
+    clk_expr = reg.async_clk.clock if reg.async_clk else reg_clk_expr
+    rst_expr = reg.async_clk.reset if reg.async_clk else reg_rst_expr
+    if reg.async_clk:
+      update_expr = "sync_" + reg.async_clk.clock.strip("_iclk_")+ "_update"
+%>\
   % if reg.hwext:       ## if hwext, instantiate prim_subreg_ext
-  prim_subreg_ext #(
+<%
+    subreg_block = "prim_subreg_ext_async" if reg.async_clk else "prim_subreg_ext"
+%>\
+  ${subreg_block} #(
     .DW    (${field.bits.width()})
   ) u_${finst_name} (
+    % if reg.async_clk:
+    .clk_src_i    (${reg_clk_expr}),
+    .rst_src_ni   (${reg_rst_expr}),
+    .clk_dst_i    (${clk_expr}),
+    .rst_dst_ni   (${rst_expr}),
+    .src_update_i (${update_expr}),
+    .src_busy_o   (${finst_name}_busy),
+    % endif
     .re     (${re_expr}),
     .we     (${we_expr}),
     .wd     (${wd_expr}),
@@ -527,14 +634,34 @@
     % if is_const_reg:
   // constant-only read
   assign ${finst_name}_qs = ${resval_expr};
+    % elif reg.async_clk:
+  prim_subreg_async #(
+    .DW      (${field.bits.width()}),
+    .SwAccess(prim_subreg_pkg::SwAccess${field.swaccess.value[1].name.upper()}),
+    .RESVAL  (${resval_expr})
+  ) u_${finst_name} (
+    .clk_src_i    (${reg_clk_expr}),
+    .rst_src_ni   (${reg_rst_expr}),
+    .clk_dst_i    (${clk_expr}),
+    .rst_dst_ni   (${rst_expr}),
+    .src_update_i (${update_expr}),
+    .src_we_i     (${we_expr}),
+    .src_wd_i     (${wd_expr}),
+    .dst_de_i     (${de_expr}),
+    .dst_d_i      (${d_expr}),
+    .src_busy_o   (${finst_name}_busy),
+    .src_qs_o     (${qs_expr}),
+    .dst_qe_o     (${qe_expr}),
+    .q            (${q_expr})
+  );
     % else:
   ${subreg_block} #(
     .DW      (${field.bits.width()}),
     .SwAccess(prim_subreg_pkg::SwAccess${field.swaccess.value[1].name.upper()}),
     .RESVAL  (${resval_expr})
   ) u_${finst_name} (
-    .clk_i   (clk_i),
-    .rst_ni  (rst_ni),
+    .clk_i   (${reg_clk_expr}),
+    .rst_ni  (${reg_rst_expr}),
 
     // from register interface
       % if reg.shadowed:
@@ -594,3 +721,25 @@
         reg_rdata_next[${str_bits_sv(field.bits)}] = '0;
 % endif
 </%def>\
+<%def name="reg_enable_gen(reg, idx)">\
+  % if reg.needs_re():
+  assign ${reg.name.lower()}_re = addr_hit[${idx}] & reg_re & !reg_error;
+  % endif
+  % if reg.needs_we():
+  assign ${reg.name.lower()}_we = addr_hit[${idx}] & reg_we & !reg_error;
+  % endif
+</%def>\
+<%def name="reg_cdc_gen(field, sig_name, hwext, shadowed, idx)">\
+<%
+    needs_wd = field.swaccess.allows_write()
+    space = '\n' if needs_wd or needs_re else ''
+%>\
+${space}\
+% if needs_wd:
+  % if field.swaccess.swrd() == SwRdAccess.RC:
+  assign ${sig_name}_wd = '1;
+  % else:
+  assign ${sig_name}_wd = reg_wdata[${str_bits_sv(field.bits)}];
+  % endif
+% endif
+</%def>\
diff --git a/util/reggen/register.py b/util/reggen/register.py
index 86cd005..537ac8c 100644
--- a/util/reggen/register.py
+++ b/util/reggen/register.py
@@ -5,6 +5,7 @@
 from typing import Dict, List, Optional
 
 from .access import SWAccess, HWAccess
+from .clocking import Clocking
 from .field import Field
 from .lib import (check_keys, check_str, check_name, check_bool,
                   check_list, check_str_list, check_int)
@@ -20,6 +21,12 @@
 }
 
 OPTIONAL_FIELDS = {
+    'async': [
+        's',
+        "indicates the register must cross to a different "
+        "clock domain before use.  The value shown here "
+        "should correspond to one of the module's clocks."
+    ],
     'swaccess': [
         's',
         "software access permission to use for "
@@ -82,6 +89,8 @@
                  offset: int,
                  name: str,
                  desc: str,
+                 async_name: str,
+                 async_clk: object,
                  hwext: bool,
                  hwqe: bool,
                  hwre: bool,
@@ -95,6 +104,8 @@
         super().__init__(offset)
         self.name = name
         self.desc = desc
+        self.async_name = async_name
+        self.async_clk = async_clk
         self.hwext = hwext
         self.hwqe = hwqe
         self.hwre = hwre
@@ -182,7 +193,8 @@
     def from_raw(reg_width: int,
                  offset: int,
                  params: ReggenParams,
-                 raw: object) -> 'Register':
+                 raw: object,
+                 clocks: Clocking) -> 'Register':
         rd = check_keys(raw, 'register',
                         list(REQUIRED_FIELDS.keys()),
                         list(OPTIONAL_FIELDS.keys()))
@@ -190,6 +202,20 @@
         name = check_name(rd['name'], 'name of register')
         desc = check_str(rd['desc'], 'desc for {} register'.format(name))
 
+        async_name = check_str(rd.get('async', ''), 'async clock for {} register'.format(name))
+        async_clk = None
+
+        if async_name:
+            valid_clocks = clocks.clock_signals()
+            if async_name not in valid_clocks:
+                raise ValueError('async clock {} defined for {} does not exist '
+                                 'in valid module clocks {}.'
+                                 .format(async_name,
+                                         name,
+                                         valid_clocks))
+            else:
+                async_clk = clocks.get_by_clock(async_name)
+
         swaccess = SWAccess('{} register'.format(name),
                             rd.get('swaccess', 'none'))
         hwaccess = HWAccess('{} register'.format(name),
@@ -260,7 +286,7 @@
                                            'storage_err_alert for {} register'
                                            .format(name))
 
-        return Register(offset, name, desc,
+        return Register(offset, name, desc, async_name, async_clk,
                         hwext, hwqe, hwre, regwen,
                         tags, resval, shadowed, fields,
                         update_err_alert, storage_err_alert)
@@ -380,7 +406,7 @@
         # we've replicated fields).
         new_resval = None
 
-        return Register(offset, new_name, self.desc,
+        return Register(offset, new_name, self.desc, self.async_name, self.async_clk,
                         self.hwext, self.hwqe, self.hwre, new_regwen,
                         self.tags, new_resval, self.shadowed, new_fields,
                         self.update_err_alert, self.storage_err_alert)