[doc] Created reggen preprocessor

The preprocessor finds ip block configs in SUMMARY.md and converts each of
them into an HTML document with tables for hardware interfaces and registers.

Signed-off-by: Hugo McNally <hugo.mcnally@gmail.com>
diff --git a/book.toml b/book.toml
index 1a1eaf2..84676f5 100644
--- a/book.toml
+++ b/book.toml
@@ -18,3 +18,8 @@
 
 [preprocessor.readme2index]
 command = "./util/mdbook_readme2index.py"
+
+[preprocessor.reggen]
+command = "./util/mdbook_reggen.py"
+# Python RegEx, searched for within each chapter's source path, identifying ip block configs.
+ip-cfg-py-regex = '(ip|ip_autogen)/.+/data/.+\.hjson'
diff --git a/util/mdbook/utils.py b/util/mdbook/utils.py
index cf0c320..3533145 100644
--- a/util/mdbook/utils.py
+++ b/util/mdbook/utils.py
@@ -3,7 +3,75 @@
 # SPDX-License-Identifier: Apache-2.0
 """Common utilities used by mdbook pre-processors."""
 
-from typing import List, Any, Dict, Generator
+import sys
+import re
+from os import path
+from typing import List, Any, Dict, Generator, Set
+from pathlib import Path
+
+# Matches an inline markdown link and captures, in order: the link text, the target up to
+# any '#' or '?', and whatever follows the target (fragment and/or query).
+LINK_PATTERN_STR = r"\[(.*?)\]\(([^#\?\)]*)(.*?)\)"
+LINK_PATTERN = re.compile(LINK_PATTERN_STR)
+
+
+def change_link_ext(
+        file_list: Set[Path],
+        content: str,
+        new_suffix: str,
+        book_root: Path,
+        page_path: Path,
+) -> str:
+    """Swap the file extension of markdown links that point at the given files.
+
+    Args:
+        file_list: Paths, relative to `book_root`, of the files whose links are rewritten.
+        content: Markdown text to search for links.
+        new_suffix: Replacement extension, including the leading dot (e.g. ".html").
+        book_root: Root directory of the book.
+        page_path: Directory that relative links in `content` are relative to. If it is
+            itself relative, it is taken to be relative to `book_root`.
+
+    Returns:
+        `content` with the extension of every link whose target is in `file_list`
+        replaced by `new_suffix`. Other links are returned unchanged, as is anything
+        following the target inside a rewritten link.
+    """
+    # Resolve the root too, so a root reached through a symlink or a relative path still
+    # compares equal to the resolved link targets. Anchoring the page directory at the
+    # root makes the result independent of the current working directory.
+    root = Path(book_root).resolve()
+    page_dir = root / page_path
+
+    def suffix_swap(match: re.Match) -> str:
+        """Return the matched link, with its extension swapped if it is in `file_list`."""
+        text, target, trailer = match.groups()
+        try:
+            # relative_to raises ValueError when the target lies outside the book root.
+            book_relative_path = (page_dir / target).resolve().relative_to(root)
+        except ValueError:
+            return match.group(0)
+
+        if book_relative_path not in file_list:
+            return match.group(0)
+
+        return "[{}]({}{}{})".format(text, path.splitext(target)[0], new_suffix, trailer)
+
+    return LINK_PATTERN.sub(suffix_swap, content)
+
+
+def supports_html_only() -> None:
+    """Answer a `supports <renderer>` query from mdbook, accepting only "html".
+
+    If the script was given at least two arguments, exit with status 0 when they are
+    exactly `supports html` and with status 1 otherwise. With fewer arguments, return so
+    that the caller can carry on with preprocessing.
+    """
+    if len(sys.argv) > 2:
+        if (sys.argv[1], sys.argv[2]) == ("supports", "html"):
+            sys.exit(0)
+        else:
+            sys.exit(1)
 
 
 def chapters(items: List[Dict[str, Any]]) -> Generator[Dict[str, Any], None, None]:
diff --git a/util/mdbook_reggen.py b/util/mdbook_reggen.py
new file mode 100755
index 0000000..cbee3a2
--- /dev/null
+++ b/util/mdbook_reggen.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""mdbook preprocessor that generates interface and register tables for ip blocks.
+
+The preprocessor finds ip block configs in SUMMARY.md and converts each of them into an
+HTML document with tables for hardware interfaces and registers.
+"""
+
+import json
+import sys
+import re
+import io
+from pathlib import Path
+from typing import Any, Dict
+
+from mdbook import utils as md_utils
+from reggen.ip_block import IpBlock
+import reggen.gen_cfg_html as gen_cfg_html
+import reggen.gen_html as gen_html
+
+# Matches "{{#regref <reference>}}", capturing the reference without surrounding whitespace.
+REGREF_PATTERN = re.compile(r"\{\{#regref\s+?(.+?)\s*?\}\}")
+
+
+def _ip_cfg_pattern(context: Dict[str, Any]) -> re.Pattern:
+    """Compile the `ip-cfg-py-regex` option from book.toml.
+
+    Exits with an error message if the option is missing or is not a valid RegEx.
+    """
+    try:
+        ip_cfg_str = context["config"]["preprocessor"]["reggen"]["ip-cfg-py-regex"]
+    except KeyError:
+        sys.exit(
+            "No RegEx pattern given in book.toml to identify ip block configuration files.\n"
+            "Provide regex as preprocessor.reggen.ip-cfg-py-regex .",
+        )
+
+    try:
+        return re.compile(ip_cfg_str)
+    except (re.error, TypeError) as err:
+        sys.exit(f"preprocessor.reggen.ip-cfg-py-regex is not a valid Python RegEx: {err}")
+
+
+def _render_ip_block(block: IpBlock) -> str:
+    """Return the page holding the interface and register tables of `block`."""
+    buffer = io.StringIO()
+    buffer.write("# Hardware Interfaces and Registers\n")
+    buffer.write("## Interfaces\n")
+    gen_cfg_html.gen_cfg_html(block, buffer)
+    buffer.write("\n## Registers\n")
+    gen_html.gen_html(block, buffer)
+    return buffer.getvalue()
+
+
+def _regref_link(match: re.Match, name2path: Dict[str, str]) -> str:
+    """Return the markdown link that replaces a `{{#regref ...}}` match.
+
+    `name2path` maps ip block names to the source paths of their config chapters.
+    Exits with an error message if the reference is malformed or names an unknown block.
+    """
+    reg = match.group(1).split(".")
+    if len(reg) > 3 or len(reg) < 2:
+        sys.exit(
+            f"{match.group(0)} is invalid. "
+            "Should be in the form: 'ip_block.register.field', where 'field' is optional.",
+        )
+    try:
+        cfg_path = name2path[reg[0]]
+    except KeyError:
+        sys.exit(f"Ip block with name '{reg[0]}' could not be found.")
+
+    # Make the path to the config file absolute (to root of the site),
+    # so we don't have to worry about what page we are in.
+    # Also, do the hjson -> html conversion.
+    page = "/{}.html".format(cfg_path.removeprefix("./").removesuffix(".hjson"))
+
+    # The link text keeps the optional field, but the anchor only names the register.
+    name = ".".join(reg[1:])
+    return "[`{}`]({}#Reg_{})".format(name, page, reg[1].lower())
+
+
+def main() -> None:
+    """Read the book from stdin, apply the reggen preprocessing and print it to stdout."""
+    md_utils.supports_html_only()
+
+    # load both the context and the book from stdin
+    context, book = json.load(sys.stdin)
+    book_root = Path(context["root"])
+    ip_cfg_pattern = _ip_cfg_pattern(context)
+
+    # First pass: replace the content of each ip block config chapter with its tables.
+    name2path: Dict[str, str] = {}
+    for chapter in md_utils.chapters(book["sections"]):
+        src_path = chapter["source_path"]
+        if not src_path or not ip_cfg_pattern.search(src_path):
+            continue
+
+        block = IpBlock.from_text(
+            chapter["content"],
+            [],
+            "file at {}/{}".format(book_root, src_path),
+        )
+        chapter["content"] = _render_ip_block(block)
+        name2path[block.name] = src_path
+
+    cfg_files = {Path(p) for p in name2path.values()}
+
+    def regref_swap(match: re.Match) -> str:
+        """Replaces regref with a link to the register."""
+        return _regref_link(match, name2path)
+
+    # Second pass: point links and regrefs in every chapter at the generated pages.
+    for chapter in md_utils.chapters(book["sections"]):
+        src_path = chapter["source_path"]
+        if not src_path:
+            continue
+
+        content = md_utils.change_link_ext(
+            cfg_files,
+            chapter["content"],
+            ".html",
+            book_root,
+            Path(src_path).parent,
+        )
+        chapter["content"] = REGREF_PATTERN.sub(regref_swap, content)
+
+    # dump the book into stdout
+    print(json.dumps(book))
+
+
+if __name__ == "__main__":
+    main()