[doc] Added doxygen preprocessor

Signed-off-by: Hugo McNally <hugo.mcnally@gmail.com>
Co-authored-by: Harry Callahan <hcallahan@lowrisc.org>
diff --git a/book.toml b/book.toml
index cf089eb..3e98ede 100644
--- a/book.toml
+++ b/book.toml
@@ -36,3 +36,9 @@
 command = "./util/mdbook_reggen.py"
 # Python RegEx identifying ip block config paths.
 ip-cfg-py-regex = '(ip|ip_autogen)/.+/data/.+\.hjson'
+
+[preprocessor.doxygen]
+command = "./util/mdbook_doxygen.py"
+out-dir = "docs/"
+html-out-dir = "doxy/"
+dif-src-py-regex = 'dif_\S*\.h'
diff --git a/util/difgen/__init__.py b/util/difgen/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/util/difgen/__init__.py
+++ /dev/null
diff --git a/util/difgen/gen_dif_listing.py b/util/mdbook/difgen.py
similarity index 89%
rename from util/difgen/gen_dif_listing.py
rename to util/mdbook/difgen.py
index efc993b..3ee18f3 100644
--- a/util/difgen/gen_dif_listing.py
+++ b/util/mdbook/difgen.py
@@ -20,31 +20,35 @@
         str(doxygen_xml_path.joinpath("index.xml")),
     ]
 
-    combined_xml_res = subprocess.run(xsltproc_args, check=True,
+    combined_xml_res = subprocess.run(
+        xsltproc_args, check=True,
         cwd=str(doxygen_xml_path), stdout=subprocess.PIPE,
-        universal_newlines=True)
+        universal_newlines=True,
+    )
     return ET.fromstring(combined_xml_res.stdout)
 
+
 # Get all information about individual DIF functions that are specified in one
 # DIF header. This returns only the Info from the XML that we require.
 def get_difref_info(combined_xml, dif_header):
     compound = _get_dif_file_compound(combined_xml, dif_header)
-    if compound == None:
+    if compound is None:
         return []
 
     file_id = _get_dif_file_id(compound)
     functions = _get_dif_function_info(compound, file_id)
     return functions
 
+
 # Create HTML List of DIFs, using the info from the combined xml
-def gen_listing_html(combined_xml, dif_header, dif_listings_html):
+def gen_listing_html(html_path: str, combined_xml, dif_header, dif_listings_html):
     compound = _get_dif_file_compound(combined_xml, dif_header)
-    if compound == None:
+    if compound is None:
         log.error("Doxygen output not found for {}".format(dif_header))
         return
 
     file_id = _get_dif_file_id(compound)
-    functions = _get_dif_function_info(compound, file_id)
+    functions = _get_dif_function_info(html_path, compound, file_id)
 
     if len(functions) == 0:
         log.error("No DIF functions found for {}".format(dif_header))
@@ -53,7 +57,9 @@
     # Generate DIF listing header
     dif_listings_html.write('<p>To use this DIF, include the following C header:</p>')
     dif_listings_html.write('<pre><code class=language-c data-lang=c>')
-    dif_listings_html.write('#include "<a href="/sw/apis/{}.html">{}</a>"'.format(file_id, dif_header))
+    dif_listings_html.write('#include "<a href="{}/{}.html">{}</a>"'.format(
+        html_path, file_id, dif_header,
+    ))
     dif_listings_html.write('</code></pre>\n')
 
     # Generate DIF function list.
@@ -68,6 +74,7 @@
         dif_listings_html.write('</li>\n')
     dif_listings_html.write('</ul>\n')
 
+
 # Generate HTML link for single function, using info returned from
 # get_difref_info
 def gen_difref_html(function_info, difref_html):
@@ -75,18 +82,21 @@
     difref_html.write('<code>{name}</code>'.format(**function_info))
     difref_html.write('</a>\n')
 
+
 def _get_dif_file_compound(combined_xml, dif_header):
     for c in combined_xml.findall('compounddef[@kind="file"]'):
         if c.find("location").attrib["file"] == dif_header:
             return c
     return None
 
+
 def _get_dif_file_id(compound):
     return compound.attrib["id"]
 
-def _get_dif_function_info(compound, file_id):
+
+def _get_dif_function_info(html_path: str, compound, file_id):
     funcs = compound.find('sectiondef[@kind="func"]')
-    if funcs == None:
+    if funcs is None:
         return []
 
     # Collect useful info on each function
@@ -105,7 +115,7 @@
         func_info["id"] = m.attrib["id"]
         func_info["file_id"] = file_id
         func_info["local_id"] = func_id
-        func_info["full_url"] = "/sw/apis/{}.html#{}".format(file_id, func_id)
+        func_info["full_url"] = "{}/{}.html#{}".format(html_path, file_id, func_id)
 
         func_info["name"] = _get_text_or_empty(m, "name")
         func_info["prototype"] = _get_text_or_empty(
diff --git a/util/mdbook_doxygen.py b/util/mdbook_doxygen.py
new file mode 100755
index 0000000..06bad74
--- /dev/null
+++ b/util/mdbook_doxygen.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""mdbook preprocessor that adds an overview to linked header files.
+
+This overview holds links to the generated doxygen api documentation
+as well as the actual file.
+"""
+import io
+import json
+import re
+import sys
+from pathlib import Path, PurePath
+
+from mdbook import difgen
+from mdbook import utils as md_utils
+
+SRCTREE_TOP = Path(__file__).parents[1].resolve()
+
+
+def main() -> None:
+    """Rewrite DIF header chapters so they show a doxygen API overview.
+
+    Reads mdbook's `[context, book]` JSON from stdin, rewrites chapters whose
+    source path matches `dif-src-py-regex`, and prints the book to stdout.
+    """
+    md_utils.supports_html_only()
+
+    # load both the context and the book from stdin
+    context, book = json.load(sys.stdin)
+    book_root = Path(context["root"])
+
+    try:
+        site_url = PurePath(context["config"]["output"]["html"]["site-url"])
+    except KeyError:
+        site_url = PurePath("/")
+
+    try:
+        preproc_cfg = context["config"]["preprocessor"]["doxygen"]
+        out_dir = SRCTREE_TOP / preproc_cfg["out-dir"]
+        # Normalise slashes: difgen joins this prefix with "/", so "doxy/"
+        # would otherwise yield "/doxy//<file>.html".
+        html_out_dir = ("/" + preproc_cfg["html-out-dir"].strip("/")).rstrip("/")
+        dif_src_regex = re.compile(preproc_cfg["dif-src-py-regex"])
+    except KeyError:
+        sys.exit(
+            "mdbook_doxygen.py requires these keys to be set in book.toml:\n"
+            "\tpreprocessor.doxygen.out-dir -- Doxygen's output directory.\n"
+            "\tpreprocessor.doxygen.html-out-dir -- Doxygen's html out directory.\n"
+            "\tpreprocessor.doxygen.dif-src-py-regex -- A regex for identifying dif files.\n"
+        )
+
+    combined_xml = difgen.get_combined_xml(out_dir / 'api-xml')
+
+    header_files = set()
+    for chapter in md_utils.chapters(book["sections"]):
+        src_path = chapter["source_path"]
+        if src_path is None or dif_src_regex.search(src_path) is None:
+            continue
+
+        file_name = Path(src_path).name
+
+        # First calculate the path to the generated dif header,
+        # relative to the root of the project. This is the form ingested
+        # by the difgen library scripts.
+        rel_dif_h = (book_root / src_path).relative_to(SRCTREE_TOP)
+
+        buffer = io.StringIO()
+        buffer.write(f"# {file_name}\n")
+        difgen.gen_listing_html(html_out_dir, combined_xml, str(rel_dif_h),
+                                buffer)
+        buffer.write(
+            "\n<details><summary>\nGenerated from <a href=\"{}\">{}</a></summary>\n"
+            .format(site_url / src_path, file_name),
+        )
+        buffer.write("\n```c\n{}\n```\n".format(chapter["content"]))
+        buffer.write("</details>")
+        chapter["content"] = buffer.getvalue()
+
+        # Rewrite path so `dif_*.h` files don't collide with `dif_*.md` files.
+        if Path(chapter["path"]).suffix == ".h":
+            chapter["path"] = str(Path(chapter["path"]).with_suffix(""))
+        chapter["path"] = chapter["path"] + "_h.html"
+
+        header_files.add(Path(src_path))
+
+    for chapter in md_utils.chapters(book["sections"]):
+        if chapter["source_path"] is None:
+            continue
+        page_path = Path(chapter["source_path"]).parent
+
+        chapter["content"] = md_utils.change_link_ext(
+            header_files, chapter["content"], "_h.html", book_root, page_path,
+        )
+
+    # dump the book into stdout
+    print(json.dumps(book))
+
+
+if __name__ == "__main__":
+    main()