[doc] Created readme2index pre-processor. We use the built-in `index` preprocessor to rename our `README.md` files to `index.md`, but it doesn't currently fix up links: <https://github.com/rust-lang/mdBook/issues/984>. This is a temporary preprocessor to regex-replace those links until upstream gets fixed. Signed-off-by: Hugo McNally <hugo.mcnally@gmail.com>

commit: 8c02d9df75e454b94ae2ebbe7dd7eab95665adf6 [log] [tgz]
author: Hugo McNally <hugo.mcnally@gmail.com> Fri Feb 10 08:37:21 2023 +0000
committer: Greg Chadwick <mail@gregchadwick.co.uk> Tue Mar 07 11:41:43 2023 +0000
tree: 05ec50869d966b5c664b179fa82b88a8b7fd48af
parent: 934f088e2efac7738745998097a877b3327e7517 [diff]
diff --git a/book.toml b/book.toml
index 47d0292..1a1eaf2 100644
--- a/book.toml
+++ b/book.toml

@@ -15,3 +15,6 @@
 git-repository-url = "https://github.com/lowrisc/opentitan"
 edit-url-template = "https://github.com/lowrisc/opentitan/edit/master/{path}"
 curly-quotes = true
+
+[preprocessor.readme2index]
+command = "./util/mdbook_readme2index.py"

diff --git a/util/mdbook/__init__.py b/util/mdbook/__init__.py
new file mode 100644
index 0000000..fee181e
--- /dev/null
+++ b/util/mdbook/__init__.py

@@ -0,0 +1,4 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""Common module for mdbook pre-processors."""

diff --git a/util/mdbook/utils.py b/util/mdbook/utils.py
new file mode 100644
index 0000000..cf0c320
--- /dev/null
+++ b/util/mdbook/utils.py

@@ -0,0 +1,18 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""Common utilities used by mdbook pre-processors."""
+
+from typing import List, Any, Dict, Generator
+
+
+def chapters(items: List[Dict[str, Any]]) -> Generator[Dict[str, Any], None, None]:
+    """Recursively yield every chapter, sub-chapters before their parent."""
+    for chapter in (item.get("Chapter") for item in items):
+        if not chapter:
+            continue
+
+        for sub_chapter in chapters(chapter["sub_items"]):
+            yield sub_chapter
+
+        yield chapter

diff --git a/util/mdbook_readme2index.py b/util/mdbook_readme2index.py
new file mode 100755
index 0000000..d436318
--- /dev/null
+++ b/util/mdbook_readme2index.py

@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""mdbook preprocessor that converts README.md to index.md
+
+We use the built-in `index` preprocessor to rename our `README.md` files to `index.md`,
+but it doesn't currently fix up links: https://github.com/rust-lang/mdBook/issues/984.
+This is a temporary preprocessor to regex replace those links until upstream gets fixed.
+"""
+
+import json
+import sys
+import re
+
+import mdbook.utils as md_utils
+
+# Finds all markdown inline links, `[...](...)`,
+# whose target is a file called readme.md (matched case-insensitively).
+# To avoid rewriting URLs, no colon, `:`, is allowed before the readme.md.
+# `#` and `?` also aren't allowed before the readme.md,
+# in case `readme.md` is not the filename but in fact part of a fragment or query string.
+# The link content before and after `readme` is captured in two groups
+# so that it can be substituted back around the replacement `index`.
+RM2IDX_PATTERN_INLINE = re.compile(
+    r"(\[[^\]]*\]\([^\)|#|:|\?]*)readme(\.md[^\)]*\))",
+    re.IGNORECASE,
+)
+
+# Similar to the pattern above but for `[...]: ...` style links.
+RM2IDX_PATTERN_NOT_INLINE = re.compile(
+    r"^(\[[^\]]*\]:\s+[^\n|#|:|\?]*)readme(\.md[^\n]*$)",
+    re.IGNORECASE | re.MULTILINE,
+)
+
+
+def main() -> None:
+    if len(sys.argv) > 2:
+        if (sys.argv[1], sys.argv[2]) == ("supports", "html"):
+            sys.exit(0)
+        else:
+            sys.exit(1)
+
+    # stdin carries a two-element JSON array: the context, then the book
+    context, book = json.load(sys.stdin)
+
+    for chapter in md_utils.chapters(book["sections"]):
+        chapter["content"] = RM2IDX_PATTERN_INLINE.sub(r"\1index\2", chapter["content"])
+        chapter["content"] = RM2IDX_PATTERN_NOT_INLINE.sub(r"\1index\2", chapter["content"])
+
+    # write the modified book back to stdout as JSON
+    print(json.dumps(book))
+
+
+if __name__ == "__main__":
+    main()
commit	8c02d9df75e454b94ae2ebbe7dd7eab95665adf6	[log] [tgz]
author	Hugo McNally <hugo.mcnally@gmail.com>	Fri Feb 10 08:37:21 2023 +0000
committer	Greg Chadwick <mail@gregchadwick.co.uk>	Tue Mar 07 11:41:43 2023 +0000
tree	05ec50869d966b5c664b179fa82b88a8b7fd48af
parent	934f088e2efac7738745998097a877b3327e7517 [diff]