blob: 7fc410b035ba58050a18b1d634f488a4444fa458 [file] [log] [blame]
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
"""
Provides lowRISC extension support for rendering Markdown to html.
{{% }} directives
!!Reg !!Reg.Field to generate cross reference to registers
Syntax highlighting with pygments
Conversion of WaveJSON timing diagrams
Adapted from examples in mistletoe.contrib
<https://github.com/miyuchina/mistletoe/blob/master/contrib/>
"""
import io
import logging as log
import os.path as path
import re
import subprocess
import sys
from itertools import chain
from os import walk
from pathlib import Path
from urllib.parse import urlparse, urlunparse
import hjson
import mistletoe.block_token
import mistletoe.span_token
from mistletoe import HTMLRenderer
from mistletoe.block_token import BlockToken, CodeFence, add_token, tokenize
from mistletoe.span_token import EscapeSequence, RawText, SpanToken
from pkg_resources import resource_filename
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers import get_lexer_by_name as get_lexer
from pygments.lexers import guess_lexer
from pygments.styles import get_style_by_name as get_style
import reggen.gen_cfg_html as gen_cfg_html
import reggen.gen_html as gen_html
import reggen.validate as validate
from docgen import html_data, mathjax
from docgen.hjson_lexer import HjsonLexer
from wavegen import wavesvg
# Mirrors Document but adds support for include directives.
# All the sub-files have to be pulled into the main text so that cross-links
# work: by default anchor links only resolve within a single file.
# Arguably that is the correct isolation, but we want to be able to include
# anchors from the sub-files as well.
class Document(BlockToken):
    """
    Document token with includes.

    Before the text is tokenized every ``{{% include ... }}`` directive is
    expanded in place: either with the lines of the named file or, when the
    argument starts with ``!``, with the standard output of a command.
    Expansion is recursive, so included text may itself contain includes.
    """

    # Called when the include directive starts with a !
    # to indicate execute the first word as a command with rest as opts
    # To help avoid mistakes (and minimally help avoid attacks in the case
    # of a trusted docgen given untrusted input files) the command must
    # live inside the repo (the example uses a local ls script to
    # run a command from outside, but the script was reviewed and checked in)
    def exec_include(self, include_text, basedir):
        """
        Run the command named in an include directive and return its output.

        include_text: the directive argument without the leading ``!``; the
            first word is the command (relative to basedir), the rest are
            its options.
        basedir: directory the command path is relative to and in which the
            command is run.

        Returns the command's stdout as a list of lines (line endings kept),
        or an empty list if the command could not be started.
        Raises NameError if no command is given or it is outside the repo.
        """
        # Local import: shlex is only needed for command includes.
        import shlex

        expand = include_text.split(maxsplit=1)
        if not expand:
            log.error("Blocked include: no command given.")
            raise NameError('Command file must be in the repo')
        cmd = expand[0]
        opts = '' if len(expand) < 2 else expand[1]
        abscmd = path.abspath(path.join(basedir, cmd))
        # Compare against the tree top *with* a trailing separator: a bare
        # prefix test would also accept siblings such as "<treetop>-other/".
        if not abscmd.startswith(path.join(self.treetop, '')):
            log.error("Blocked include: " + cmd + ' (' + abscmd +
                      ") is outside the repo.")
            raise NameError('Command file must be in the repo')
        # Build an argument vector instead of a shell string so that the
        # options taken from the document cannot chain further commands
        # (e.g. "; other-command") past the in-repo check above.
        try:
            argv = [abscmd] + shlex.split(opts)
        except ValueError as err:
            log.error("Bad options for include command " + cmd + ": " +
                      str(err))
            return []
        # cwd= runs the command in basedir without save/restore of our cwd
        try:
            res = subprocess.run(
                argv,
                cwd=basedir or None,
                universal_newlines=True,
                stdout=subprocess.PIPE).stdout
        except OSError as err:
            # Missing or non-executable command: report it and include
            # nothing rather than aborting the whole document.
            log.error("Could not run include command: " + str(err))
            return []
        return res.splitlines(keepends=True)

    def add_include(self, l, pat, basedir):
        """
        Return the lines of ``l`` with include directives expanded.

        l: iterable of text lines (a list or an open file).
        pat: compiled regular expression matching an include directive, with
            the include argument in group 1.
        basedir: directory that relative include paths are resolved against.

        Only the first directive on a line is expanded. The text around the
        directive is kept as a line of its own, followed by the included
        lines. Failures are reported in the returned text instead of raising.
        """
        lines = []
        for line in l:
            match = pat.search(line)
            # because this is pre-processed a special case is needed to
            # allow documentation with include command inside back-ticks
            if (match and not (match.start() > 0 and
                               line[match.start() - 1] == '`')):
                lines.append(line[:match.start()] + line[match.end():])
                target = match.group(1)
                if target[0] == "!":
                    try:
                        res = self.exec_include(target[1:], basedir)
                        lines.extend(self.add_include(res, pat, basedir))
                    except NameError:
                        # terminate the placeholder like the one below so it
                        # does not run into the following line
                        lines.append("Blocked execution of " + target +
                                     "\n\n")
                else:
                    incfname = path.join(basedir, target)
                    try:
                        with open(incfname, 'r', encoding='UTF-8') as incfile:
                            # nested includes are relative to the included file
                            newdir = path.dirname(incfname)
                            lines.extend(
                                self.add_include(incfile, pat, newdir))
                    except OSError as err:
                        log.error("Could not open include file: " + str(err))
                        lines.append("Failed to include " + incfname + "\n\n")
            else:
                lines.append(line)
        return lines

    def __init__(self, lines, srcfile):
        """
        Expand the includes in ``lines`` and tokenize the result.

        lines: the document, either one string or a list of lines.
        srcfile: path of the source file; its directory is the base for
            relative includes ('.' is used when there is none).
        """
        docdir = path.dirname(resource_filename('docgen', 'md_html.css'))
        # the repo top is taken to be two levels above the docgen resources
        self.treetop = path.abspath(path.join(docdir, "../.."))
        pat = re.compile(r"\{\{\% *include +(.+?) *\}\}")
        basedir = (path.dirname(srcfile) if srcfile else '') or '.'
        if isinstance(lines, str):
            lines = lines.splitlines(keepends=True)
        lines = self.add_include(lines, pat, basedir)
        self.footnotes = {}
        mistletoe.block_token._root_node = self
        mistletoe.span_token._root_node = self
        try:
            self.children = tokenize(lines)
        finally:
            # always clear the module level root node, even when tokenizing
            # fails, so a later document does not see a stale one
            mistletoe.span_token._root_node = None
            mistletoe.block_token._root_node = None
# mirrors the CodeFence in mistletoe but with additional parameter
# note this maintains the bug with `~` matching the RE
class CodeFenceDirective(CodeFence):
    """
    Fenced code block that carries a language and a directive.

    Matches an opening fence of the form
    ```language {directive}
    that is: at most 3 spaces of indentation, 3 or more fence characters,
    optional spaces, the language, optional spaces, ``{``, optional spaces,
    the directive, optional spaces and ``}``.
    For now neither the language nor the directive may contain spaces.
    """
    # future may want something like \{ *([^\}]*\} for multiple directives
    pattern = re.compile(r'( {0,3})((?:`|~){3,}) *(\S+) *\{ *(\S*) *\}')
    # details of the most recently matched opening fence, set by start()
    _open_info = None

    def __init__(self, match):
        """
        Build the token from ``match``, a pair of (code lines, fence info).

        The fence info is indexed like the tuple stored by start(): item 2
        is the language and item 3 the directive.
        """
        code_lines, fence_info = match
        raw_language = fence_info[2]
        raw_directive = fence_info[3]
        self.language = EscapeSequence.strip(raw_language)
        self.directive = EscapeSequence.strip(raw_directive)
        body = ''.join(code_lines)
        self.children = (RawText(body), )

    @classmethod
    def start(cls, line):
        """
        Return True if ``line`` opens a code fence with a directive.

        On success the indentation width, fence, language and directive are
        remembered on the class in ``_open_info``.
        """
        found = cls.pattern.match(line)
        if found is None:
            return False
        indent, fence, language, directive = found.groups()
        fence_char = fence[0]
        remainder = line[found.end():]
        # the fence character may not show up again on the opening line
        if any(fence_char in part
               for part in (language, directive, remainder)):
            return False
        cls._open_info = (len(indent), fence, language, directive)
        return True
class LowriscEscape(SpanToken):
    """
    Span token for a ``{{% type text }}`` directive.

    ``type`` holds the first word of the directive and ``text`` the rest.
    """
    pattern = re.compile(r"\{\{\% *(.+?) +(.+?) *\}\}")

    def __init__(self, match):
        # group 1 is the directive type, group 2 its argument text
        self.type, self.text = match.group(1, 2)
class RegRef(SpanToken):
    """
    Span token for a ``!!Reg`` or ``!!Reg.Field`` register reference.

    ``rname`` holds the text following the ``!!``.
    """
    pattern = re.compile(r"!!([A-Za-z0-9_.]+)")

    def __init__(self, match):
        # the pattern has a single group: the referenced name
        (self.rname, ) = match.groups()
class LowriscRenderer(mathjax.MathJaxRenderer):
    """
    HTML renderer with the lowRISC Markdown extensions.

    Adds rendering of ``{{% ... }}`` directives, ``!!Reg`` register
    references, code fences with a directive, pygments syntax highlighting
    and wavejson timing diagrams. Headings and sections are recorded in
    ``self.toc`` as (level, text, id) tuples while rendering.
    """
    # NOTE: the formatter is a class attribute and is therefore shared by
    # all renderer instances; its style and cssstyles are changed below.
    formatter = HtmlFormatter()
    formatter.noclasses = True

    def __init__(self, *extras, style='default', srcfile='', wavejs=False):
        """
        extras: additional token classes to register with the renderer.
        style: name of the pygments style used for code highlighting.
        srcfile: path of the source file; its directory is the base for
            files named in directives.
        wavejs: if True emit wavejson as a WaveDrom script tag, otherwise
            convert it to svg while rendering.
        """
        # yapf requests different formatting for this code block depending on
        # the Python3 version. Work around that by disabling yapf for this code
        # block.
        # Bug: https://github.com/google/yapf/issues/696
        # yapf: disable
        super().__init__(*chain((LowriscEscape, RegRef,
                                 CodeFenceDirective), extras))
        # yapf: enable
        self.formatter.style = get_style(style)
        self.regs = None
        self.wavejs = wavejs
        self.num_svg = 0
        # compute base of srcfile to allow relative imports
        self.basedir = path.dirname(srcfile) if srcfile else ""
        self.toc = []

    # Convert the inner text of header or section into id for html href
    # inner is a flat string but may have html tags
    # html id rules are:
    #   Must contain at least one character
    #   Must not contain any space characters
    # Want to match github, can't find its exact rules
    # The id is derived from the heading text by stripping html tags,
    # changing whitespace to - and lower-casing.
    # e.g. 'Theory of operation' becomes 'theory-of-operation'
    # TODO worry about & eg 'Foo & Bar' becomes 'foo-&-bar'
    def id_from_inner(self, inner):
        """Return the html id derived from the heading text ``inner``."""
        without_tags = re.sub(r'<.+?>', '', inner)
        return re.sub(r'\s+', '-', without_tags).lower()

    def render_lowrisc_code(self, token, directive):
        """
        Render a code block, with ``directive`` selecting the colouring.

        wavejson blocks become a WaveDrom script tag or an svg; other blocks
        are highlighted with pygments when a lexer is found and otherwise
        rendered by the parent class. The directives ``.good`` and ``.bad``
        select a green or red background for highlighted code.
        """
        # Local import: only used to escape text on the error path below.
        from html import escape

        code = token.children[0].content
        # parser seems to get confused (eg by `~`) and makes empty calls
        if not code:
            log.warning('Unexpected empty code block. Check for `~`')
            return ""
        # waveforms look like embedded code in the markdown
        # but the WaveDrom javascript wants it in a script tag
        if token.language == "wavejson":
            if self.wavejs:
                return '<script type="WaveDrom">' + code + '</script>'
            try:
                wvobj = hjson.loads(code, use_decimal=True)
            except ValueError as err:
                log.warning('wavejson parse failed at line ' +
                            str(err.lineno) + ': ' + err.msg)
                # The source is echoed into the page, split at the error
                # position; escape it so that any < or & in the waveform
                # cannot be taken as markup.
                return '<pre>Error line ' + str(err.lineno) + \
                    ': ' + escape(err.msg, quote=False) + " in:\n" + \
                    escape(code[:err.pos], quote=False) + \
                    '</pre><pre style="color:red">' + \
                    escape(code[err.pos:], quote=False) + '</pre>'
            self.num_svg += 1
            return wavesvg.convert(wvobj, self.num_svg - 1)

        # pygments.util.ClassNotFound subclass of ValueError
        lexer = None
        if token.language:
            if token.language == 'hjson':
                lexer = HjsonLexer()
            else:
                try:
                    lexer = get_lexer(token.language)
                except ValueError:
                    log.info('Failed to get lexer for language=' +
                             token.language)
        if lexer is None:
            # no (usable) language given: let pygments guess from the code
            try:
                lexer = guess_lexer(code)
                log.info('Guess lexer as ' + lexer.name)
            except ValueError:
                log.info('Failed to guess lexer for code=' + code)
                lexer = None
        if lexer is None:
            return super().render_block_code(token)

        if directive == '.good':
            self.formatter.cssstyles = 'background:#e0ffe0; ' \
                                       'border-left-color: #108040;'
        elif directive == '.bad':
            self.formatter.cssstyles = 'background:#ffe0e0; ' \
                                       'border-left-color: #c04030'
        else:
            self.formatter.cssstyles = ''
        return highlight(code, lexer, self.formatter)

    def render_code_fence_directive(self, token):
        """Render a code fence that has a directive."""
        return self.render_lowrisc_code(token, token.directive)

    def render_block_code(self, token):
        """Render a code block without a directive."""
        return self.render_lowrisc_code(token, '')

    def render_lowrisc_escape(self, token):
        """
        Render a ``{{% type text }}`` directive according to its type.

        Unknown types are logged and returned as the directive text.
        Raises SystemExit if a regfile cannot be parsed.
        """
        # plan eventually to allow lowrisc-doc-hdr=doctype
        if token.type.startswith("lowrisc-doc-hdr"):
            return html_data.lowrisc_title_head + token.text + \
                html_data.lowrisc_title_tail
        if token.type == "toc":
            return html_data.toc_mark_head + token.text + \
                html_data.toc_mark_tail
        if token.type == "regfile":
            regpath = path.join(self.basedir, token.text)
            with open(regpath, 'r', encoding='UTF-8') as regfile:
                try:
                    obj = hjson.load(
                        regfile,
                        use_decimal=True,
                        object_pairs_hook=validate.checking_dict)
                except ValueError as err:
                    raise SystemExit(err) from err
            # keep the register object only if it validates without errors
            if validate.validate(obj) == 0:
                log.info("Generated register object\n")
                self.regs = obj
            else:
                log.warning("Register import failed\n")
                self.regs = None
            return ""
        if token.type == "registers":
            if self.regs is None:
                return "<B>Errors parsing registers prevents insertion.</B>"
            outbuf = io.StringIO()
            # note for CSS need to escape the mdown class on the div
            outbuf.write("</div>" + html_data.register_header)
            gen_html.gen_html(self.regs, outbuf, toclist=self.toc, toclevel=3)
            outbuf.write(html_data.register_trailer + '<div class="mdown">')
            generated = outbuf.getvalue()
            outbuf.close()
            return generated
        if token.type == "cfgfile":
            log.error("Deprecated lowRISC token cfgfile ignored. Config is now"
                      " in a single file with the registers!")
            return ""
        if token.type == "hwcfg":
            if self.regs is None:
                return "<B>Errors parsing configuration prevents insertion.</B>"
            outbuf = io.StringIO()
            # note for CSS need to escape the mdown class on the div
            outbuf.write("</div>" + html_data.hwcfg_header)
            gen_cfg_html.gen_cfg_html(self.regs, outbuf)
            outbuf.write(html_data.hwcfg_trailer + '<div class="mdown">')
            generated = outbuf.getvalue()
            outbuf.close()
            return generated
        if token.type == "section1":
            # TODO should token.text get parsed to allow markdown in it?
            anchor = self.id_from_inner(token.text)
            self.toc.append((2, token.text, anchor))
            return html_data.section_template.format(
                cls="section_heading", id=anchor, inner=token.text)
        if token.type == "section2":
            # TODO should token.text get parsed to allow markdown in it?
            anchor = self.id_from_inner(token.text)
            self.toc.append((3, token.text, anchor))
            return html_data.section_template.format(
                cls="subsection_heading", id=anchor, inner=token.text)
        if token.type == "doctree":
            # list every markdown file below the named sub-directories,
            # linking to the html file that has the same relative path
            subdirs = sorted(
                path.join(self.basedir, s) for s in token.text.split())
            md_paths = []
            for subdir in subdirs:
                md_paths.extend(sorted(Path(subdir).rglob('*.md')))
            entries = []
            for md_path in md_paths:
                rel_md_path = md_path.relative_to(self.basedir)
                entries.append(html_data.doctree_template.format(
                    link=rel_md_path.with_suffix('.html'),
                    text=rel_md_path.with_suffix('')))
            return html_data.doctree_head + ''.join(entries) + \
                html_data.doctree_tail

        bad_tag = '{{% ' + token.type + ' ' + token.text + ' }}'
        log.warning("Unknown lowRISC tag " + bad_tag)
        return bad_tag

    def render_reg_ref(self, token):
        """
        Render ``!!Reg`` or ``!!Reg.Field`` as a link to the register.

        If no register file was imported, or the register is not known, a
        warning is logged and the reference is returned as plain text.
        """
        if self.regs is None:
            log.warning("!!" + token.rname + ": no register import was done.")
            return '!!' + token.rname
        cname = self.regs['name']
        base = token.rname.partition('.')[0].lower()
        if base not in self.regs['genrnames']:
            log.warning("!!" + token.rname + " not found in register list.")
            return '!!' + token.rname
        # A trailing . is the full stop of the sentence rather than part of
        # the name, so it is put back after the link.
        ends_sentence = token.rname[-1] == "."
        shown = token.rname[:-1] if ends_sentence else token.rname
        link = '<a href="#Reg_' + base + '"><code class="reg">' + \
            cname + "." + shown + '</code></a>'
        return link + ('.' if ends_sentence else '')

    # copied from mistletoe/html_renderer.py and id added
    # override heading to insert reference for anchor
    def render_heading(self, token):
        """Render a heading with an id and record it in the toc."""
        template = '<h{level} id="{id}">{inner}</h{level}>'
        inner = self.render_inner(token)
        anchor = self.id_from_inner(inner)
        self.toc.append((token.level, inner, anchor))
        return template.format(level=token.level, inner=inner, id=anchor)

    # decorator for link rendering functions in class HTMLRenderer
    # converts relative .md link targets to .html link targets
    def _convert_local_links(func):
        def _wrapper_convert_local_links(*args, **kwargs):
            # args[0] is the renderer, args[1] the link token
            target_url = urlparse(args[1].target)
            target_path = Path(target_url.path)
            # check link is not absolute
            if not target_url.netloc and target_path.suffix in ['.md', '.mkd']:
                target_url = target_url._replace(
                    path=str(target_path.with_suffix('.html')))
                args[1].target = urlunparse(target_url)
            return func(*args, **kwargs)

        return _wrapper_convert_local_links

    # apply to the link rendering functions inherited from HTMLRenderer
    render_link = _convert_local_links(HTMLRenderer.render_link)
    render_auto_link = _convert_local_links(HTMLRenderer.render_auto_link)