# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
"""
Provides lowRISC extension support for rendering Markdown to html.
{{% }} directives
!!Reg !!Reg.Field to generate cross reference to registers
Syntax highlighting with pygments
Conversion of WaveJSON timing diagrams
Adapted from examples in mistletoe.contrib
<https://github.com/miyuchina/mistletoe/blob/master/contrib/>
"""

import io
import logging as log
import os.path as path
import re
import subprocess
import sys
from itertools import chain
from os import walk
from pathlib import Path
from urllib.parse import urlparse, urlunparse

import hjson
import mistletoe.block_token
import mistletoe.span_token
from mistletoe import HTMLRenderer
from mistletoe.block_token import BlockToken, CodeFence, add_token, tokenize
from mistletoe.span_token import EscapeSequence, RawText, SpanToken
from pkg_resources import resource_filename
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers import get_lexer_by_name as get_lexer
from pygments.lexers import guess_lexer
from pygments.styles import get_style_by_name as get_style

import reggen.gen_cfg_html as gen_cfg_html
import reggen.gen_html as gen_html
import reggen.validate as validate
from docgen import html_data, mathjax
from docgen.hjson_lexer import HjsonLexer
from wavegen import wavesvg


# Mirrors Document but adds support for includes.
# All included sub-files have to be pulled into the main text so that
# cross-links work: by default anchor links only resolve within a single file.
# Arguably that is correct isolation, but we want to be able to link to
# anchors in included files.
class Document(BlockToken):
    """
    Document token with includes.

    Before the text is tokenized every ``{{% include <target> }}`` directive
    is expanded in place (recursively), so the included text becomes part of
    this one document. A target starting with ``!`` is run as a command and
    its standard output is included; any other target is read as a UTF-8
    file relative to the directory of the including file.
    """

    def _in_treetop(self, abspath):
        """Return True if abspath is self.treetop or lies underneath it."""
        try:
            common = path.commonpath([abspath, self.treetop])
        except ValueError:
            # No common path at all (e.g. different drives on Windows)
            return False
        # Compare whole path components: a plain string prefix test would
        # also accept a sibling directory such as <treetop>-other
        return path.normcase(common) == path.normcase(self.treetop)

    # Called when the include directive starts with a !
    # to indicate execute the first word as a command with rest as opts
    # To help avoid mistakes (and minimally help avoid attacks in the case
    # of a trusted docgen given untrusted input files) the command must
    # live inside the repo (the example uses a local ls script to
    # run a command from outside, but the script was reviewed and checked in)
    def exec_include(self, include_text, basedir):
        """
        Run an include command and return its standard output as lines.

        include_text is the directive text after the leading ``!``: the
        command (relative to basedir) followed by optional options.
        The options are split using shell-like quoting rules but are never
        interpreted by a shell, so metacharacters are passed to the command
        literally. The command runs with basedir as working directory.

        Returns the list of output lines (line endings kept); the list is
        empty if the command could not be started.
        Raises NameError if no command is given or it is outside the repo.
        """
        import shlex

        expand = include_text.split(maxsplit=1)
        if not expand:
            log.error("Blocked include: no command given.")
            raise NameError('Include command is empty')
        cmd = expand[0]
        opts = expand[1] if len(expand) > 1 else ''
        abscmd = path.abspath(path.join(basedir, cmd))
        if not self._in_treetop(abscmd):
            log.error("Blocked include: " + cmd + ' (' + abscmd +
                      ") is outside the repo.")
            raise NameError('Command file must be in the repo')
        try:
            # cwd= does the cd in the subprocess to avoid save/restore of cwd
            # and the argument list keeps untrusted text away from a shell
            res = subprocess.run(
                [abscmd] + shlex.split(opts),
                cwd=basedir,
                universal_newlines=True,
                stdout=subprocess.PIPE).stdout
        except (OSError, ValueError) as err:
            # OSError: command or basedir missing / not executable
            # ValueError: unbalanced quotes in the options
            log.error("Could not run include command " + abscmd + ": " +
                      str(err))
            return []
        return res.splitlines(keepends=True)

    def add_include(self, l, pat, basedir):
        """
        Return the lines of l with include directives expanded.

        l is an iterable of text lines, pat the compiled include pattern
        (group 1 is the include target) and basedir the directory that
        relative targets are resolved against. Only the first directive on
        a line is processed. The directive itself is removed from its line
        and the included lines follow it; included text is expanded
        recursively. Failures are logged and replaced by a message line in
        the output.
        """
        lines = []
        for line in l:
            match = pat.search(line)
            # because this is pre-processed a special case is needed to
            # allow documentation with include command inside back-ticks
            quoted = (match is not None and match.start() > 0 and
                      line[match.start() - 1] == '`')
            if match is None or quoted:
                lines.append(line)
                continue

            lines.append(line[:match.start()] + line[match.end():])
            target = match.group(1)
            if target[0] == "!":
                try:
                    res = self.exec_include(target[1:], basedir)
                    lines.extend(self.add_include(res, pat, basedir))
                except NameError:
                    # blank line after the message keeps it from merging
                    # with the text that follows the directive
                    lines.append("Blocked execution of " + target + "\n\n")
            else:
                incfname = path.join(basedir, target)
                try:
                    with open(incfname, 'r', encoding='UTF-8') as incfile:
                        # nested includes are relative to the included file
                        newdir = path.dirname(incfname)
                        lines.extend(
                            self.add_include(incfile, pat, newdir))
                except OSError as err:
                    log.error("Could not open include file: " + str(err))
                    lines.append("Failed to include " + incfname + "\n\n")
        return lines

    def __init__(self, lines, srcfile):
        """
        Expand includes in lines and tokenize the result.

        lines is either a single string or an iterable of lines; srcfile is
        the path of the source file (may be empty) and is only used to
        find the directory that include targets are relative to.
        """
        docdir = path.dirname(resource_filename('docgen', 'md_html.css'))
        self.treetop = path.abspath(path.join(docdir, "../.."))
        pat = re.compile(r"\{\{\% *include +(.+?) *\}\}")
        basedir = path.dirname(srcfile) if srcfile else ''
        if basedir == '':
            basedir = '.'
        if isinstance(lines, str):
            lines = lines.splitlines(keepends=True)

        lines = self.add_include(lines, pat, basedir)
        self.footnotes = {}
        mistletoe.block_token._root_node = self
        mistletoe.span_token._root_node = self
        try:
            self.children = tokenize(lines)
        finally:
            # always clear the module globals, even if tokenizing fails
            mistletoe.span_token._root_node = None
            mistletoe.block_token._root_node = None


# mirrors the CodeFence in mistletoe but with additional parameter
# note this maintains the bug with `~` matching the RE
class CodeFenceDirective(CodeFence):
    """
    Fenced code block that carries a language and a directive.

    Recognizes an opening line of the form
    ```language {directive}
    that is: at most 3 spaces of indentation, at least 3 fence characters,
    optional spaces, the language, optional spaces, an opening {,
    optional spaces, the directive, optional spaces and a closing }.
    For now neither the language nor the directive may contain spaces.
    """
    # future may want something like \{ *([^\}]*\} for multiple directives
    pattern = re.compile(r'( {0,3})((?:`|~){3,}) *(\S+) *\{ *(\S*) *\}')
    _open_info = None

    def __init__(self, match):
        # match is the (lines, open_info) pair; open_info has the layout
        # stored by start(): (indent, fence, language, directive)
        body_lines, fence_info = match
        raw_language = fence_info[2]
        raw_directive = fence_info[3]
        self.language = EscapeSequence.strip(raw_language)
        self.directive = EscapeSequence.strip(raw_directive)
        self.children = (RawText(''.join(body_lines)), )

    @classmethod
    def start(cls, line):
        """
        Return True if line opens a code fence with a directive.

        On success the parsed opening information is remembered in
        cls._open_info.
        """
        found = cls.pattern.match(line)
        if found is None:
            return False
        indent, fence, language, directive = found.groups()
        fence_char = fence[0]
        remainder = line[found.end():]
        # the fence character must not show up again on the opening line
        if any(fence_char in part
               for part in (language, directive, remainder)):
            return False
        cls._open_info = (len(indent), fence, language, directive)
        return True


class LowriscEscape(SpanToken):
    """
    Span token for an inline {{% type text }} directive.

    The first word after the opening is kept in self.type and the
    remaining text in self.text.
    """
    pattern = re.compile(r"\{\{\% *(.+?) +(.+?) *\}\}")

    def __init__(self, match):
        directive_type, directive_text = match.group(1, 2)
        self.type = directive_type
        self.text = directive_text


class RegRef(SpanToken):
    """
    Span token for a !!Name register reference.

    The text after the !! (letters, digits, underscores and dots) is kept
    in self.rname.
    """
    pattern = re.compile(r"!!([A-Za-z0-9_.]+)")

    def __init__(self, match):
        # the pattern has exactly one group: the referenced name
        (self.rname, ) = match.groups()


class LowriscRenderer(mathjax.MathJaxRenderer):
    """
    HTML renderer with the lowRISC extensions.

    On top of the base renderer this handles {{% ... }} directives,
    !!Register references, code fences with a directive, syntax
    highlighting of code blocks with pygments, wavejson code blocks,
    heading ids (also collected in self.toc) and the conversion of
    relative .md/.mkd link targets to .html.
    """
    # Class-level default, kept so that existing references to
    # LowriscRenderer.formatter keep working. Every instance replaces it
    # with its own formatter in __init__.
    formatter = HtmlFormatter()
    formatter.noclasses = True

    def __init__(self, *extras, style='default', srcfile='', wavejs=False):
        """
        extras:  additional token classes passed on to the base renderer
        style:   name of the pygments style used for highlighting
        srcfile: path of the file being rendered; its directory is the
                 base for relative file names used in directives
        wavejs:  if true wavejson blocks are emitted as WaveDrom script
                 tags, otherwise they are converted to svg
        """
        # yapf requests different formatting for this code block depending on
        # the Python3 version. Work around that by disabling yapf for this code
        # block.
        # Bug: https://github.com/google/yapf/issues/696
        # yapf: disable
        super().__init__(*chain((LowriscEscape, RegRef,
                                 CodeFenceDirective), extras))
        # yapf: enable
        # Use a formatter per instance: the style has to be given at
        # construction time to be used for the inline styles, and the
        # formatter is modified while rendering (cssstyles) so it must not
        # be shared between renderers.
        self.formatter = HtmlFormatter(style=get_style(style), noclasses=True)
        self.regs = None
        self.wavejs = wavejs
        self.num_svg = 0
        # compute base of srcfile to allow relative imports
        self.basedir = path.dirname(srcfile) if len(srcfile) > 0 else ""
        self.toc = []

    # Convert the inner text of header or section into id for html href
    # inner is a flat string but may have html tags
    # html id rules are:
    #    Must contain at least one character
    #    Must not contain any space characters
    # Want to match github, can't find its exact rules
    # The id is derived from the heading text by stripping html tags,
    # changing whitespace to - and lower-casing.
    # e.g. 'Theory of operation' becomes 'theory-of-operation'
    # TODO worry about & eg 'Foo & Bar' becomes 'foo-&-bar'
    def id_from_inner(self, inner):
        without_tags = re.sub(r'<.+?>', '', inner)
        return re.sub(r'\s+', '-', without_tags).lower()

    def _render_wavejson(self, code):
        """Render the text of a wavejson block as script tag or svg."""
        # waveforms look like embedded code in the markdown
        # but the WaveDrom javascript wants it in a script tag
        if self.wavejs:
            return '<script type="WaveDrom">' + code + '</script>'
        try:
            wvobj = hjson.loads(code, use_decimal=True)
        except ValueError as err:
            log.warning('wavejson parse failed at line ' +
                        str(err.lineno) + ': ' + err.msg)
            # show the text up to the error position, then the rest in red
            return '<pre>Error line '  + str(err.lineno) + \
                ': ' + err.msg + " in:\n" + code[:err.pos] + \
                '</pre><pre style="color:red">' + \
                code[err.pos:] + '</pre>'
        svg_index = self.num_svg
        self.num_svg += 1
        return wavesvg.convert(wvobj, svg_index)

    def _find_lexer(self, language, code):
        """
        Return a pygments lexer for code, or None if there is none.

        The lexer named by language is used if there is one, otherwise
        pygments is asked to guess from the code itself.
        """
        lexer = None
        if language:
            if language == 'hjson':
                lexer = HjsonLexer()
            else:
                # pygments.util.ClassNotFound subclass of ValueError
                try:
                    lexer = get_lexer(language)
                except ValueError:
                    log.info('Failed to get lexer for language=' + language)
        if lexer is None:
            try:
                lexer = guess_lexer(code)
                log.info('Guess lexer as ' + lexer.name)
            except ValueError:
                log.info('Failed to guess lexer for code=' + code)
                lexer = None
        return lexer

    def render_lowrisc_code(self, token, directive):
        """
        Render a code block token.

        wavejson blocks become waveforms, other blocks are highlighted
        with pygments if a lexer is found and are otherwise rendered by
        the base class. The directive '.good' or '.bad' selects the
        colouring of a highlighted block.
        """
        code = token.children[0].content
        # parser seems to get confused (eg by `~`) and makes empty calls
        if len(code) == 0:
            log.warning('Unexpected empty code block. Check for `~`')
            return ""
        if token.language == "wavejson":
            return self._render_wavejson(code)

        lexer = self._find_lexer(token.language, code)
        if lexer is None:
            return super().render_block_code(token)

        if directive == '.good':
            self.formatter.cssstyles = ('background:#e0ffe0; '
                                        'border-left-color: #108040;')
        elif directive == '.bad':
            self.formatter.cssstyles = ('background:#ffe0e0; '
                                        'border-left-color: #c04030')
        else:
            self.formatter.cssstyles = ''
        return highlight(code, lexer, self.formatter)

    def render_code_fence_directive(self, token):
        return self.render_lowrisc_code(token, token.directive)

    def render_block_code(self, token):
        return self.render_lowrisc_code(token, '')

    def render_lowrisc_escape(self, token):
        """
        Render a {{% type text }} directive.

        An unknown directive is logged and its text is returned so that it
        shows up in the output.
        """
        # plan eventually to allow lowrisc-doc-hdr=doctype
        if token.type.startswith("lowrisc-doc-hdr"):
            return html_data.lowrisc_title_head + token.text + \
                   html_data.lowrisc_title_tail
        if token.type == "toc":
            return html_data.toc_mark_head + token.text + \
                   html_data.toc_mark_tail
        if token.type == "regfile":
            regpath = path.join(self.basedir, token.text)
            with open(regpath, 'r', encoding='UTF-8') as regfile:
                try:
                    obj = hjson.load(
                        regfile,
                        use_decimal=True,
                        object_pairs_hook=validate.checking_dict)
                except ValueError as err:
                    # a register file that does not parse is fatal
                    raise SystemExit(err)
            if validate.validate(obj) == 0:
                log.info("Generated register object\n")
                self.regs = obj
            else:
                log.warning("Register import failed\n")
                self.regs = None
            return ""
        if token.type == "registers":
            if self.regs is None:
                return "<B>Errors parsing registers prevents insertion.</B>"
            with io.StringIO() as outbuf:
                # note for CSS need to escape the mdown class on the div
                outbuf.write("</div>" + html_data.register_header)
                gen_html.gen_html(
                    self.regs, outbuf, toclist=self.toc, toclevel=3)
                outbuf.write(html_data.register_trailer +
                             '<div class="mdown">')
                return outbuf.getvalue()
        if token.type == "cfgfile":
            log.error("Deprecated lowRISC token cfgfile ignored. Config is now"\
                      " in a single file with the registers!")
            return ""
        if token.type == "hwcfg":
            if self.regs is None:
                return "<B>Errors parsing configuration prevents insertion.</B>"
            with io.StringIO() as outbuf:
                # note for CSS need to escape the mdown class on the div
                outbuf.write("</div>" + html_data.hwcfg_header)
                gen_cfg_html.gen_cfg_html(self.regs, outbuf)
                outbuf.write(html_data.hwcfg_trailer + '<div class="mdown">')
                return outbuf.getvalue()
        if token.type in ("section1", "section2"):
            # TODO should token.text get parsed to allow markdown in it?
            if token.type == "section1":
                toc_level, css_class = 2, "section_heading"
            else:
                toc_level, css_class = 3, "subsection_heading"
            anchor = self.id_from_inner(token.text)
            self.toc.append((toc_level, token.text, anchor))
            return html_data.section_template.format(
                cls=css_class, id=anchor, inner=token.text)
        if token.type == "doctree":
            # token.text is a whitespace separated list of directories
            # (relative to basedir) that are searched for .md files
            subdirs = sorted(
                path.join(self.basedir, s) for s in token.text.split())
            md_paths = []
            for subdir in subdirs:
                md_paths.extend(sorted(Path(subdir).rglob('*.md')))
            entries = []
            for md_path in md_paths:
                rel_md_path = md_path.relative_to(self.basedir)
                entries.append(html_data.doctree_template.format(
                    link=rel_md_path.with_suffix('.html'),
                    text=rel_md_path.with_suffix('')))
            return html_data.doctree_head + ''.join(entries) + \
                   html_data.doctree_tail

        bad_tag = '{{% ' + token.type + ' ' + token.text + ' }}'
        log.warning("Unknown lowRISC tag " + bad_tag)
        return bad_tag

    def render_reg_ref(self, token):
        """
        Render a !!Register reference as a link to the register.

        The original text is returned unchanged if no register file was
        imported or the register is not in its list of names.
        """
        if self.regs is None:
            log.warning("!!" + token.rname + ": no register import was done.")
            return '!!' + token.rname
        cname = self.regs['name']
        # the register is the part before the first dot (rest is a field)
        base = token.rname.partition('.')[0].lower()
        if base not in self.regs['genrnames']:
            log.warning("!!" + token.rname + " not found in register list.")
            return '!!' + token.rname

        # a trailing dot is the end of a sentence and not part of the name
        shown = token.rname
        after = ''
        if shown[-1] == ".":
            shown = shown[:-1]
            after = '.'
        return '<a href="#Reg_' + base + '"><code class="reg">' + \
            cname + "." + shown + '</code></a>' + after

    # copied from mistletoe/html_renderer.py and id added
    # override heading to insert reference for anchor
    def render_heading(self, token):
        template = '<h{level} id="{id}">{inner}</h{level}>'
        inner = self.render_inner(token)
        anchor = self.id_from_inner(inner)
        self.toc.append((token.level, inner, anchor))
        return template.format(level=token.level, inner=inner, id=anchor)

    # decorator for link rendering functions in class HTMLRenderer
    # converts relative .md link targets to .html link targets
    def _convert_local_links(func):
        def _wrapper_convert_local_links(*args, **kwargs):
            # args is (renderer, token, ...); the target of the token is
            # rewritten in place before the wrapped function renders it
            link_token = args[1]
            target_url = urlparse(link_token.target)
            target_path = Path(target_url.path)
            # check link is not absolute
            if not target_url.netloc and target_path.suffix in ['.md', '.mkd']:
                target_url = target_url._replace(
                    path=str(target_path.with_suffix('.html')))
                link_token.target = urlunparse(target_url)

            return func(*args, **kwargs)

        return _wrapper_convert_local_links

    # apply to the link rendering functions inherited from HTMLRenderer
    render_link = _convert_local_links(HTMLRenderer.render_link)
    render_auto_link = _convert_local_links(HTMLRenderer.render_auto_link)
