epicyon/markdown.py

# Module metadata for the markdown to html conversion functions
__filename__ = "markdown.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.3.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Web Interface"


def _markdown_emphasis_html(markdown: str) -> str:
    """Add italics and bold html markup to the given markdown
    """
    replacements = {
        ' **': ' <b>',
        '** ': '</b> ',
        '**.': '</b>.',
        '**:': '</b>:',
        '**;': '</b>;',
        '**,': '</b>,',
        '**\n': '</b>\n',
        ' *': ' <i>',
        '* ': '</i> ',
        '*.': '</i>.',
        '*:': '</i>:',
        '*;': '</i>;',
        '*,': '</i>,',
        '*\n': '</i>\n',
        ' _': ' <ul>',
        '_ ': '</ul> ',
        '_.': '</ul>.',
        '_:': '</ul>:',
        '_;': '</ul>;',
        '_,': '</ul>,',
        '_\n': '</ul>\n',
        ' `': ' <em>',
        '`.': '</em>.',
        '`:': '</em>:',
        '`;': '</em>;',
        '`\n': '</em>\n',
        '` ': '</em> '
    }
    for md_str, html in replacements.items():
        markdown = markdown.replace(md_str, html)

    if markdown.startswith('**'):
        markdown = markdown[2:] + '<b>'
    elif markdown.startswith('*'):
        markdown = markdown[1:] + '<i>'
    elif markdown.startswith('_'):
        markdown = markdown[1:] + '<ul>'

    if markdown.endswith('**'):
        markdown = markdown[:len(markdown) - 2] + '</b>'
    elif markdown.endswith('*'):
        markdown = markdown[:len(markdown) - 1] + '</i>'
    elif markdown.endswith('_'):
        markdown = markdown[:len(markdown) - 1] + '</ul>'
    return markdown


def _markdown_replace_quotes(markdown: str) -> str:
    """Replaces > quotes with html blockquote
    """
    if '> ' not in markdown:
        return markdown
    lines = markdown.split('\n')
    result = ''
    prev_quote_line = None
    code_section = False
    for line in lines:
        # avoid code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        else:
            if '</code>' in line:
                code_section = False
        if code_section:
            result += line + '\n'
            continue

        if '> ' not in line:
            result += line + '\n'
            prev_quote_line = None
            continue
        line_str = line.strip()
        if not line_str.startswith('> '):
            result += line + '\n'
            prev_quote_line = None
            continue
        line_str = line_str.replace('> ', '', 1).strip()
        if prev_quote_line:
            new_prev_line = prev_quote_line.replace('</i></blockquote>\n', '')
            result = result.replace(prev_quote_line, new_prev_line) + ' '
            line_str += '</i></blockquote>\n'
        else:
            line_str = '<blockquote><i>' + line_str + '</i></blockquote>\n'
        result += line_str
        prev_quote_line = line_str

    if '</blockquote>\n' in result:
        result = result.replace('</blockquote>\n', '</blockquote>')

    if result.endswith('\n') and \
       not markdown.endswith('\n'):
        result = result[:len(result) - 1]
    return result


def _markdown_replace_links(markdown: str, images: bool = False) -> str:
    """Replaces markdown links with html
    Optionally replace image links
    """
    replace_links = {}
    text = markdown
    start_chars = '['
    if images:
        start_chars = '!['
    while start_chars in text:
        if ')' not in text:
            break
        text = text.split(start_chars, 1)[1]
        markdown_link = start_chars + text.split(')')[0] + ')'
        if ']' not in markdown_link or \
           '(' not in markdown_link:
            text = text.split(')', 1)[1]
            continue
        if not images:
            replace_links[markdown_link] = \
                '<a href="' + \
                markdown_link.split('(')[1].split(')')[0] + \
                '" target="_blank" rel="nofollow noopener noreferrer">' + \
                markdown_link.split(start_chars)[1].split(']')[0] + \
                '</a>'
        else:
            replace_links[markdown_link] = \
                '<img class="markdownImage" src="' + \
                markdown_link.split('(')[1].split(')')[0] + \
                '" alt="' + \
                markdown_link.split(start_chars)[1].split(']')[0] + \
                '" />'
        text = text.split(')', 1)[1]

    for md_link, html_link in replace_links.items():
        lines = markdown.split('\n')
        markdown = ''
        code_section = False
        ctr = 0
        for line in lines:
            if ctr > 0:
                markdown += '\n'
            # avoid code sections
            if not code_section:
                if '<code>' in line:
                    code_section = True
            else:
                if '</code>' in line:
                    code_section = False
            if code_section:
                markdown += line
                ctr += 1
                continue
            markdown += line.replace(md_link, html_link)
            ctr += 1
    return markdown


def _markdown_replace_bullet_points(markdown: str) -> str:
    """Replaces bullet points
    """
    lines = markdown.split('\n')
    bullet_style = ('* ', ' * ', '- ', ' - ')
    bullet_matched = ''
    start_line = -1
    line_ctr = 0
    changed = False
    code_section = False
    for line in lines:
        if not line.strip():
            # skip blank lines
            line_ctr += 1
            continue

        # skip over code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        else:
            if '</code>' in line:
                code_section = False
        if code_section:
            line_ctr += 1
            continue

        if not bullet_matched:
            for test_str in bullet_style:
                if line.startswith(test_str):
                    bullet_matched = test_str
                    start_line = line_ctr
                    break
        else:
            if not line.startswith(bullet_matched):
                for index in range(start_line, line_ctr):
                    line_text = lines[index].replace(bullet_matched, '', 1)
                    if index == start_line:
                        lines[index] = '<ul>\n<li>' + line_text + '</li>'
                    elif index == line_ctr - 1:
                        lines[index] = '<li>' + line_text + '</li>\n</ul>'
                    else:
                        lines[index] = '<li>' + line_text + '</li>'
                changed = True
                start_line = -1
                bullet_matched = ''
        line_ctr += 1

    if not changed:
        return markdown

    markdown = ''
    for line in lines:
        markdown += line + '\n'
    return markdown


def _markdown_replace_code(markdown: str) -> str:
    """Replaces code sections within markdown
    """
    lines = markdown.split('\n')
    start_line = -1
    line_ctr = 0
    changed = False
    section_active = False
    for line in lines:
        if not line.strip():
            # skip blank lines
            line_ctr += 1
            continue
        if line.startswith('```'):
            if not section_active:
                start_line = line_ctr
                section_active = True
            else:
                lines[start_line] = '<code>'
                lines[line_ctr] = '</code>'
                section_active = False
                changed = True
        line_ctr += 1

    if not changed:
        return markdown

    markdown = ''
    for line in lines:
        markdown += line + '\n'
    return markdown


def markdown_to_html(markdown: str) -> str:
    """Converts markdown formatted text to html

    Code sections, bullet points, quotes, emphasis, image links and
    links are converted in that order, followed by headers.
    A line beginning with one to five hash characters becomes a h1 to
    h5 header. Lines are separated by '<br>' followed by a newline,
    except that nothing is added between a header and the line after
    it. Lines within <code> sections are not converted to headers.

    Returns the resulting html
    """
    markdown = _markdown_replace_code(markdown)
    markdown = _markdown_replace_bullet_points(markdown)
    markdown = _markdown_replace_quotes(markdown)
    markdown = _markdown_emphasis_html(markdown)
    markdown = _markdown_replace_links(markdown, True)
    markdown = _markdown_replace_links(markdown)

    # replace headers
    # These are ordered with the longest first, so that a line
    # beginning with '##' isn't matched as a h1
    titles = {
        "h5": '#####',
        "h4": '####',
        "h3": '###',
        "h2": '##',
        "h1": '#'
    }
    html_parts = []
    ctr = 0
    code_section = False
    for line in markdown.split('\n'):
        if ctr > 0:
            html_parts.append('<br>\n')

        # avoid code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        else:
            if '</code>' in line:
                code_section = False
        if code_section:
            html_parts.append(line)
            ctr += 1
            continue

        for hsh, hashes in titles.items():
            if line.startswith(hashes):
                # Only the leading hashes are removed, so that any
                # hash characters within the title text are kept
                title = line[len(hashes):].strip()
                # an optional closing run of hashes, separated from
                # the title text by a space, is also removed
                without_closing = title.rstrip('#')
                if without_closing.endswith(' '):
                    title = without_closing.rstrip()
                line = '<' + hsh + '>' + title + '</' + hsh + '>'
                # no line break between a header and the next line
                ctr = -1
                break
        html_parts.append(line)
        ctr += 1
    return ''.join(html_parts)
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`__filename__ = "markdown.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
Version 1.3.0 2022-02-03 13:58:20 +00:00			`__version__ = "1.3.0"`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`__maintainer__ = "Bob Mottram"`
Change domain to libreserver.org 2021-09-10 16:14:50 +00:00			`__email__ = "bob@libreserver.org"`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`__status__ = "Production"`
			`__module_group__ = "Web Interface"`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def _markdown_emphasis_html(markdown: str) -> str:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`"""Add italics and bold html markup to the given markdown`
			`"""`
			`replacements = {`
			`' **': ' <b>',`
			`'** ': '</b> ',`
			`'**.': '</b>.',`
			`'**:': '</b>:',`
			`'**;': '</b>;',`
			`'**,': '</b>,',`
			`'**\n': '</b>\n',`
			`' *': ' <i>',`
			`'* ': '</i> ',`
			`'*.': '</i>.',`
			`'*:': '</i>:',`
			`'*;': '</i>;',`
			`'*,': '</i>,',`
			`'*\n': '</i>\n',`
			`' _': ' <ul>',`
			`'_ ': '</ul> ',`
			`'_.': '</ul>.',`
			`'_:': '</ul>:',`
			`'_;': '</ul>;',`
			`'_,': '</ul>,',`
Improve markdown support 2022-06-27 10:38:31 +00:00			`'_\n': '</ul>\n',`
Markdown replacements 2022-06-27 10:41:52 +00:00			' `': ' <em>',
			'`.': '</em>.',
			'`:': '</em>:',
			'`;': '</em>;',
			'`\n': '</em>\n',
			'` ': '</em> '
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`}`
Snake case 2022-01-02 22:35:39 +00:00			`for md_str, html in replacements.items():`
			`markdown = markdown.replace(md_str, html)`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00
			`if markdown.startswith('**'):`
			`markdown = markdown[2:] + '<b>'`
			`elif markdown.startswith('*'):`
			`markdown = markdown[1:] + '<i>'`
			`elif markdown.startswith('_'):`
			`markdown = markdown[1:] + '<ul>'`

			`if markdown.endswith('**'):`
			`markdown = markdown[:len(markdown) - 2] + '</b>'`
			`elif markdown.endswith('*'):`
			`markdown = markdown[:len(markdown) - 1] + '</i>'`
			`elif markdown.endswith('_'):`
			`markdown = markdown[:len(markdown) - 1] + '</ul>'`
			`return markdown`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def _markdown_replace_quotes(markdown: str) -> str:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`"""Replaces > quotes with html blockquote`
			`"""`
			`if '> ' not in markdown:`
			`return markdown`
			`lines = markdown.split('\n')`
			`result = ''`
Snake case 2022-01-02 22:35:39 +00:00			`prev_quote_line = None`
Improve markdown support 2022-06-27 10:38:31 +00:00			`code_section = False`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`for line in lines:`
Improve markdown support 2022-06-27 10:38:31 +00:00			`# avoid code sections`
			`if not code_section:`
			`if '<code>' in line:`
			`code_section = True`
			`else:`
			`if '</code>' in line:`
			`code_section = False`
			`if code_section:`
			`result += line + '\n'`
			`continue`

Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`if '> ' not in line:`
			`result += line + '\n'`
Snake case 2022-01-02 22:35:39 +00:00			`prev_quote_line = None`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`continue`
Snake case 2022-01-02 22:35:39 +00:00			`line_str = line.strip()`
			`if not line_str.startswith('> '):`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`result += line + '\n'`
Snake case 2022-01-02 22:35:39 +00:00			`prev_quote_line = None`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`continue`
Snake case 2022-01-02 22:35:39 +00:00			`line_str = line_str.replace('> ', '', 1).strip()`
			`if prev_quote_line:`
			`new_prev_line = prev_quote_line.replace('</i></blockquote>\n', '')`
			`result = result.replace(prev_quote_line, new_prev_line) + ' '`
			`line_str += '</i></blockquote>\n'`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`else:`
Snake case 2022-01-02 22:35:39 +00:00			`line_str = '<blockquote><i>' + line_str + '</i></blockquote>\n'`
			`result += line_str`
			`prev_quote_line = line_str`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00
			`if '</blockquote>\n' in result:`
			`result = result.replace('</blockquote>\n', '</blockquote>')`

			`if result.endswith('\n') and \`
			`not markdown.endswith('\n'):`
			`result = result[:len(result) - 1]`
			`return result`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def _markdown_replace_links(markdown: str, images: bool = False) -> str:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`"""Replaces markdown links with html`
			`Optionally replace image links`
			`"""`
Snake case 2022-01-02 22:35:39 +00:00			`replace_links = {}`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`text = markdown`
Snake case 2022-01-02 22:35:39 +00:00			`start_chars = '['`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`if images:`
Snake case 2022-01-02 22:35:39 +00:00			`start_chars = '!['`
			`while start_chars in text:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`if ')' not in text:`
			`break`
Snake case 2022-01-02 22:35:39 +00:00			`text = text.split(start_chars, 1)[1]`
			`markdown_link = start_chars + text.split(')')[0] + ')'`
			`if ']' not in markdown_link or \`
			`'(' not in markdown_link:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`text = text.split(')', 1)[1]`
			`continue`
			`if not images:`
Snake case 2022-01-02 22:35:39 +00:00			`replace_links[markdown_link] = \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'<a href="' + \`
Snake case 2022-01-02 22:35:39 +00:00			`markdown_link.split('(')[1].split(')')[0] + \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'" target="_blank" rel="nofollow noopener noreferrer">' + \`
Snake case 2022-01-02 22:35:39 +00:00			`markdown_link.split(start_chars)[1].split(']')[0] + \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'</a>'`
			`else:`
Snake case 2022-01-02 22:35:39 +00:00			`replace_links[markdown_link] = \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'<img class="markdownImage" src="' + \`
Snake case 2022-01-02 22:35:39 +00:00			`markdown_link.split('(')[1].split(')')[0] + \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'" alt="' + \`
Snake case 2022-01-02 22:35:39 +00:00			`markdown_link.split(start_chars)[1].split(']')[0] + \`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`'" />'`
			`text = text.split(')', 1)[1]`
Don't replace links within code sections 2022-06-27 11:14:42 +00:00
Snake case 2022-01-02 22:35:39 +00:00			`for md_link, html_link in replace_links.items():`
Don't replace links within code sections 2022-06-27 11:14:42 +00:00			`lines = markdown.split('\n')`
			`markdown = ''`
			`code_section = False`
			`ctr = 0`
			`for line in lines:`
			`if ctr > 0:`
			`markdown += '\n'`
			`# avoid code sections`
			`if not code_section:`
			`if '<code>' in line:`
			`code_section = True`
			`else:`
			`if '</code>' in line:`
			`code_section = False`
			`if code_section:`
			`markdown += line`
			`ctr += 1`
			`continue`
			`markdown += line.replace(md_link, html_link)`
			`ctr += 1`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`return markdown`


Improve markdown support 2022-06-27 10:38:31 +00:00			`def _markdown_replace_bullet_points(markdown: str) -> str:`
			`"""Replaces bullet points`
			`"""`
			`lines = markdown.split('\n')`
			`bullet_style = ('* ', ' * ', '- ', ' - ')`
			`bullet_matched = ''`
			`start_line = -1`
			`line_ctr = 0`
			`changed = False`
			`code_section = False`
			`for line in lines:`
			`if not line.strip():`
			`# skip blank lines`
			`line_ctr += 1`
			`continue`

			`# skip over code sections`
			`if not code_section:`
			`if '<code>' in line:`
			`code_section = True`
			`else:`
			`if '</code>' in line:`
			`code_section = False`
			`if code_section:`
			`line_ctr += 1`
			`continue`

			`if not bullet_matched:`
			`for test_str in bullet_style:`
			`if line.startswith(test_str):`
			`bullet_matched = test_str`
			`start_line = line_ctr`
			`break`
			`else:`
			`if not line.startswith(bullet_matched):`
			`for index in range(start_line, line_ctr):`
			`line_text = lines[index].replace(bullet_matched, '', 1)`
			`if index == start_line:`
			`lines[index] = '<ul>\n<li>' + line_text + '</li>'`
			`elif index == line_ctr - 1:`
			`lines[index] = '<li>' + line_text + '</li>\n</ul>'`
			`else:`
			`lines[index] = '<li>' + line_text + '</li>'`
			`changed = True`
			`start_line = -1`
			`bullet_matched = ''`
			`line_ctr += 1`

			`if not changed:`
			`return markdown`

			`markdown = ''`
			`for line in lines:`
			`markdown += line + '\n'`
			`return markdown`


			`def _markdown_replace_code(markdown: str) -> str:`
			`"""Replaces code sections within markdown`
			`"""`
			`lines = markdown.split('\n')`
			`start_line = -1`
			`line_ctr = 0`
			`changed = False`
			`section_active = False`
			`for line in lines:`
			`if not line.strip():`
			`# skip blank lines`
			`line_ctr += 1`
			`continue`
			if line.startswith('```'):
			`if not section_active:`
			`start_line = line_ctr`
			`section_active = True`
			`else:`
			`lines[start_line] = '<code>'`
			`lines[line_ctr] = '</code>'`
			`section_active = False`
			`changed = True`
			`line_ctr += 1`

			`if not changed:`
			`return markdown`

			`markdown = ''`
			`for line in lines:`
			`markdown += line + '\n'`
			`return markdown`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def markdown_to_html(markdown: str) -> str:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`"""Converts markdown formatted text to html`
			`"""`
Improve markdown support 2022-06-27 10:38:31 +00:00			`markdown = _markdown_replace_code(markdown)`
			`markdown = _markdown_replace_bullet_points(markdown)`
Moving to snake case 2021-12-29 21:55:09 +00:00			`markdown = _markdown_replace_quotes(markdown)`
			`markdown = _markdown_emphasis_html(markdown)`
			`markdown = _markdown_replace_links(markdown, True)`
			`markdown = _markdown_replace_links(markdown)`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00
			`# replace headers`
Snake case 2022-01-02 22:35:39 +00:00			`lines_list = markdown.split('\n')`
			`html_str = ''`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`ctr = 0`
Improve markdown support 2022-06-27 10:38:31 +00:00			`code_section = False`
Tidying 2021-07-03 20:15:34 +00:00			`titles = {`
			`"h5": '#####',`
			`"h4": '####',`
			`"h3": '###',`
			`"h2": '##',`
			`"h1": '#'`
			`}`
Snake case 2022-01-02 22:35:39 +00:00			`for line in lines_list:`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`if ctr > 0:`
Line breaks for view source 2022-06-27 11:18:34 +00:00			`html_str += '<br>\n'`
Improve markdown support 2022-06-27 10:38:31 +00:00
			`# avoid code sections`
			`if not code_section:`
			`if '<code>' in line:`
			`code_section = True`
			`else:`
			`if '</code>' in line:`
			`code_section = False`
			`if code_section:`
			`html_str += line`
			`ctr += 1`
			`continue`

Snake case 2022-01-02 22:35:39 +00:00			`for hsh, hashes in titles.items():`
Tidying 2021-07-03 20:15:34 +00:00			`if line.startswith(hashes):`
			`line = line.replace(hashes, '').strip()`
Snake case 2022-01-02 22:35:39 +00:00			`line = '<' + hsh + '>' + line + '</' + hsh + '>'`
Tidying 2021-07-03 20:15:34 +00:00			`ctr = -1`
			`break`
Snake case 2022-01-02 22:35:39 +00:00			`html_str += line`
Move markdown functions to a separate module 2021-06-25 14:38:31 +00:00			`ctr += 1`
Snake case 2022-01-02 22:35:39 +00:00			`return html_str`