epicyon/markdown.py

496 lines
15 KiB
Python
Raw Normal View History

__filename__ = "markdown.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Web Interface"
def _markdown_get_sections(markdown: str) -> []:
"""Returns a list of sections for markdown
"""
if '<code>' not in markdown:
return [markdown]
lines = markdown.split('\n')
sections = []
section_text = ''
section_active = False
ctr = 0
for line in lines:
if ctr > 0:
section_text += '\n'
if not section_active:
if '<code>' in line:
section_active = True
sections.append(section_text)
section_text = ''
else:
if '</code>' in line:
section_active = False
sections.append(section_text)
section_text = ''
section_text += line
ctr += 1
if section_text.strip():
sections.append(section_text)
return sections
2021-12-29 21:55:09 +00:00
def _markdown_emphasis_html(markdown: str) -> str:
    """Add bold, italic, underline and emphasis html markup to the
    given markdown.
    Markers are only recognised when they are adjacent to whitespace,
    punctuation, brackets or html tags, as listed in the replacements
    table. Sections containing <code> are passed through unaltered.
    Returns the markdown with the recognised markers replaced by html tags
    """
    # NOTE: the order matters. Dictionaries keep their insertion order and
    # the double asterisk (bold) patterns must be replaced before the
    # single asterisk (italic) ones
    replacements = {
        ' **': ' <b>',
        '** ': '</b> ',
        '**.': '</b>.',
        '**:': '</b>:',
        '**;': '</b>;',
        '?**': '?</b>',
        '\n**': '\n<b>',
        '**,': '</b>,',
        '**\n': '</b>\n',
        # opening bracket followed by bold: only the opening tag is
        # emitted, consistent with the '(_' and '(`' entries below
        '(**': '(<b>',
        '**)': '</b>)',
        '>**': '><b>',
        '**<': '</b><',
        '>*': '><i>',
        '*<': '</i><',
        ' *': ' <i>',
        '* ': '</i> ',
        '?*': '?</i>',
        '\n*': '\n<i>',
        '*.': '</i>.',
        '*:': '</i>:',
        '*;': '</i>;',
        '(*': '(<i>',
        '*)': '</i>)',
        '*,': '</i>,',
        '*\n': '</i>\n',
        '(_': '(<u>',
        '_)': '</u>)',
        ' _': ' <u>',
        '_ ': '</u> ',
        '_.': '</u>.',
        '_:': '</u>:',
        '_;': '</u>;',
        '_,': '</u>,',
        '_\n': '</u>\n',
        ' `': ' <em>',
        '`.': '</em>.',
        '`:': '</em>:',
        "`'": "</em>'",
        "(`": "(<em>",
        "`)": "</em>)",
        '`;': '</em>;',
        '`,': '</em>,',
        '`\n': '</em>\n',
        '` ': '</em> '
    }

    sections = _markdown_get_sections(markdown)
    markdown = ''
    for section_text in sections:
        # code sections are not altered
        if '<code>' in section_text:
            markdown += section_text
            continue
        for md_str, html in replacements.items():
            section_text = section_text.replace(md_str, html)

        # a marker at the very beginning of the section has no preceding
        # character for the table to match, so the opening tag is
        # placed at the front of the text
        if section_text.startswith('**'):
            section_text = '<b>' + section_text[2:]
        elif section_text.startswith('*'):
            section_text = '<i>' + section_text[1:]
        elif section_text.startswith('_'):
            section_text = '<u>' + section_text[1:]

        # likewise a marker at the very end has no following character
        if section_text.endswith('**'):
            section_text = section_text[:-2] + '</b>'
        elif section_text.endswith('*'):
            section_text = section_text[:-1] + '</i>'
        elif section_text.endswith('_'):
            section_text = section_text[:-1] + '</u>'

        # sections which are only whitespace are dropped
        if section_text.strip():
            markdown += section_text
    return markdown
2021-12-29 21:55:09 +00:00
def _markdown_replace_quotes(markdown: str) -> str:
"""Replaces > quotes with html blockquote
"""
if '> ' not in markdown:
return markdown
lines = markdown.split('\n')
result = ''
2022-01-02 22:35:39 +00:00
prev_quote_line = None
2022-06-27 10:38:31 +00:00
code_section = False
for line in lines:
2022-06-27 10:38:31 +00:00
# avoid code sections
if not code_section:
if '<code>' in line:
code_section = True
else:
if '</code>' in line:
code_section = False
if code_section:
result += line + '\n'
continue
if '> ' not in line:
result += line + '\n'
2022-01-02 22:35:39 +00:00
prev_quote_line = None
continue
2022-01-02 22:35:39 +00:00
line_str = line.strip()
if not line_str.startswith('> '):
result += line + '\n'
2022-01-02 22:35:39 +00:00
prev_quote_line = None
continue
2022-01-02 22:35:39 +00:00
line_str = line_str.replace('> ', '', 1).strip()
if prev_quote_line:
new_prev_line = prev_quote_line.replace('</i></blockquote>\n', '')
result = result.replace(prev_quote_line, new_prev_line) + ' '
line_str += '</i></blockquote>\n'
else:
2022-01-02 22:35:39 +00:00
line_str = '<blockquote><i>' + line_str + '</i></blockquote>\n'
result += line_str
prev_quote_line = line_str
if '</blockquote>\n' in result:
result = result.replace('</blockquote>\n', '</blockquote>')
if result.endswith('\n') and \
not markdown.endswith('\n'):
result = result[:len(result) - 1]
return result
2022-06-28 21:34:05 +00:00
def _markdown_replace_links(markdown: str) -> str:
    """Converts markdown links of the form [text](url) into html anchors
    and image links of the form ![text](url) into html images.
    Urls beginning with # are treated as bookmarks within the page,
    and any other url opens in a new tab.
    Sections containing <code> are not altered
    """
    result = ''
    for section_text in _markdown_get_sections(markdown):
        if '<code>' in section_text or '](' not in section_text:
            result += section_text
            continue

        # each "](" separates the text of a link from its url, so
        # adjacent fragments are examined as pairs
        fragments = section_text.split('](')
        for before, after in zip(fragments, fragments[1:]):
            if '[' not in before or ')' not in after:
                continue
            link_text = before.split('[')[-1]
            link_url = after.split(')')[0]
            markdown_link = '[' + link_text + '](' + link_url + ')'
            # backticks are not shown within the link text
            link_text = link_text.replace('`', '')

            image_link = '!' + markdown_link
            if image_link in section_text:
                image_html = \
                    '<img class="markdownImage" src="' + \
                    link_url + '" alt="' + link_text + '" />'
                section_text = section_text.replace(image_link, image_html)

            if markdown_link in section_text:
                if link_url.startswith('#'):
                    # bookmark
                    anchor_html = \
                        '<a href="' + link_url + '">' + link_text + '</a>'
                else:
                    # external link
                    anchor_html = \
                        '<a href="' + link_url + '" target="_blank" ' + \
                        'rel="nofollow noopener noreferrer">' + \
                        link_text + '</a>'
                section_text = \
                    section_text.replace(markdown_link, anchor_html)
        result += section_text
    return result
2024-08-13 10:34:48 +00:00
def _markdown_replace_misskey(markdown: str) -> str:
    """Converts misskey flavoured markdown animations, of the form
    $[type text], into html spans having the class mfm-type.
    If an emoji is associated with the type of animation then it
    is appended to the text. Sections containing <code> are not altered
    https://codeberg.org/fediverse/fep/src/branch/main/fep/c16b/fep-c16b.md
    https://akkoma.dev/nbsp/marked-mfm/src/branch/master/docs/syntax.md
    """
    animation_types = {
        'tada': '',
        'jelly': '',
        'twitch': '😛',
        'shake': '🫨',
        'spin': '',
        'jump': '🦘',
        'bounce': '',
        'flip': '🙃',
        'x2': '',
        'x3': '',
        'x4': '',
        'font': '',
        'rotate': ''
    }
    if '$[' not in markdown or ']' not in markdown:
        return markdown

    result = ''
    for section_text in _markdown_get_sections(markdown):
        may_contain_mfm = \
            '$[' in section_text and \
            ']' in section_text and \
            ' ' in section_text
        if '<code>' in section_text or not may_contain_mfm:
            result += section_text
            continue

        # whatever precedes the first "$[" can't be an animation
        for candidate in section_text.split('$[')[1:]:
            if ']' not in candidate:
                continue
            misskey_str = candidate.split(']')[0]
            if ' ' not in misskey_str:
                continue
            animation_type, animation_text = misskey_str.split(' ', 1)

            # the first known type which the animation name begins with
            mfm_type = None
            for anim in animation_types:
                if animation_type.startswith(anim):
                    mfm_type = anim
                    break
            if mfm_type is None:
                continue

            emoji = animation_types[mfm_type]
            if emoji:
                animation_text += ' ' + emoji
            span_html = \
                '<span class="mfm-' + mfm_type + '">' + animation_text + \
                '</span>'
            section_text = \
                section_text.replace('$[' + misskey_str + ']', span_html)
        result += section_text
    return result
2022-06-27 10:38:31 +00:00
def _markdown_replace_bullet_points(markdown: str) -> str:
"""Replaces bullet points
"""
lines = markdown.split('\n')
bullet_style = ('* ', ' * ', '- ', ' - ')
bullet_matched = ''
start_line = -1
line_ctr = 0
changed = False
code_section = False
for line in lines:
if not line.strip():
# skip blank lines
line_ctr += 1
continue
# skip over code sections
if not code_section:
if '<code>' in line:
code_section = True
else:
if '</code>' in line:
code_section = False
if code_section:
line_ctr += 1
continue
if not bullet_matched:
for test_str in bullet_style:
if line.startswith(test_str):
bullet_matched = test_str
start_line = line_ctr
break
else:
if not line.startswith(bullet_matched):
for index in range(start_line, line_ctr):
line_text = lines[index].replace(bullet_matched, '', 1)
if index == start_line:
2022-06-27 16:21:48 +00:00
lines[index] = \
'<ul class="md_list">\n<li>' + line_text + '</li>'
2022-06-27 10:38:31 +00:00
elif index == line_ctr - 1:
lines[index] = '<li>' + line_text + '</li>\n</ul>'
else:
lines[index] = '<li>' + line_text + '</li>'
changed = True
start_line = -1
bullet_matched = ''
line_ctr += 1
if not changed:
return markdown
markdown = ''
for line in lines:
markdown += line + '\n'
return markdown
def _markdown_replace_code(markdown: str) -> str:
"""Replaces code sections within markdown
"""
lines = markdown.split('\n')
start_line = -1
line_ctr = 0
changed = False
section_active = False
2022-11-10 09:52:38 +00:00
url_encode = False
2022-11-09 18:06:54 +00:00
html_escape_table = {
"&": "&amp;",
'"': "&quot;",
"'": "&apos;",
">": "&gt;",
"<": "&lt;"
}
2022-06-27 10:38:31 +00:00
for line in lines:
if not line.strip():
# skip blank lines
line_ctr += 1
continue
if line.startswith('```'):
if not section_active:
2022-11-09 17:51:10 +00:00
if 'html' in line or 'xml' in line or 'rdf' in line:
2022-11-10 09:52:38 +00:00
url_encode = True
2022-06-27 10:38:31 +00:00
start_line = line_ctr
section_active = True
else:
lines[start_line] = '<code>'
lines[line_ctr] = '</code>'
2022-11-10 09:52:38 +00:00
if url_encode:
2022-11-09 17:58:52 +00:00
lines[start_line] = '<pre>\n<code>'
lines[line_ctr] = '</code>\n</pre>'
2022-11-10 09:52:38 +00:00
for line_num in range(start_line + 1, line_ctr):
lines[line_num] = \
"".join(html_escape_table.get(char, char)
for char in lines[line_num])
2022-06-27 10:38:31 +00:00
section_active = False
changed = True
2022-11-10 09:52:38 +00:00
url_encode = False
2022-06-27 10:38:31 +00:00
line_ctr += 1
if not changed:
return markdown
markdown = ''
for line in lines:
markdown += line + '\n'
return markdown
def markdown_example_numbers(markdown: str) -> str:
    """Renumbers the example headings in the ActivityPub specification
    document, so that they are sequential beginning from 1.
    Every line of the returned text, including the last one,
    is followed by a newline
    """
    numbered_lines = []
    next_number = 1
    for text in markdown.split('\n'):
        if text.startswith('##') and '## Example ' in text:
            # keep the hashes and replace whatever follows the
            # word Example with the next number in the sequence
            heading_prefix = text.split(' Example ')[0]
            text = heading_prefix + ' Example ' + str(next_number)
            next_number += 1
        numbered_lines.append(text + '\n')
    return ''.join(numbered_lines)
2021-12-29 21:55:09 +00:00
def markdown_to_html(markdown: str) -> str:
    """Converts markdown formatted text to html.
    Misskey animations, code sections, bullet points, quotes, emphasis
    and links are converted first, and then the headers. A header is
    a line beginning with between one and six hashes followed by a
    space, and it is given an id, derived from its title, which can be
    used as a bookmark. Lines are separated by <br>, except within
    code sections and directly after a header
    """
    markdown = _markdown_replace_misskey(markdown)
    markdown = _markdown_replace_code(markdown)
    markdown = _markdown_replace_bullet_points(markdown)
    markdown = _markdown_replace_quotes(markdown)
    markdown = _markdown_emphasis_html(markdown)
    markdown = _markdown_replace_links(markdown)

    # replace headers
    lines_list = markdown.split('\n')
    html_str = ''
    ctr = 0
    code_section = False
    # ordered from the longest to the shortest run of hashes
    titles = {
        "h6": '######',
        "h5": '#####',
        "h4": '####',
        "h3": '###',
        "h2": '##',
        "h1": '#'
    }
    for line in lines_list:
        if ctr > 0:
            if not code_section:
                html_str += '<br>\n'
            else:
                html_str += '\n'

        # avoid code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        else:
            if '</code>' in line:
                code_section = False
        if code_section:
            html_str += line
            ctr += 1
            continue

        for hsh, hashes in titles.items():
            # the hashes need to be followed by a space, so that a line
            # beginning with a hashtag isn't mistaken for a header
            if not line.startswith(hashes + ' '):
                continue
            # only the leading hashes are removed, and any hash
            # characters occurring within the title are kept
            title_text = line[len(hashes):].strip()
            if title_text:
                bookmark_str = title_text.lower().replace(' ', '-')
                line = '<' + hsh + ' id="' + bookmark_str + '">' + \
                    title_text + '</' + hsh + '>\n'
                # no line break is wanted directly after a header
                ctr = -1
            break
        html_str += line
        ctr += 1

    # remove line breaks which are not wanted next to block level tags
    replacements = (
        ('<code><br>', '<code>'),
        ('</code><br>', '</code>'),
        ('<ul class="md_list"><br>', '<ul class="md_list">'),
        ('</li><br>', '</li>')
    )
    for pair in replacements:
        html_str = html_str.replace(pair[0], pair[1])

    return html_str