2021-06-25 14:38:31 +00:00
|
|
|
__filename__ = "markdown.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
2024-01-21 19:01:20 +00:00
|
|
|
__version__ = "1.5.0"
|
2021-06-25 14:38:31 +00:00
|
|
|
__maintainer__ = "Bob Mottram"
|
2021-09-10 16:14:50 +00:00
|
|
|
__email__ = "bob@libreserver.org"
|
2021-06-25 14:38:31 +00:00
|
|
|
__status__ = "Production"
|
|
|
|
__module_group__ = "Web Interface"
|
|
|
|
|
|
|
|
|
2022-06-27 14:09:40 +00:00
|
|
|
def _markdown_get_sections(markdown: str) -> []:
    """Splits markdown into consecutive chunks at code block boundaries

    A new chunk begins at each line containing '<code>' and, once inside
    a code block, at the next line containing '</code>'. The chunks are
    returned in their original order with their newlines intact, except
    that a final chunk consisting only of whitespace is left out.
    Text without any '<code>' tag is returned as a single chunk.
    """
    if '<code>' not in markdown:
        return [markdown]

    chunks = []
    current = ''
    inside_code = False
    for index, text_line in enumerate(markdown.split('\n')):
        if index:
            # restore the newline which split() removed
            current += '\n'
        # only the tag which ends the present state is of interest
        boundary = '</code>' if inside_code else '<code>'
        if boundary in text_line:
            inside_code = not inside_code
            chunks.append(current)
            current = ''
        current += text_line
    if current.strip():
        chunks.append(current)
    return chunks
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _markdown_emphasis_html(markdown: str) -> str:
    """Add italics and bold html markup to the given markdown

    The emphasis markers ** (bold), * (italic), _ (underline) and
    ` (em) are converted to html tags when they appear next to
    whitespace, punctuation, brackets or other tags. A marker at the
    very beginning or end of a section is handled separately, because
    it has no neighbouring character to match against.
    Sections containing <code> are copied through without changes and
    sections consisting only of whitespace are dropped.
    """
    # NOTE: the replacements are applied in the order listed, so the
    # '**' patterns have to stay ahead of the single '*' patterns
    replacements = {
        ' **': ' <b>',
        '** ': '</b> ',
        '**.': '</b>.',
        '**:': '</b>:',
        '**;': '</b>;',
        '?**': '?</b>',
        '\n**': '\n<b>',
        '**,': '</b>,',
        '**\n': '</b>\n',
        # an opening bracket is followed by the opening tag only
        '(**': '(<b>',
        '**)': '</b>)',
        '>**': '><b>',
        '**<': '</b><',
        '>*': '><i>',
        '*<': '</i><',
        ' *': ' <i>',
        '* ': '</i> ',
        '?*': '?</i>',
        '\n*': '\n<i>',
        '*.': '</i>.',
        '*:': '</i>:',
        '*;': '</i>;',
        # an opening bracket is followed by the opening tag only
        '(*': '(<i>',
        '*)': '</i>)',
        '*,': '</i>,',
        '*\n': '</i>\n',
        '(_': '(<u>',
        '_)': '</u>)',
        ' _': ' <u>',
        '_ ': '</u> ',
        '_.': '</u>.',
        '_:': '</u>:',
        '_;': '</u>;',
        '_,': '</u>,',
        '_\n': '</u>\n',
        ' `': ' <em>',
        '`.': '</em>.',
        '`:': '</em>:',
        "`'": "</em>'",
        "(`": "(<em>",
        "`)": "</em>)",
        '`;': '</em>;',
        '`,': '</em>,',
        '`\n': '</em>\n',
        '` ': '</em> '
    }

    sections = _markdown_get_sections(markdown)
    markdown = ''
    for section_text in sections:
        if '<code>' in section_text:
            # leave code untouched
            markdown += section_text
            continue
        for md_str, html in replacements.items():
            section_text = section_text.replace(md_str, html)

        # a marker at the very start opens emphasis, so the opening
        # tag takes the place of the marker
        if section_text.startswith('**'):
            section_text = '<b>' + section_text[2:]
        elif section_text.startswith('*'):
            section_text = '<i>' + section_text[1:]
        elif section_text.startswith('_'):
            section_text = '<u>' + section_text[1:]

        # a marker at the very end closes emphasis
        if section_text.endswith('**'):
            section_text = section_text[:-2] + '</b>'
        elif section_text.endswith('*'):
            section_text = section_text[:-1] + '</i>'
        elif section_text.endswith('_'):
            section_text = section_text[:-1] + '</u>'

        if section_text.strip():
            markdown += section_text
    return markdown
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _markdown_replace_quotes(markdown: str) -> str:
    """Replaces > quotes with html blockquote

    Each line which begins with '> ', ignoring leading whitespace,
    becomes <blockquote><i>text</i></blockquote>. Directly consecutive
    quote lines are merged into one blockquote with a space between
    them. Lines within <code> sections are copied through unchanged.
    The newline after each blockquote is removed, and a trailing
    newline is not added if the given markdown did not end with one.
    """
    if '> ' not in markdown:
        return markdown

    quote_end = '</i></blockquote>\n'
    result = ''
    # True while the most recently appended text is a blockquote
    in_quote = False
    code_section = False
    for line in markdown.split('\n'):
        # avoid code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        elif '</code>' in line:
            code_section = False
        if code_section:
            result += line + '\n'
            in_quote = False
            continue

        line_str = line.strip()
        if not line_str.startswith('> '):
            result += line + '\n'
            in_quote = False
            continue

        line_str = line_str.replace('> ', '', 1).strip()
        if in_quote:
            # Continue the blockquote which is at the end of the result.
            # Only that tail is altered, so that an earlier quote having
            # the same text keeps its closing tags
            result = result[:-len(quote_end)] + ' ' + line_str + quote_end
        else:
            result += '<blockquote><i>' + line_str + quote_end
        in_quote = True

    result = result.replace('</blockquote>\n', '</blockquote>')

    if result.endswith('\n') and not markdown.endswith('\n'):
        result = result[:-1]
    return result
|
|
|
|
|
|
|
|
|
2022-06-28 21:34:05 +00:00
|
|
|
def _markdown_replace_links(markdown: str) -> str:
    """Replaces markdown links and images with html

    [text](url) becomes an anchor. A url beginning with '#' is treated
    as a bookmark, and any other url is given target="_blank" together
    with rel="nofollow noopener noreferrer".
    ![text](url) becomes an img with class "markdownImage" and the text
    as its alt attribute.
    Backticks are removed from the link text, and sections containing
    <code> are copied through unchanged.
    """
    converted = ''
    for section_text in _markdown_get_sections(markdown):
        if '<code>' in section_text or '](' not in section_text:
            converted += section_text
            continue

        # each '](' potentially joins a link text to its url, so
        # examine the fragments on either side of it
        fragments = section_text.split('](')
        for before, after in zip(fragments, fragments[1:]):
            if '[' not in before or ')' not in after:
                continue
            link_text = before.split('[')[-1]
            link_url = after.split(')')[0]
            markdown_link = '[' + link_text + '](' + link_url + ')'
            link_text = link_text.replace('`', '')

            # images go first, because their syntax contains a link
            markdown_image = '!' + markdown_link
            if markdown_image in section_text:
                html_image = \
                    '<img class="markdownImage" src="' + \
                    link_url + '" alt="' + link_text + '" />'
                section_text = \
                    section_text.replace(markdown_image, html_image)

            if markdown_link not in section_text:
                continue
            if link_url.startswith('#'):
                # bookmark
                html_link = \
                    '<a href="' + link_url + '">' + link_text + '</a>'
            else:
                # external link
                html_link = \
                    '<a href="' + link_url + '" target="_blank" ' + \
                    'rel="nofollow noopener noreferrer">' + \
                    link_text + '</a>'
            section_text = section_text.replace(markdown_link, html_link)
        converted += section_text
    return converted
|
2021-06-25 14:38:31 +00:00
|
|
|
|
|
|
|
|
2022-06-27 10:38:31 +00:00
|
|
|
def _markdown_bullet_list_html(lines: list, item_indexes: list,
                               bullet: str) -> None:
    """Rewrites the given bullet point lines, in place, as one html list
    The first item also opens the <ul> and the last item also closes it
    """
    last_pos = len(item_indexes) - 1
    for pos, index in enumerate(item_indexes):
        item_html = '<li>' + lines[index].replace(bullet, '', 1) + '</li>'
        if pos == 0:
            item_html = '<ul class="md_list">\n' + item_html
        if pos == last_pos:
            item_html += '\n</ul>'
        lines[index] = item_html


def _markdown_replace_bullet_points(markdown: str) -> str:
    """Replaces bullet points

    A run of lines which all begin with the same bullet marker
    ('* ', ' * ', '- ' or ' - ') becomes a <ul class="md_list"> with one
    <li> per line. The list ends at the first non-blank line without
    that marker, at the start of a <code> section, or at the end of
    the text. Blank lines between two items are removed, while blank
    lines after the last item are kept. Lines within <code> sections
    are never converted.
    If no list is found then the markdown is returned unchanged,
    otherwise the text is rebuilt with a newline after every line.
    """
    lines = markdown.split('\n')
    bullet_style = ('* ', ' * ', '- ', ' - ')
    bullet_matched = ''
    # indexes of the lines belonging to the list currently being read
    item_indexes = []
    # blank lines seen since the most recent item of that list
    blank_indexes = []
    changed = False
    code_section = False
    for line_ctr, line in enumerate(lines):
        if not line.strip():
            # blank lines neither begin nor end a list
            if item_indexes:
                blank_indexes.append(line_ctr)
            continue

        # skip over code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        elif '</code>' in line:
            code_section = False

        if item_indexes and not code_section and \
           line.startswith(bullet_matched):
            # Another item of the same list. Blank lines between the
            # items are marked for removal so that they do not end up
            # inside the html list
            for index in blank_indexes:
                lines[index] = None
            blank_indexes = []
            item_indexes.append(line_ctr)
            continue

        if item_indexes:
            # this line ends the list
            _markdown_bullet_list_html(lines, item_indexes, bullet_matched)
            changed = True
            item_indexes = []
            blank_indexes = []
            bullet_matched = ''

        if code_section:
            continue

        # the line may itself be the first item of a new list
        for test_str in bullet_style:
            if line.startswith(test_str):
                bullet_matched = test_str
                item_indexes = [line_ctr]
                break

    if item_indexes:
        # the list continued up to the end of the text
        _markdown_bullet_list_html(lines, item_indexes, bullet_matched)
        changed = True

    if not changed:
        return markdown

    return ''.join(line + '\n' for line in lines if line is not None)
|
|
|
|
|
|
|
|
|
|
|
|
def _markdown_replace_code(markdown: str) -> str:
    """Replaces code sections within markdown

    A pair of lines beginning with ``` becomes <code> and </code>.
    If the opening line mentions html, xml or rdf then the section is
    also wrapped in <pre> and the characters & " ' > < on the lines
    between are replaced by html entities, so that the markup is shown
    rather than interpreted.
    An opening ``` without a closing one is left as it is.
    If no code section is found then the markdown is returned
    unchanged, otherwise the text is rebuilt with a newline after
    every line.
    """
    lines = markdown.split('\n')
    # translate() makes a single pass, so the & within an entity which
    # has just been inserted is not escaped a second time
    html_escape = str.maketrans({
        "&": "&amp;",
        '"': "&quot;",
        "'": "&apos;",
        ">": "&gt;",
        "<": "&lt;"
    })
    start_line = -1
    changed = False
    section_active = False
    url_encode = False
    for line_ctr, line in enumerate(lines):
        if not line.startswith('```'):
            continue

        if not section_active:
            # opening fence
            if 'html' in line or 'xml' in line or 'rdf' in line:
                url_encode = True
            start_line = line_ctr
            section_active = True
            continue

        # closing fence
        if url_encode:
            lines[start_line] = '<pre>\n<code>'
            lines[line_ctr] = '</code>\n</pre>'
            for line_num in range(start_line + 1, line_ctr):
                lines[line_num] = lines[line_num].translate(html_escape)
        else:
            lines[start_line] = '<code>'
            lines[line_ctr] = '</code>'
        section_active = False
        changed = True
        url_encode = False

    if not changed:
        return markdown

    return ''.join(line + '\n' for line in lines)
|
|
|
|
|
|
|
|
|
2022-06-28 09:03:34 +00:00
|
|
|
def markdown_example_numbers(markdown: str) -> str:
    """Renumbers the example headers within a markdown document
    so that they count upwards from 1 in order of appearance

    A header line is one which starts with '##' and contains
    '## Example '. Whatever follows ' Example ' on such a line is
    replaced by the new number. The returned text has a newline after
    every line, including the last one.
    """
    renumbered = []
    next_number = 1
    for text_line in markdown.split('\n'):
        if text_line.startswith('##') and '## Example ' in text_line:
            hashes_str = text_line.split(' Example ')[0]
            text_line = hashes_str + ' Example ' + str(next_number)
            next_number += 1
        renumbered.append(text_line)
    return '\n'.join(renumbered) + '\n'
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def markdown_to_html(markdown: str) -> str:
    """Converts markdown formatted text to html

    Code sections, bullet points, quotes, emphasis and links are
    converted first, in that order. Then each line beginning with
    one to six # characters becomes a h1 to h6 header, whose id is
    the header text in lower case with spaces replaced by hyphens,
    so that it can be the target of a [text](#bookmark) link.
    Lines are separated by <br>, except within <code> sections, where
    plain newlines are kept, and directly after a header.
    """
    markdown = _markdown_replace_code(markdown)
    markdown = _markdown_replace_bullet_points(markdown)
    markdown = _markdown_replace_quotes(markdown)
    markdown = _markdown_emphasis_html(markdown)
    markdown = _markdown_replace_links(markdown)

    # replace headers
    # The longest run of hashes is tested first, otherwise every
    # header would match h1
    titles = {
        "h6": '######',
        "h5": '#####',
        "h4": '####',
        "h3": '###',
        "h2": '##',
        "h1": '#'
    }
    html_str = ''
    ctr = 0
    code_section = False
    for line in markdown.split('\n'):
        if ctr > 0:
            if not code_section:
                html_str += '<br>\n'
            else:
                html_str += '\n'

        # avoid code sections
        if not code_section:
            if '<code>' in line:
                code_section = True
        elif '</code>' in line:
            code_section = False
        if code_section:
            html_str += line
            ctr += 1
            continue

        for hsh, hashes in titles.items():
            if not line.startswith(hashes):
                continue
            # Only the leading hashes are removed, so that any #
            # within the title text itself is kept
            title_text = line[len(hashes):].strip()
            if not title_text:
                # a line consisting only of hashes is not a header
                break
            bookmark_str = title_text.lower().replace(' ', '-')
            line = '<' + hsh + ' id="' + bookmark_str + '">' + \
                title_text + '</' + hsh + '>\n'
            # no <br> is wanted between a header and the next line
            ctr = -1
            break
        html_str += line
        ctr += 1

    # remove the line breaks which were added directly after
    # code and list tags
    html_str = html_str.replace('<code><br>', '<code>')
    html_str = html_str.replace('</code><br>', '</code>')

    html_str = html_str.replace('<ul class="md_list"><br>',
                                '<ul class="md_list">')
    html_str = html_str.replace('</li><br>', '</li>')

    return html_str
|