Save blog in markdown format

2025-11-21 15:23:18 +00:00 · 2025-11-21 15:23:18 +00:00 · 665ceb8a11
parent 798ea8b917
commit 665ceb8a11
7 changed files with 184 additions and 0 deletions
--- a/manual/manual.epub
+++ b/manual/manual.epub
--- a/manual/manual.html
+++ b/manual/manual.html
@ -547,6 +547,15 @@ other types of post, and are also publicly visible to anyone on the
 web.</p>
 <p>At the top of the <em>links</em> column on the main timeline screen
 there is an icon to show an RSS feed for your blog entries.</p>
+<p>When you create a blog post a corresponding file in <a
+href="https://en.wikipedia.org/wiki/Gemini_(protocol)">Gemini</a> and <a
+href="https://en.wikipedia.org/wiki/Markdown">Markdown</a> formats will
+also be generated. These can be found in the
+<strong>accounts/username@domain/gemini</strong> and
+<strong>accounts/username@domain/markdown</strong> directories. If you
+are running a Gemini server or a separate markdown blog you can then
+perhaps set up a cron script to copy those files to the relevant
+destinations.</p>
 <h3 id="unlisted">Unlisted</h3>
 <p>Similar to a public post, but will not appear as a recent post within
 your profile. Unlisted posts can add a little more privacy to a
--- a/manual/manual.md
+++ b/manual/manual.md
@ -408,6 +408,8 @@ Used to create a blog post. Blog posts are typically longer than other types of

 At the top of the *links* column on the main timeline screen there is an icon to show an RSS feed for your blog entries.

+When you create a blog post a corresponding file in [Gemini](https://en.wikipedia.org/wiki/Gemini_(protocol)) and [Markdown](https://en.wikipedia.org/wiki/Markdown) formats will also be generated. These can be found in the **accounts/username@domain/gemini** and **accounts/username@domain/markdown** directories. If you are running a Gemini server or a separate markdown blog you can then perhaps set up a cron script to copy those files to the relevant destinations.
+
 ### Unlisted
 Similar to a public post, but will not appear as a recent post within your profile. Unlisted posts can add a little more privacy to a conversation in that it will not be immediately obvious to casual observers. Often in practice this is all that's needed to avoid trolls or unwanted attention.

--- a/markdown.py
+++ b/markdown.py
@ -7,6 +7,17 @@ __email__ = "bob@libreserver.org"
 __status__ = "Production"
 __module_group__ = "Web Interface"

+import os
+import shutil
+from utils import acct_dir
+from utils import remove_html
+from utils import get_base_content_from_post
+from utils import get_post_attachments
+from utils import get_url_from_post
+from utils import get_markdown_blog_filename
+from utils import get_gemini_blog_title
+from utils import get_gemini_blog_published
+

 def _markdown_get_sections(markdown: str) -> []:
    """Returns a list of sections for markdown
@ -493,3 +504,88 @@ def markdown_to_html(markdown: str) -> str:
        html_str = html_str.replace(pair[0], pair[1])

    return html_str
+
+
+def blog_to_markdown(base_dir: str, nickname: str, domain: str,
+                     message_json: dict, system_language: str,
+                     debug: bool, testing: bool) -> bool:
+    """Converts a blog post to markdown format and saves it as a file
+    within the markdown directory of the account.
+    When testing is True the file is instead saved within a freshly
+    emptied markdowntest directory beneath base_dir.
+    Returns True on success, or False if the account directory is
+    missing, the post has no published date or no content, or the
+    markdown directory or file could not be written
+    """
+    if not testing:
+        account_dir = acct_dir(base_dir, nickname, domain)
+        markdown_blog_dir = account_dir + '/markdown'
+    else:
+        account_dir = base_dir
+        markdown_blog_dir = account_dir + '/markdowntest'
+        # always begin a test run without files from a previous run
+        if os.path.isdir(markdown_blog_dir):
+            shutil.rmtree(markdown_blog_dir, ignore_errors=True)
+
+    if not os.path.isdir(account_dir):
+        if debug:
+            print('WARN: blog_to_markdown account directory not found ' +
+                  account_dir)
+        return False
+
+    published = get_gemini_blog_published(message_json, debug)
+    if not published:
+        return False
+
+    # get the blog content
+    content_str = get_base_content_from_post(message_json, system_language)
+    if not content_str:
+        if debug:
+            print('WARN: blog_to_markdown no content ' +
+                  str(message_json))
+        return False
+    content_text = remove_html(content_str)
+
+    # get the blog title
+    title_text = get_gemini_blog_title(message_json, system_language)
+
+    # create markdown blog directory
+    # exist_ok avoids a failure if the directory gets created by
+    # another thread between checking for it and making it
+    try:
+        os.makedirs(markdown_blog_dir, exist_ok=True)
+    except OSError:
+        print('EX: blog_to_markdown unable to create directory ' +
+              markdown_blog_dir)
+        return False
+
+    markdown_blog_filename = \
+        get_markdown_blog_filename(base_dir, nickname, domain,
+                                   message_json, system_language,
+                                   debug, testing)
+    if not markdown_blog_filename:
+        # an empty filename means that no file can be saved
+        if debug:
+            print('WARN: blog_to_markdown no filename for ' +
+                  str(message_json))
+        return False
+
+    # the title becomes a top level markdown heading
+    if not title_text.startswith('# '):
+        title_text = '# ' + title_text
+
+    # get attachments
+    # only attachments having both a name and a url become links
+    links: list[str] = []
+    post_attachments = get_post_attachments(message_json)
+    if post_attachments:
+        for attach in post_attachments:
+            if not isinstance(attach, dict):
+                continue
+            if not attach.get('name'):
+                continue
+            if not attach.get('url'):
+                continue
+            links.append('[' + attach['name'] + '](' +
+                         get_url_from_post(attach['url']) + ')')
+
+    # add links to the end of the content, one per line
+    if links:
+        content_text += '\n\n' + '\n'.join(links) + '\n'
+
+    try:
+        with open(markdown_blog_filename, 'w',
+                  encoding='utf-8') as fp_markdown:
+            fp_markdown.write(title_text + '\n\n' + published + '\n\n' +
+                              content_text)
+    except OSError:
+        print('EX: blog_to_markdown unable to write ' + markdown_blog_filename)
+        return False
+
+    return True
--- a/outbox.py
+++ b/outbox.py
@ -69,6 +69,7 @@ from reading import store_book_events
 from reading import has_edition_tag
 from inbox_receive import inbox_update_index
 from gemini import blog_to_gemini
+from markdown import blog_to_markdown


 def _localonly_not_local(message_json: {}, domain_full: str) -> bool:
@ -549,6 +550,9 @@ def post_message_to_outbox(session, translate: {},
        # export blog post in gemini format
        blog_to_gemini(base_dir, post_to_nickname, domain,
                       message_json, system_language, debug, False)
+        # export blog post in markdown format
+        blog_to_markdown(base_dir, post_to_nickname, domain,
+                         message_json, system_language, debug, False)

    # update the speaker endpoint for speech synthesis
    actor_url = get_actor_from_post(message_json)
--- a/tests.py
+++ b/tests.py
@ -200,6 +200,7 @@ from webapp_post import replace_link_variable
 from webapp_post import prepare_html_post_nickname
 from speaker import speaker_replace_links
 from markdown import markdown_to_html
+from markdown import blog_to_markdown
 from languages import get_reply_language
 from languages import set_actor_languages
 from languages import get_actor_languages
@ -9519,9 +9520,41 @@ def _test_gemini_blog(base_dir: str) -> None:
    assert text_in_file('# ' + title + '\n', gemini_blog_filename)
    assert text_in_file(content, gemini_blog_filename)
    assert text_in_file('=> ' + link, gemini_blog_filename)
+    assert text_in_file('2022-02-25', gemini_blog_filename)
    shutil.rmtree(gemini_blog_dir, ignore_errors=True)


+def _test_markdown_blog(base_dir: str) -> None:
+    """Checks that a blog post gets exported as a markdown file
+    containing its title, content and published date
+    """
+    print('markdown_blog')
+    blog_title = 'Markdown test title'
+    blog_content = 'This is a markdown test'
+    blog_link = 'https://some.link'
+    blog_published = '2022-02-25T20:15:00Z'
+    test_dir = base_dir + '/markdowntest'
+    expected_filename = \
+        test_dir + '/2022-02-25_' + \
+        blog_title.replace(' ', '_').lower() + '.md'
+    post_json = {
+        'object': {
+            'published': blog_published,
+            'summary': blog_title,
+            'content': blog_content + ' ' + blog_link
+        }
+    }
+    # language is 'en', debug is on, and the final True selects testing
+    exported = blog_to_markdown(base_dir, 'someuser', 'somedomain',
+                                post_json, 'en', True, True)
+    assert exported
+    assert os.path.isdir(test_dir)
+    assert os.path.isfile(expected_filename)
+    # the heading, the body text and the date should all be in the file
+    for expected_text in ('# ' + blog_title + '\n',
+                          blog_content,
+                          '2022-02-25'):
+        assert text_in_file(expected_text, expected_filename)
+    shutil.rmtree(test_dir, ignore_errors=True)
+
+
 def run_all_tests():
    base_dir = os.getcwd()
    data_dir_testing(base_dir)
@ -9540,6 +9573,7 @@ def run_all_tests():
    _test_checkbox_names()
    _test_thread_functions()
    _test_functions()
+    _test_markdown_blog(base_dir)
    _test_gemini_blog(base_dir)
    _test_actor_status()
    _test_filter_match()
--- a/utils.py
+++ b/utils.py
@ -2261,6 +2261,30 @@ def get_gemini_blog_filename(base_dir: str, nickname: str, domain: str,
    return gemini_blog_filename


+def get_markdown_blog_filename(base_dir: str, nickname: str, domain: str,
+                               message_json: dict, system_language: str,
+                               debug: bool, testing: bool) -> str:
+    """Returns the filename for a markdown blog post, in the form
+    [markdown directory]/[published]_[title].md
+    The title is lower case, with dots, spaces and forward slashes
+    replaced by underscores.
+    The directory is the markdown directory of the account, or the
+    markdowntest directory beneath base_dir when testing is True.
+    Returns an empty string if the post has no published date
+    """
+    title_text = get_gemini_blog_title(message_json, system_language)
+    published = get_gemini_blog_published(message_json, debug)
+    if not published:
+        return ''
+
+    # A forward slash within the title would otherwise be treated as a
+    # path separator, referring to a subdirectory which doesn't exist,
+    # and so the post could not be saved
+    title_text2 = title_text
+    for unsafe_char in ('.', ' ', '/'):
+        title_text2 = title_text2.replace(unsafe_char, '_')
+
+    if not testing:
+        account_dir = acct_dir(base_dir, nickname, domain)
+        markdown_blog_dir = account_dir + '/markdown'
+    else:
+        account_dir = base_dir
+        markdown_blog_dir = account_dir + '/markdowntest'
+
+    markdown_blog_filename = \
+        markdown_blog_dir + '/' + published + '_' + title_text2.lower() + '.md'
+    return markdown_blog_filename
+
+
 def delete_post(base_dir: str, http_prefix: str,
                nickname: str, domain: str, post_filename: str,
                debug: bool, recent_posts_cache: {},
@ -2321,6 +2345,21 @@ def delete_post(base_dir: str, http_prefix: str,
                    print('EX: delete_post unable to delete gemini post ' +
                          str(gemini_blog_filename))

+    # delete markdown blog post
+    markdown_blog_filename = \
+        get_markdown_blog_filename(base_dir, nickname, domain,
+                                   post_json_object, '',
+                                   debug, False)
+    if markdown_blog_filename:
+        if os.path.isfile(markdown_blog_filename):
+            try:
+                os.remove(markdown_blog_filename)
+                return True
+            except OSError:
+                if debug:
+                    print('EX: delete_post unable to delete markdown post ' +
+                          str(markdown_blog_filename))
+
    # remove from recent posts cache in memory
    remove_post_from_cache(post_json_object, recent_posts_cache)