def _gemini_links(content_text: str) -> list[str]:
    """
    Returns the http, https and gemini links found within the given
    plain text, in their order of appearance
    """
    links: list[str] = []
    sections = content_text.split('://')
    # Pair every section with the one directly before it, so that the
    # scheme is always read from the text immediately preceding the
    # '://' separator, even when the previous candidate was rejected
    for prev_section, section in zip(sections, sections[1:]):
        # the scheme is the last word before the separator. Any
        # whitespace (not only a space) can precede it
        scheme = ''
        if prev_section[-1:].strip():
            scheme = prev_section.split()[-1]
        if scheme not in ('http', 'https', 'gemini'):
            # without a known scheme no absolute link can be written
            continue

        # the address runs up to the next whitespace character
        link_str = ''
        if section[:1].strip():
            link_str = section.split(maxsplit=1)[0]
        # a trailing full stop is sentence punctuation, not address
        if link_str.endswith('.'):
            link_str = link_str[:-1]
        # something without a dot is not treated as a domain name
        if '.' not in link_str:
            continue
        links.append(scheme + '://' + link_str)
    return links


def _gemini_filename_stem(text: str) -> str:
    """
    Returns the given text with dots, whitespace, path separators and
    non-printable characters replaced by underscores, so that it can be
    used as a single filename component
    """
    return ''.join('_'
                   if char in './\\' or char.isspace() or
                   not char.isprintable()
                   else char
                   for char in text)


def blog_to_gemini(base_dir: str, nickname: str, domain: str,
                   message_json: dict, system_language: str,
                   debug: bool, testing: bool) -> bool:
    """
    Converts a blog post to gemini format
    The post is written to <account dir>/gemini/<date>_<title>.gmi
    where the account directory is the one returned by acct_dir.
    When testing is True base_dir itself is used as the account
    directory, the output goes to base_dir/geminitest and any
    geminitest directory left by a previous run is removed first.
    The file contains a '# ' title heading, the publication date, the
    content with html removed, and finally one '=> ' link line for
    each http, https or gemini link found within the content.
    Returns True on success, or False if the account directory does
    not exist, the post has no usable publication date or content, or
    the file could not be written
    """
    if testing:
        account_dir = base_dir
        # begin each test without the output of an earlier run
        if os.path.isdir(account_dir + '/geminitest'):
            shutil.rmtree(account_dir + '/geminitest', ignore_errors=True)
    else:
        account_dir = acct_dir(base_dir, nickname, domain)

    if not os.path.isdir(account_dir):
        if debug:
            print('WARN: blog_to_gemini account directory not found ' +
                  account_dir)
        return False

    # get the publication date
    obj = message_json
    if has_object_dict(message_json):
        obj = message_json['object']
    if not obj.get('published'):
        if debug:
            print('WARN: blog_to_gemini Blog post has no publication date ' +
                  str(message_json))
        return False
    if not isinstance(obj['published'], str):
        if debug:
            print('WARN: blog_to_gemini publication date is not a string ' +
                  str(message_json))
        return False
    if 'T' not in obj['published']:
        if debug:
            print('WARN: blog_to_gemini ' +
                  'publication date not in expected format ' +
                  obj['published'])
        return False
    # keep only the date part, before the time separator
    published = obj['published'].split('T')[0]

    # get the blog content
    content_str = get_base_content_from_post(message_json, system_language)
    if not content_str:
        if debug:
            print('WARN: blog_to_gemini no content ' +
                  str(message_json))
        return False
    content_text = remove_html(content_str)

    # get the blog title
    title_text = ''
    title_str = get_summary_from_post(message_json, system_language, [])
    if title_str:
        title_text = remove_html(title_str)

    # add web links to the end of the content
    links = _gemini_links(content_text)
    if links:
        content_text += \
            '\n\n' + ''.join('=> ' + link_str + '\n' for link_str in links)

    # create gemini blog directory
    if testing:
        gemini_blog_dir = account_dir + '/geminitest'
    else:
        gemini_blog_dir = account_dir + '/gemini'
    try:
        os.makedirs(gemini_blog_dir, exist_ok=True)
    except OSError:
        print('EX: blog_to_gemini unable to create ' + gemini_blog_dir)
        return False

    # The title must not introduce path separators, otherwise the
    # file would be written outside of the gemini blog directory
    gemini_blog_filename = \
        gemini_blog_dir + '/' + \
        _gemini_filename_stem(published + '_' + title_text.lower()) + '.gmi'

    if not title_text.startswith('# '):
        title_text = '# ' + title_text

    try:
        with open(gemini_blog_filename, 'w',
                  encoding='utf-8') as fp_gemini:
            fp_gemini.write(title_text + '\n\n' + published + '\n\n' +
                            content_text)
    except OSError:
        print('EX: blog_to_gemini unable to write ' + gemini_blog_filename)
        return False

    return True
def _test_gemini_blog(base_dir: str) -> None:
    """
    Exports a minimal blog post in gemini format, using the testing
    mode of blog_to_gemini, then checks that the expected file exists
    and contains the title heading, the content and the link line.
    The geminitest directory is removed afterwards
    """
    print('gemini_blog')
    test_title = 'Test title'
    test_content = 'This is a test'
    test_link = 'https://some.link'
    post_json = {
        'object': {
            'published': '2022-02-25T20:15:00Z',
            'summary': test_title,
            'content': test_content + ' ' + test_link
        }
    }

    # where the export is expected to appear
    export_dir = base_dir + '/geminitest'
    expected_filename = \
        export_dir + '/2022-02-25_' + \
        test_title.replace(' ', '_').lower() + '.gmi'

    # debug enabled, testing enabled
    exported = blog_to_gemini(base_dir, 'someuser', 'somedomain',
                              post_json, 'en',
                              True, True)
    assert exported
    assert os.path.isdir(export_dir)
    assert os.path.isfile(expected_filename)

    expected_texts = (
        '# ' + test_title + '\n',
        test_content,
        '=> ' + test_link
    )
    for expected_text in expected_texts:
        assert text_in_file(expected_text, expected_filename)

    shutil.rmtree(export_dir, ignore_errors=True)