Export blogs in gemini format

main
Bob Mottram 2025-11-18 17:55:40 +00:00
parent a577d59341
commit 8e7dc3b23e
3 changed files with 176 additions and 5 deletions

132
gemini.py 100644
View File

@ -0,0 +1,132 @@
# Module metadata.
# This module provides blog_to_gemini, which writes a blog post out as a
# .gmi file within a gemini directory.
__filename__ = "gemini.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.6.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"
import os
import shutil
from utils import acct_dir
from utils import has_object_dict
from utils import remove_html
from utils import get_summary_from_post
from utils import get_base_content_from_post
def _gemini_filename_part(text: str) -> str:
    """
    Returns the given text with path separators and NUL characters
    replaced by underscores, so that it can only ever name a file
    directly inside the directory that it is appended to
    """
    for unsafe_char in ('/', '\\', '\0'):
        text = text.replace(unsafe_char, '_')
    return text


def _gemini_links_from_text(content_text: str) -> list[str]:
    """
    Returns the http, https and gemini links found within plain text,
    in the order in which they appear
    """
    links: list[str] = []
    if '://' not in content_text:
        return links
    sections = content_text.split('://')
    # Pair every section with the one before it, so that the scheme is
    # always read from the text immediately preceding this '://', even
    # when an earlier candidate was rejected
    for prev_section, section in zip(sections, sections[1:]):
        # the link ends at the first newline or space
        link_str = section.split('\n')[0].split(' ')[0]
        # a trailing full stop is sentence punctuation, not part of the link
        if link_str.endswith('.'):
            link_str = link_str[:-1]
        # anything without a dot is not treated as a link
        if '.' not in link_str:
            continue
        # the scheme is the last word before '://'. Newlines are treated
        # like spaces so that a link at the start of a line is found
        prefix = prev_section.replace('\n', ' ').rsplit(' ', 1)[-1]
        if prefix in ('http', 'https', 'gemini'):
            links.append(prefix + '://' + link_str)
    return links


def blog_to_gemini(base_dir: str, nickname: str, domain: str,
                   message_json: dict, system_language: str,
                   debug: bool, testing: bool) -> bool:
    """
    Converts a blog post to gemini format
    Returns True on success

    The post is written to <account dir>/gemini/<date>_<title>.gmi,
    where the account directory is obtained from acct_dir and the date
    is the part of the published field before the 'T'.
    The file contains a '# ' title line, the publication date, the post
    content with html removed, and then one '=> ' line for each
    http, https or gemini link found within the content.

    If testing is True then base_dir is used as the account directory,
    any existing base_dir/geminitest directory is removed first and the
    file is written to geminitest rather than gemini.

    False is returned if the account directory does not exist, if the
    post has no usable publication date or no content, or if the
    directory or file could not be written. Warnings for rejected posts
    are only printed when debug is True.
    """
    if not testing:
        account_dir = acct_dir(base_dir, nickname, domain)
    else:
        account_dir = base_dir
        # start each test run from a clean output directory
        if os.path.isdir(account_dir + '/geminitest'):
            shutil.rmtree(account_dir + '/geminitest', ignore_errors=True)
    if not os.path.isdir(account_dir):
        if debug:
            print('WARN: blog_to_gemini account directory not found ' +
                  account_dir)
        return False

    # get the publication date
    obj = message_json
    if has_object_dict(message_json):
        obj = message_json['object']
    if not obj.get('published'):
        if debug:
            print('WARN: blog_to_gemini Blog post has no publication date ' +
                  str(message_json))
        return False
    if not isinstance(obj['published'], str):
        if debug:
            print('WARN: blog_to_gemini publication date is not a string ' +
                  str(message_json))
        return False
    if 'T' not in obj['published']:
        if debug:
            print('WARN: blog_to_gemini ' +
                  'publication date not in expected format ' +
                  obj['published'])
        return False
    published = obj['published'].split('T')[0]

    # get the blog content
    content_str = get_base_content_from_post(message_json, system_language)
    if not content_str:
        if debug:
            print('WARN: blog_to_gemini no content ' +
                  str(message_json))
        return False
    content_text = remove_html(content_str)

    # get the blog title
    title_text = ''
    title_str = get_summary_from_post(message_json, system_language, [])
    if title_str:
        title_text = remove_html(title_str)

    # get web links and add them to the end of the content
    links = _gemini_links_from_text(content_text)
    if links:
        content_text += '\n\n'
        for link_str in links:
            content_text += '=> ' + link_str + '\n'

    # create gemini blog directory
    if not testing:
        gemini_blog_dir = account_dir + '/gemini'
    else:
        gemini_blog_dir = account_dir + '/geminitest'
    try:
        os.makedirs(gemini_blog_dir, exist_ok=True)
    except OSError:
        print('EX: blog_to_gemini unable to create ' + gemini_blog_dir)
        return False

    # The title and date both come from the post, so path separators are
    # removed before they are used to name the file
    title_text2 = title_text.replace('.', ' ')
    title_text2 = title_text2.replace(' ', '_')
    gemini_blog_filename = \
        gemini_blog_dir + '/' + \
        _gemini_filename_part(published + '_' + title_text2.lower()) + '.gmi'

    if not title_text.startswith('# '):
        title_text = '# ' + title_text
    try:
        with open(gemini_blog_filename, 'w+',
                  encoding='utf-8') as fp_gemini:
            fp_gemini.write(title_text + '\n\n' + published + '\n\n' +
                            content_text)
    except OSError:
        print('EX: blog_to_gemini unable to write ' + gemini_blog_filename)
        return False
    return True

View File

@ -68,6 +68,7 @@ from speaker import update_speaker
from reading import store_book_events
from reading import has_edition_tag
from inbox_receive import inbox_update_index
from gemini import blog_to_gemini
def _localonly_not_local(message_json: {}, domain_full: str) -> bool:
@ -544,6 +545,11 @@ def post_message_to_outbox(session, translate: {},
print('WARN: post not saved to outbox ' + outbox_name)
return False
if outbox_name == 'tlblogs':
# export blog post in gemini format
blog_to_gemini(base_dir, post_to_nickname, domain,
message_json, system_language, debug, False)
# update the speaker endpoint for speech synthesis
actor_url = get_actor_from_post(message_json)
update_speaker(base_dir, http_prefix,

View File

@ -8,17 +8,17 @@ __status__ = "Production"
__module_group__ = "Testing"
import base64
import time
import os
import shutil
import json
import datetime
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.serialization import load_pem_private_key
from cryptography.hazmat.primitives.serialization import load_pem_public_key
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric import utils as hazutils
import time
import os
import shutil
import json
import datetime
from shutil import copyfile
from random import randint
from time import gmtime, strftime
@ -234,6 +234,7 @@ from webapp_utils import add_emoji_to_display_name
from blocking import is_blocked_nickname
from blocking import is_blocked_domain
from filters import filtered_match
from gemini import blog_to_gemini
TEST_SERVER_GROUP_RUNNING = False
@ -9490,6 +9491,37 @@ def _test_actor_status() -> None:
assert not actor_status_expired(actor['sm:status'])
def _test_gemini_blog(base_dir: str) -> None:
    """
    Exports a minimal blog post with blog_to_gemini in testing mode and
    checks that the expected .gmi file appears under base_dir/geminitest
    containing the title line, the content and a link line.
    The geminitest directory is removed afterwards.
    """
    print('gemini_blog')

    # the post being exported
    post_title = 'Test title'
    post_text = 'This is a test'
    post_link = 'https://some.link'
    post_published = '2022-02-25T20:15:00Z'
    message_json = {
        'object': {
            'published': post_published,
            'summary': post_title,
            'content': post_text + ' ' + post_link
        }
    }

    # where the export is expected to be written
    export_dir = base_dir + '/geminitest'
    expected_name = post_title.replace(' ', '_').lower()
    export_filename = export_dir + '/2022-02-25_' + expected_name + '.gmi'

    exported = blog_to_gemini(base_dir, 'someuser', 'somedomain',
                              message_json, 'en',
                              True, True)
    assert exported
    assert os.path.isdir(export_dir)
    assert os.path.isfile(export_filename)

    # title line, then body text, then the link line
    expected_fragments = (
        '# ' + post_title + '\n',
        post_text,
        '=> ' + post_link
    )
    for fragment in expected_fragments:
        assert text_in_file(fragment, export_filename)

    shutil.rmtree(export_dir, ignore_errors=True)
def run_all_tests():
base_dir = os.getcwd()
data_dir_testing(base_dir)
@ -9508,6 +9540,7 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
_test_gemini_blog(base_dir)
_test_actor_status()
_test_filter_match()
_test_blocking_domain(base_dir)