epicyon/gemini.py

188 lines
6.2 KiB
Python

# Metadata for this module, which converts blog posts to gemini format
__filename__ = "gemini.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.6.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"
import os
import shutil
from utils import acct_dir
from utils import has_object_dict
from utils import remove_html
from utils import get_summary_from_post
from utils import get_base_content_from_post
from utils import get_post_attachments
from utils import get_url_from_post
def _get_gemini_blog_title(message_json: dict, system_language: str) -> str:
    """Returns the title for a gemini blog post.
    The title is the post summary passed through remove_html,
    or an empty string if the post has no summary
    """
    summary_str = get_summary_from_post(message_json, system_language, [])
    if not summary_str:
        return ''
    return remove_html(summary_str)
def _get_gemini_blog_published(message_json: dict, debug: bool) -> str:
    """Returns the published date for a gemini blog post.
    This is the part of the 'published' field before the 'T' separator.
    An empty string is returned if the field is missing, is not a string
    or does not contain a 'T', with a warning printed when debug is set
    """
    # the published field is on the wrapped object, if there is one
    post_obj = message_json
    if has_object_dict(message_json):
        post_obj = message_json['object']
    published = post_obj.get('published')

    if published and isinstance(published, str) and 'T' in published:
        return published.split('T')[0]

    # report why the publication date could not be used
    if debug:
        if not published:
            print('WARN: blog_to_gemini Blog post has no publication date ' +
                  str(message_json))
        elif not isinstance(published, str):
            print('WARN: blog_to_gemini publication date is not a string ' +
                  str(message_json))
        else:
            print('WARN: blog_to_gemini ' +
                  'publication date not in expected format ' +
                  published)
    return ''
def _get_gemini_blog_filename(base_dir: str, nickname: str, domain: str,
                              message_json: dict, system_language: str,
                              debug: bool, testing: bool) -> str:
    """Returns the filename for a gemini blog post, or an empty string
    if the post has no usable publication date.

    The filename has the form <directory>/<published>_<title>.gmi
    where the title is lower case with dots, spaces and path separators
    replaced by underscores. The directory is 'gemini' within the account
    directory, or 'geminitest' within base_dir when testing
    """
    title_text = _get_gemini_blog_title(message_json, system_language)
    published = _get_gemini_blog_published(message_json, debug)
    if not published:
        return ''

    # Path separators within the title or date would place the file
    # inside a subdirectory which does not exist, so that the post
    # could not be written. Replace them along with dots and spaces.
    title_text2 = title_text
    for separator in ('.', ' ', '/', '\\'):
        title_text2 = title_text2.replace(separator, '_')
    published2 = published.replace('/', '_').replace('\\', '_')

    if not testing:
        account_dir = acct_dir(base_dir, nickname, domain)
        gemini_blog_dir = account_dir + '/gemini'
    else:
        account_dir = base_dir
        gemini_blog_dir = account_dir + '/geminitest'

    gemini_blog_filename = \
        gemini_blog_dir + '/' + published2 + '_' + \
        title_text2.lower() + '.gmi'
    return gemini_blog_filename
def _get_gemini_blog_links(content_text: str) -> list[str]:
    """Returns the http, https and gemini links found within the
    given text, in the order in which they appear
    """
    links: list[str] = []
    sections = content_text.split('://')
    # each '://' sits between the scheme, at the end of one section,
    # and the address, at the start of the following section.
    # Pairing adjacent sections means that the scheme is always taken
    # from the section immediately before the address
    for prev_section, section in zip(sections, sections[1:]):
        # the address ends at the first newline or space
        link_str = section.split('\n')[0].split(' ')[0]
        # a trailing dot is treated as the end of a sentence
        if link_str.endswith('.'):
            link_str = link_str[:-1]
        if '.' not in link_str:
            continue
        # the scheme is the last word before the '://', which may
        # follow either a space or the start of a line
        prefix = prev_section.replace('\n', ' ').rsplit(' ', 1)[-1]
        if prefix in ('http', 'https', 'gemini'):
            links.append(prefix + '://' + link_str)
    return links


def blog_to_gemini(base_dir: str, nickname: str, domain: str,
                   message_json: dict, system_language: str,
                   debug: bool, testing: bool) -> bool:
    """Converts a blog post to gemini format
    Returns True on success

    The post is written as a .gmi file containing the title, the
    publication date and the content with html removed, followed by
    '=> ' link lines for any web links within the content and for any
    named attachments having a url.
    When testing is True base_dir is used as the account directory and
    the file is written to a 'geminitest' subdirectory, which is removed
    beforehand if it already exists.
    Returns False if the account directory does not exist, the post has
    no usable publication date or no content, or the file could not
    be written
    """
    if not testing:
        account_dir = acct_dir(base_dir, nickname, domain)
        gemini_blog_dir = account_dir + '/gemini'
    else:
        account_dir = base_dir
        gemini_blog_dir = account_dir + '/geminitest'
        # begin each test without files from any previous test
        if os.path.isdir(gemini_blog_dir):
            shutil.rmtree(gemini_blog_dir, ignore_errors=True)

    if not os.path.isdir(account_dir):
        if debug:
            print('WARN: blog_to_gemini account directory not found ' +
                  account_dir)
        return False

    published = _get_gemini_blog_published(message_json, debug)
    if not published:
        return False

    # get the blog content
    content_str = get_base_content_from_post(message_json, system_language)
    if not content_str:
        if debug:
            print('WARN: blog_to_gemini no content ' +
                  str(message_json))
        return False
    content_text = remove_html(content_str)

    # get the blog title, shown as a top level heading
    title_text = _get_gemini_blog_title(message_json, system_language)
    if not title_text.startswith('# '):
        title_text = '# ' + title_text

    # get web links
    links = _get_gemini_blog_links(content_text)

    # get attachments, using the attachment name as the link label
    post_attachments = get_post_attachments(message_json)
    if post_attachments:
        for attach in post_attachments:
            if not isinstance(attach, dict):
                continue
            if not attach.get('name'):
                continue
            if attach.get('url'):
                links.append(get_url_from_post(attach['url']) + ' ' +
                             attach['name'])

    # add links to the end of the content
    if links:
        content_text += \
            '\n\n' + ''.join('=> ' + link_str + '\n' for link_str in links)

    # create gemini blog directory
    try:
        os.makedirs(gemini_blog_dir, exist_ok=True)
    except OSError:
        print('EX: blog_to_gemini unable to create directory ' +
              gemini_blog_dir)
        return False

    gemini_blog_filename = \
        _get_gemini_blog_filename(base_dir, nickname, domain,
                                  message_json, system_language,
                                  debug, testing)

    try:
        with open(gemini_blog_filename, 'w+',
                  encoding='utf-8') as fp_gemini:
            fp_gemini.write(title_text + '\n\n' + published + '\n\n' +
                            content_text)
    except OSError:
        print('EX: blog_to_gemini unable to write ' + gemini_blog_filename)
        return False

    return True