diff --git a/gemini.py b/gemini.py index def2ef6d5..6817bb7e0 100644 --- a/gemini.py +++ b/gemini.py @@ -18,6 +18,66 @@ from utils import get_post_attachments from utils import get_url_from_post +def _get_gemini_blog_title(message_json: dict, system_language: str) -> str: + """Returns the title for a gemini blog post + """ + title_text = '' + title_str = get_summary_from_post(message_json, system_language, []) + if title_str: + title_text = remove_html(title_str) + return title_text + + +def _get_gemini_blog_published(message_json: dict, debug: bool) -> str: + """Returns the published date for a gemini blog post + """ + # get the publication date + obj = message_json + if has_object_dict(message_json): + obj = message_json['object'] + if not obj.get('published'): + if debug: + print('WARN: blog_to_gemini Blog post has no publication date ' + + str(message_json)) + return '' + if not isinstance(obj['published'], str): + if debug: + print('WARN: blog_to_gemini publication date is not a string ' + + str(message_json)) + return '' + if 'T' not in obj['published']: + if debug: + print('WARN: blog_to_gemini ' + + 'publication date not in expected format ' + + obj['published']) + return '' + return obj['published'].split('T')[0] + + +def _get_gemini_blog_filename(base_dir: str, nickname: str, domain: str, + message_json: dict, system_language: str, + debug: bool, testing: bool) -> str: + """Returns the filename for a gemini blog post + """ + title_text = _get_gemini_blog_title(message_json, system_language) + published = _get_gemini_blog_published(message_json, debug) + if not published: + return '' + title_text2 = title_text.replace('.', ' ') + title_text2 = title_text2.replace(' ', '_') + + if not testing: + account_dir = acct_dir(base_dir, nickname, domain) + gemini_blog_dir = account_dir + '/gemini' + else: + account_dir = base_dir + gemini_blog_dir = account_dir + '/geminitest' + + gemini_blog_filename = \ + gemini_blog_dir + '/' + published + '_' + title_text2.lower() + '.gmi' + return gemini_blog_filename + + def blog_to_gemini(base_dir: str, nickname: str, domain: str, message_json: dict, system_language: str, debug: bool, testing: bool) -> bool: @@ -38,27 +98,9 @@ def blog_to_gemini(base_dir: str, nickname: str, domain: str, account_dir) return False - # get the publication date - obj = message_json - if has_object_dict(message_json): - obj = message_json['object'] - if not obj.get('published'): - if debug: - print('WARN: blog_to_gemini Blog post has no publication date ' + - str(message_json)) + published = _get_gemini_blog_published(message_json, debug) + if not published: return False - if not isinstance(obj['published'], str): - if debug: - print('WARN: blog_to_gemini publication date is not a string ' + - str(message_json)) - return False - if 'T' not in obj['published']: - if debug: - print('WARN: blog_to_gemini ' + - 'publication date not in expected format ' + - obj['published']) - return False - published = obj['published'].split('T')[0] # get the blog content content_str = get_base_content_from_post(message_json, system_language) @@ -70,10 +112,7 @@ def blog_to_gemini(base_dir: str, nickname: str, domain: str, content_text = remove_html(content_str) # get the blog title - title_text = '' - title_str = get_summary_from_post(message_json, system_language, []) - if title_str: - title_text = remove_html(title_str) + title_text = _get_gemini_blog_title(message_json, system_language) # get web links links: list[str] = [] @@ -108,10 +147,10 @@ def blog_to_gemini(base_dir: str, nickname: str, domain: str, if not os.path.isdir(gemini_blog_dir): os.mkdir(gemini_blog_dir) - title_text2 = title_text.replace('.', ' ') - title_text2 = title_text2.replace(' ', '_') gemini_blog_filename = \ - gemini_blog_dir + '/' + published + '_' + title_text2.lower() + '.gmi' + _get_gemini_blog_filename(base_dir, nickname, domain, + message_json, system_language, + debug, testing) if not title_text.startswith('# '): title_text = '# ' + title_text