diff --git a/blog.py b/blog.py
index acf875153..168078764 100644
--- a/blog.py
+++ b/blog.py
@@ -35,6 +35,7 @@ from utils import load_json
 from utils import first_paragraph_from_string
 from utils import get_actor_property_url
 from utils import acct_dir
+from utils import escape_text
 from posts import create_blogs_timeline
 from newswire import rss2header
 from newswire import rss2footer
@@ -375,12 +376,13 @@ def _html_blog_post_rss2(domain: str, post_json_object: {},
        post_json_object['object'].get('published'):
         published = post_json_object['object']['published']
         pub_date = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
-        title_str = post_json_object['object']['summary']
+        title_str = escape_text(post_json_object['object']['summary'])
         rss_date_str = pub_date.strftime("%a, %d %b %Y %H:%M:%S UT")
         content = \
             get_base_content_from_post(post_json_object, system_language)
         description = first_paragraph_from_string(content)
+        description = escape_text(description)
         rss_str = '     <item>'
         rss_str += '         <title>' + title_str + '</title>'
         rss_str += '         <link>' + message_link + '</link>'
@@ -542,7 +544,7 @@ def html_blog_page(authorized: bool, session,
     timeline_json = \
         create_blogs_timeline(base_dir, nickname, domain, port, http_prefix,
-                              no_of_items, False, page_number, '')
+                              no_of_items, False, page_number)
 
     if not timeline_json:
         return blog_str + html_footer()
@@ -631,7 +633,7 @@ def html_blog_page_rss2(base_dir: str, http_prefix: str, translate: {},
                               nickname, domain, port, http_prefix,
                               no_of_items, False,
-                              page_number, '')
+                              page_number)
 
     if not timeline_json:
         if include_header:
@@ -670,7 +672,7 @@ def html_blog_page_rss3(base_dir: str, http_prefix: str,
     timeline_json = \
         create_blogs_timeline(base_dir, nickname, domain, port, http_prefix,
-                              no_of_items, False, page_number, '')
+                              no_of_items, False, page_number)
 
     if not timeline_json:
         return blog_rss3
diff --git a/feeds.py b/feeds.py
index 6b91885c3..ea8589dec 100644
--- a/feeds.py
+++ b/feeds.py
@@ -8,6 +8,9 @@
 __status__ = "Production"
 __module_group__ = "RSS Feeds"
 
+from utils import escape_text
+
+
 def rss2tag_header(hashtag: str, http_prefix: str, domain_full: str) -> str:
     """Header for rss 2
     """
@@ -15,9 +18,9 @@ def rss2tag_header(hashtag: str, http_prefix: str, domain_full: str) -> str:
         "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + \
         "<rss version=\"2.0\">" + \
         '<channel>' + \
-        '    <title>#' + hashtag + '</title>' + \
+        '    <title>#' + escape_text(hashtag) + '</title>' + \
         '    <link>' + http_prefix + '://' + domain_full + \
-        '/tags/rss2/' + hashtag + '</link>'
+        '/tags/rss2/' + escape_text(hashtag) + '</link>'
 
 
 def rss2tag_footer() -> str:
diff --git a/newswire.py b/newswire.py
index 56e667bc4..d80e7546b 100644
--- a/newswire.py
+++ b/newswire.py
@@ -39,6 +39,8 @@ from utils import remove_html
 from utils import is_account_dir
 from utils import acct_dir
 from utils import local_actor_url
+from utils import escape_text
+from utils import unescaped_text
 from blocking import is_blocked_domain
 from blocking import is_blocked_hashtag
 from filters import is_filtered
@@ -76,8 +78,9 @@ def rss2header(http_prefix: str,
             '    <link>' + http_prefix + '://' + domain_full + \
             '/blog/rss.xml' + '</link>'
     else:
+        title_str = escape_text(translate[title])
         rss_str += \
-            '    <title>' + translate[title] + '</title>' + \
+            '    <title>' + title_str + '</title>' + \
             '    <link>' + \
             local_actor_url(http_prefix, nickname, domain_full) + \
             '/rss.xml' + '</link>'
@@ -407,12 +410,14 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
             continue
         category_str = rss_item.split('<title>')[1]
         category_str = category_str.split('</title>')[0].strip()
+        category_str = unescaped_text(category_str)
         if not category_str:
             continue
         if 'CDATA' in category_str:
             continue
         hashtag_list_str = rss_item.split('<description>')[1]
         hashtag_list_str = hashtag_list_str.split('</description>')[0].strip()
+        hashtag_list_str = unescaped_text(hashtag_list_str)
         if not hashtag_list_str:
             continue
         if 'CDATA' in hashtag_list_str:
@@ -766,17 +771,20 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
         title = rss_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<description>' in rss_item and '</description>' in rss_item:
             description = rss_item.split('<description>')[1]
             description = remove_html(description.split('</description>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in rss_item and \
                '</media:description>' in rss_item:
                 description = rss_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -874,16 +882,19 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = rss_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<description>' in rss_item and '</description>' in rss_item:
             description = rss_item.split('<description>')[1]
             description = remove_html(description.split('</description>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in rss_item and \
                '</media:description>' in rss_item:
                 description = rss_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -969,16 +980,19 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = atom_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<summary>' in atom_item and '</summary>' in atom_item:
             description = atom_item.split('<summary>')[1]
             description = remove_html(description.split('</summary>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in atom_item and \
                '</media:description>' in atom_item:
                 description = atom_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -1184,15 +1198,18 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = atom_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         description = ''
         if '<media:description>' in atom_item and \
            '</media:description>' in atom_item:
             description = atom_item.split('<media:description>')[1]
             description = description.split('</media:description>')[0]
+            description = unescaped_text(description)
             description = remove_html(description)
         elif '<summary>' in atom_item and '</summary>' in atom_item:
             description = atom_item.split('<summary>')[1]
             description = description.split('</summary>')[0]
+            description = unescaped_text(description)
             description = remove_html(description)
 
         link, _ = get_link_from_rss_item(atom_item, None, None)
@@ -1382,9 +1399,10 @@ def get_rs_sfrom_dict(base_dir: str, newswire: {},
             continue
         rss_str += \
             '<item>\n' + \
-            '  <title>' + fields[0] + '</title>\n'
+            '  <title>' + escape_text(fields[0]) + '</title>\n'
         description = remove_html(first_paragraph_from_string(fields[4]))
-        rss_str += '  <description>' + description + '</description>\n'
+        rss_str += \
+            '  <description>' + escape_text(description) + '</description>\n'
         url = fields[1]
         if '://' not in url:
             if domain_full not in url:
diff --git a/person.py b/person.py
index 6badac74a..16abd7561 100644
--- a/person.py
+++ b/person.py
@@ -1071,7 +1071,7 @@ def person_box_json(recent_posts_cache: {},
     if boxname == 'tlblogs':
         return create_blogs_timeline(base_dir, nickname, domain, port,
                                      http_prefix, no_of_items, header_only,
-                                     page_number, first_post_id)
+                                     page_number)
     if boxname == 'outbox':
         return create_outbox(base_dir, nickname,
                              domain, port, http_prefix,
diff --git a/posts.py b/posts.py
index 3d11250b2..82997dc9d 100644
--- a/posts.py
+++ b/posts.py
@@ -3626,12 +3626,11 @@ def create_replies_timeline(recent_posts_cache: {},
 
 def create_blogs_timeline(base_dir: str, nickname: str, domain: str, port: int,
                           http_prefix: str, items_per_page: int,
-                          header_only: bool, page_number: int,
-                          first_post_id: str) -> {}:
+                          header_only: bool, page_number: int) -> {}:
     return _create_box_indexed({}, base_dir, 'tlblogs', nickname,
                                domain, port, http_prefix, items_per_page,
                                header_only, True,
-                               0, False, 0, page_number, first_post_id)
+                               0, False, 0, page_number)
 
 
 def create_features_timeline(base_dir: str,
diff --git a/utils.py b/utils.py
index ce258e548..dad2c098e 100644
--- a/utils.py
+++ b/utils.py
@@ -36,7 +36,7 @@ VALID_HASHTAG_CHARS = \
 # both incoming and outgoing.
 # Could include dubious clacks or admin dogwhistles
 INVALID_CHARACTERS = (
-    '卐', '卍', '࿕', '࿖', '࿗', '࿘', 'ϟϟ', '🏳️‍🌈🚫', '⚡⚡'
+    '卐', '卍', '࿕', '࿖', '࿗', '࿘', 'ϟϟ', '🏳️‍🌈🚫', '⚡⚡', '​'
 )
 
 INVALID_ACTOR_URL_CHARACTERS = (
@@ -4226,3 +4226,31 @@ def license_link_from_name(license: str) -> str:
     else:
         value = 'https://creativecommons.org/publicdomain/zero/1.0'
     return value
+
+
+def _get_escaped_chars() -> {}:
+    """Returns escaped characters
+    """
+    return {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        '"': "&quot;",
+        "'": "&apos;"
+    }
+
+
+def escape_text(txt: str) -> str:
+    """Escape text for inclusion in xml/rss
+    """
+    for orig, replacement in _get_escaped_chars().items():
+        txt = txt.replace(orig, replacement)
+    return txt
+
+
+def unescaped_text(txt: str) -> str:
+    """Unescape text which was escaped for inclusion in xml/rss
+    """
+    for orig, replacement in _get_escaped_chars().items():
+        txt = txt.replace(replacement, orig)
+    return txt
diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py
index 9cc0aeeeb..2e6e95f4a 100644
--- a/webapp_hashtagswarm.py
+++ b/webapp_hashtagswarm.py
@@ -11,6 +11,7 @@ import os
 from datetime import datetime
 from utils import get_nickname_from_actor
 from utils import get_config_param
+from utils import escape_text
 from categories import get_hashtag_categories
 from categories import get_hashtag_category
 from webapp_utils import set_custom_background
@@ -41,7 +42,7 @@ def get_hashtag_categories_feed(base_dir: str,
     for category_str, hashtag_list in hashtag_categories.items():
         rss_str += \
             '<item>\n' + \
-            '  <title>' + category_str + '</title>\n'
+            '  <title>' + escape_text(category_str) + '</title>\n'
         list_str = ''
         for hashtag in hashtag_list:
             if ':' in hashtag:
@@ -50,7 +51,8 @@
                 continue
             list_str += hashtag + ' '
         rss_str += \
-            '  <description>' + list_str.strip() + '</description>\n' + \
+            '  <description>' + \
+            escape_text(list_str.strip()) + '</description>\n' + \
             '  <link/>\n' + \
             '  <pubDate>' + rss_date_str + '</pubDate>\n' + \
             '</item>\n'
diff --git a/webapp_search.py b/webapp_search.py
index 077cb99cb..41628549b 100644
--- a/webapp_search.py
+++ b/webapp_search.py
@@ -26,6 +26,7 @@ from utils import search_box_posts
 from utils import get_alt_path
 from utils import acct_dir
 from utils import local_actor_url
+from utils import escape_text
 from skills import no_of_actor_skills
 from skills import get_skills_from_list
 from categories import get_hashtag_category
@@ -1133,12 +1134,13 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
         if post_json_object['object'].get('summary'):
             hashtag_feed += \
                 '     <title>' + \
-                post_json_object['object']['summary'] + \
+                escape_text(post_json_object['object']['summary']) + \
                 '</title>'
         description = \
             get_base_content_from_post(post_json_object, system_language)
         description = \
             first_paragraph_from_string(description)
+        description = escape_text(description)
         hashtag_feed += \
             '     <description>' + description + '</description>'
         hashtag_feed += \
diff --git a/webapp_timeline.py b/webapp_timeline.py
index 8208c7ecf..e1ce92329 100644
--- a/webapp_timeline.py
+++ b/webapp_timeline.py
@@ -1072,8 +1072,9 @@ def html_timeline(default_timeline: str,
     if item_ctr > 0:
         # if showing the page down icon then remove the last item so that
         # firstpost does not overlap on the next timeline
-        if last_item_str:
-            tl_str = tl_str.replace(last_item_str, '')
+        if last_item_str and first_post_id != last_post_id:
+            if item_ctr > items_per_page / 2:
+                tl_str = tl_str.replace(last_item_str, '')
     tl_str += text_mode_separator
     first_post = ''
     if last_post_id:
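
Note (not part of the patch): the escape_text() and unescaped_text() helpers added to utils.py above are plain string substitutions over the five XML-reserved characters, applied in dict order with '&' handled first so that the entities produced by the other replacements are not escaped a second time. A minimal sketch of the intended round trip, using an invented summary string purely for illustration:

    from utils import escape_text, unescaped_text

    # a post summary containing characters that would break raw RSS/XML
    summary = 'Tom & Jerry <3 "fast" feeds'

    escaped = escape_text(summary)
    # escaped == 'Tom &amp; Jerry &lt;3 &quot;fast&quot; feeds'

    # feed parsing reverses the substitution
    assert unescaped_text(escaped) == summary

One caveat worth keeping in mind: escape_text() is not idempotent, so text that already contains entities (e.g. '&amp;') would be escaped again; the callers in this patch apply it only once, at the point where a feed string is assembled, and apply unescaped_text() only when parsing incoming feed items.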