diff --git a/feeds.py b/feeds.py index 6b91885c3..ea8589dec 100644 --- a/feeds.py +++ b/feeds.py @@ -8,6 +8,9 @@ __status__ = "Production" __module_group__ = "RSS Feeds" +from utils import escape_text + + def rss2tag_header(hashtag: str, http_prefix: str, domain_full: str) -> str: """Header for rss 2 """ @@ -15,9 +18,9 @@ def rss2tag_header(hashtag: str, http_prefix: str, domain_full: str) -> str: "" + \ "" + \ '' + \ - ' #' + hashtag + '' + \ + ' #' + escape_text(hashtag) + '' + \ ' ' + http_prefix + '://' + domain_full + \ - '/tags/rss2/' + hashtag + '' + '/tags/rss2/' + escape_text(hashtag) + '' def rss2tag_footer() -> str: diff --git a/newswire.py b/newswire.py index 56e667bc4..d80e7546b 100644 --- a/newswire.py +++ b/newswire.py @@ -39,6 +39,8 @@ from utils import remove_html from utils import is_account_dir from utils import acct_dir from utils import local_actor_url +from utils import escape_text +from utils import unescaped_text from blocking import is_blocked_domain from blocking import is_blocked_hashtag from filters import is_filtered @@ -76,8 +78,9 @@ def rss2header(http_prefix: str, ' ' + http_prefix + '://' + domain_full + \ '/blog/rss.xml' + '' else: + title_str = escape_text(translate[title]) rss_str += \ - ' ' + translate[title] + '' + \ + ' ' + title_str + '' + \ ' ' + \ local_actor_url(http_prefix, nickname, domain_full) + \ '/rss.xml' + '' @@ -407,12 +410,14 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str, continue category_str = rss_item.split('')[1] category_str = category_str.split('')[0].strip() + category_str = unescaped_text(category_str) if not category_str: continue if 'CDATA' in category_str: continue hashtag_list_str = rss_item.split('')[1] hashtag_list_str = hashtag_list_str.split('')[0].strip() + hashtag_list_str = unescaped_text(hashtag_list_str) if not hashtag_list_str: continue if 'CDATA' in hashtag_list_str: @@ -766,17 +771,20 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str, title = 
rss_item.split('')[1] title = _remove_cdata(title.split('')[0]) + title = unescaped_text(title) title = remove_html(title) description = '' if '' in rss_item and '' in rss_item: description = rss_item.split('')[1] description = remove_html(description.split('')[0]) + description = unescaped_text(description) else: if '' in rss_item and \ '' in rss_item: description = rss_item.split('')[1] description = description.split('')[0] + description = unescaped_text(description) description = remove_html(description) proxy_type = None @@ -874,16 +882,19 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str, continue title = rss_item.split('')[1] title = _remove_cdata(title.split('')[0]) + title = unescaped_text(title) title = remove_html(title) description = '' if '' in rss_item and '' in rss_item: description = rss_item.split('')[1] description = remove_html(description.split('')[0]) + description = unescaped_text(description) else: if '' in rss_item and \ '' in rss_item: description = rss_item.split('')[1] description = description.split('')[0] + description = unescaped_text(description) description = remove_html(description) proxy_type = None @@ -969,16 +980,19 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, continue title = atom_item.split('')[1] title = _remove_cdata(title.split('')[0]) + title = unescaped_text(title) title = remove_html(title) description = '' if '' in atom_item and '' in atom_item: description = atom_item.split('')[1] description = remove_html(description.split('')[0]) + description = unescaped_text(description) else: if '' in atom_item and \ '' in atom_item: description = atom_item.split('')[1] description = description.split('')[0] + description = unescaped_text(description) description = remove_html(description) proxy_type = None @@ -1184,15 +1198,18 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str, continue title = atom_item.split('')[1] title = _remove_cdata(title.split('')[0]) + title = 
unescaped_text(title) description = '' if '' in atom_item and \ '' in atom_item: description = atom_item.split('')[1] description = description.split('')[0] + description = unescaped_text(description) description = remove_html(description) elif '' in atom_item and '' in atom_item: description = atom_item.split('')[1] description = description.split('')[0] + description = unescaped_text(description) description = remove_html(description) link, _ = get_link_from_rss_item(atom_item, None, None) @@ -1382,9 +1399,10 @@ def get_rs_sfrom_dict(base_dir: str, newswire: {}, continue rss_str += \ '\n' + \ - ' ' + fields[0] + '\n' + ' ' + escape_text(fields[0]) + '\n' description = remove_html(first_paragraph_from_string(fields[4])) - rss_str += ' ' + description + '\n' + rss_str += \ + ' ' + escape_text(description) + '\n' url = fields[1] if '://' not in url: if domain_full not in url: diff --git a/utils.py b/utils.py index d5abc8b3f..da8702791 100644 --- a/utils.py +++ b/utils.py @@ -4241,3 +4241,18 @@ def escape_text(txt: str) -> str: for orig, replacement in replacements.items(): txt = txt.replace(orig, replacement) return txt + + +def unescaped_text(txt: str) -> str: + """Unescape text which was escaped for inclusion in xml/rss + """ + replacements = { + "&": "&", + "<": "<", + ">": ">", + '"': """, + "'": "'" + } + for orig, replacement in replacements.items(): + txt = txt.replace(replacement, orig) + return txt diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py index 9cc0aeeeb..2e6e95f4a 100644 --- a/webapp_hashtagswarm.py +++ b/webapp_hashtagswarm.py @@ -11,6 +11,7 @@ import os from datetime import datetime from utils import get_nickname_from_actor from utils import get_config_param +from utils import escape_text from categories import get_hashtag_categories from categories import get_hashtag_category from webapp_utils import set_custom_background @@ -41,7 +42,7 @@ def get_hashtag_categories_feed(base_dir: str, for category_str, hashtag_list in hashtag_categories.items(): 
rss_str += \ '\n' + \ - ' ' + category_str + '\n' + ' ' + escape_text(category_str) + '\n' list_str = '' for hashtag in hashtag_list: if ':' in hashtag: @@ -50,7 +51,8 @@ def get_hashtag_categories_feed(base_dir: str, continue list_str += hashtag + ' ' rss_str += \ - ' ' + list_str.strip() + '\n' + \ + ' ' + \ + escape_text(list_str.strip()) + '\n' + \ ' \n' + \ ' ' + rss_date_str + '\n' + \ '\n' diff --git a/webapp_search.py b/webapp_search.py index 077cb99cb..41628549b 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -26,6 +26,7 @@ from utils import search_box_posts from utils import get_alt_path from utils import acct_dir from utils import local_actor_url +from utils import escape_text from skills import no_of_actor_skills from skills import get_skills_from_list from categories import get_hashtag_category @@ -1133,12 +1134,13 @@ def rss_hashtag_search(nickname: str, domain: str, port: int, if post_json_object['object'].get('summary'): hashtag_feed += \ ' ' + \ - post_json_object['object']['summary'] + \ + escape_text(post_json_object['object']['summary']) + \ '' description = \ get_base_content_from_post(post_json_object, system_language) description = first_paragraph_from_string(description) + description = escape_text(description) hashtag_feed += \ ' ' + description + '' hashtag_feed += \