From 7d125c25f1c8568f2a80cdfed22c0b6a23d5cb42 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Sun, 1 Jan 2023 22:28:13 +0000
Subject: [PATCH 1/9] Filter out posts containing zero width spaces

---
 utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils.py b/utils.py
index ce258e548..4c9ad21e6 100644
--- a/utils.py
+++ b/utils.py
@@ -36,7 +36,7 @@ VALID_HASHTAG_CHARS = \
 # both incoming and outgoing.
 # Could include dubious clacks or admin dogwhistles
 INVALID_CHARACTERS = (
-    '卐', '卍', '࿕', '࿖', '࿗', '࿘', 'ϟϟ', '🏳️‍🌈🚫', '⚡⚡'
+    '卐', '卍', '࿕', '࿖', '࿗', '࿘', 'ϟϟ', '🏳️‍🌈🚫', '⚡⚡', '​'
 )
 
 INVALID_ACTOR_URL_CHARACTERS = (

From ab0ca588c98eebda31e6ea1456320cac60acf95a Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 09:55:41 +0000
Subject: [PATCH 2/9] Escape text within blog rss feed

---
 blog.py  |  4 +++-
 utils.py | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/blog.py b/blog.py
index acf875153..84379d48a 100644
--- a/blog.py
+++ b/blog.py
@@ -35,6 +35,7 @@ from utils import load_json
 from utils import first_paragraph_from_string
 from utils import get_actor_property_url
 from utils import acct_dir
+from utils import escape_text
 from posts import create_blogs_timeline
 from newswire import rss2header
 from newswire import rss2footer
@@ -375,12 +376,13 @@ def _html_blog_post_rss2(domain: str, post_json_object: {},
            post_json_object['object'].get('published'):
         published = post_json_object['object']['published']
         pub_date = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
-        title_str = post_json_object['object']['summary']
+        title_str = escape_text(post_json_object['object']['summary'])
         rss_date_str = pub_date.strftime("%a, %d %b %Y %H:%M:%S UT")
         content = \
             get_base_content_from_post(post_json_object, system_language)
         description = first_paragraph_from_string(content)
+        description = escape_text(description)
         rss_str = ' <item>'
         rss_str += ' <title>' + title_str + '</title>'
         rss_str += ' <link>' + message_link + '</link>'

diff --git a/utils.py b/utils.py
index 4c9ad21e6..d5abc8b3f 100644
--- a/utils.py
+++ b/utils.py
@@ -4226,3 +4226,18 @@ def license_link_from_name(license: str) -> str:
     else:
         value = 'https://creativecommons.org/publicdomain/zero/1.0'
     return value
+
+
+def escape_text(txt: str) -> str:
+    """Escape text for inclusion in xml/rss
+    """
+    replacements = {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        '"': "&quot;",
+        "'": "&apos;"
+    }
+    for orig, replacement in replacements.items():
+        txt = txt.replace(orig, replacement)
+    return txt

From e371a4d65e46f0898905fb521cdf39c20d85fe0a Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 10:24:35 +0000
Subject: [PATCH 3/9] Escape rss titles and descriptions

---
 feeds.py               |  7 +++++--
 newswire.py            | 24 +++++++++++++++++++++---
 utils.py               | 15 +++++++++++++++
 webapp_hashtagswarm.py |  6 ++++--
 webapp_search.py       |  4 +++-
 5 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/feeds.py b/feeds.py
index 6b91885c3..ea8589dec 100644
--- a/feeds.py
+++ b/feeds.py
@@ -8,6 +8,9 @@ __status__ = "Production"
 __module_group__ = "RSS Feeds"
 
 
+from utils import escape_text
+
+
 def rss2tag_header(hashtag: str, http_prefix: str, domain_full: str) -> str:
     """Header for rss 2
     """
     return \
         "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + \
         "<rss version=\"2.0\">" + \
         '<channel>' + \
-        ' <title>#' + hashtag + '</title>' + \
+        ' <title>#' + escape_text(hashtag) + '</title>' + \
         ' <link>' + http_prefix + '://' + domain_full + \
-        '/tags/rss2/' + hashtag + '</link>'
+        '/tags/rss2/' + escape_text(hashtag) + '</link>'
 
 
 def rss2tag_footer() -> str:
diff --git a/newswire.py b/newswire.py
index 56e667bc4..d80e7546b 100644
--- a/newswire.py
+++ b/newswire.py
@@ -39,6 +39,8 @@ from utils import remove_html
 from utils import is_account_dir
 from utils import acct_dir
 from utils import local_actor_url
+from utils import escape_text
+from utils import unescaped_text
 from blocking import is_blocked_domain
 from blocking import is_blocked_hashtag
 from filters import is_filtered
@@ -76,8 +78,9 @@ def rss2header(http_prefix: str,
             ' <link>' + http_prefix + '://' + domain_full + \
             '/blog/rss.xml' + '</link>'
     else:
+        title_str = escape_text(translate[title])
         rss_str += \
-            ' <title>' + translate[title] + '</title>' + \
+            ' <title>' + title_str + '</title>' + \
             ' <link>' + \
             local_actor_url(http_prefix, nickname, domain_full) + \
             '/rss.xml' + '</link>'
@@ -407,12 +410,14 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
             continue
         category_str = rss_item.split('<title>')[1]
         category_str = category_str.split('</title>')[0].strip()
+        category_str = unescaped_text(category_str)
         if not category_str:
             continue
         if 'CDATA' in category_str:
             continue
         hashtag_list_str = rss_item.split('<description>')[1]
         hashtag_list_str = hashtag_list_str.split('</description>')[0].strip()
+        hashtag_list_str = unescaped_text(hashtag_list_str)
         if not hashtag_list_str:
             continue
         if 'CDATA' in hashtag_list_str:
@@ -766,17 +771,20 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
         title = rss_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<description>' in rss_item and '</description>' in rss_item:
             description = rss_item.split('<description>')[1]
             description = remove_html(description.split('</description>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in rss_item and \
                '</media:description>' in rss_item:
                 description = rss_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -874,16 +882,19 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = rss_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<description>' in rss_item and '</description>' in rss_item:
             description = rss_item.split('<description>')[1]
             description = remove_html(description.split('</description>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in rss_item and \
                '</media:description>' in rss_item:
                 description = rss_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -969,16 +980,19 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = atom_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         title = remove_html(title)
         description = ''
         if '<summary>' in atom_item and '</summary>' in atom_item:
             description = atom_item.split('<summary>')[1]
             description = remove_html(description.split('</summary>')[0])
+            description = unescaped_text(description)
         else:
             if '<media:description>' in atom_item and \
               '</media:description>' in atom_item:
                 description = atom_item.split('<media:description>')[1]
                 description = description.split('</media:description>')[0]
+                description = unescaped_text(description)
                 description = remove_html(description)
 
         proxy_type = None
@@ -1184,15 +1198,18 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         title = atom_item.split('<title>')[1]
         title = _remove_cdata(title.split('</title>')[0])
+        title = unescaped_text(title)
         description = ''
         if '<media:description>' in atom_item and \
           '</media:description>' in atom_item:
             description = atom_item.split('<media:description>')[1]
             description = description.split('</media:description>')[0]
+            description = unescaped_text(description)
             description = remove_html(description)
         elif '<summary>' in atom_item and '</summary>' in atom_item:
             description = atom_item.split('<summary>')[1]
             description = description.split('</summary>')[0]
+            description = unescaped_text(description)
             description = remove_html(description)
         link, _ = get_link_from_rss_item(atom_item, None, None)
@@ -1382,9 +1399,10 @@ def get_rs_sfrom_dict(base_dir: str, newswire: {},
             continue
         rss_str += \
             '<item>\n' + \
-            ' <title>' + fields[0] + '</title>\n'
+            ' <title>' + escape_text(fields[0]) + '</title>\n'
         description = remove_html(first_paragraph_from_string(fields[4]))
-        rss_str += ' <description>' + description + '</description>\n'
+        rss_str += \
+            ' <description>' + escape_text(description) + '</description>\n'
         url = fields[1]
         if '://' not in url:
             if domain_full not in url:

diff --git a/utils.py b/utils.py
index d5abc8b3f..da8702791 100644
--- a/utils.py
+++ b/utils.py
@@ -4241,3 +4241,18 @@ def escape_text(txt: str) -> str:
     for orig, replacement in replacements.items():
         txt = txt.replace(orig, replacement)
     return txt
+
+
+def unescaped_text(txt: str) -> str:
+    """Removes xml/rss escaping from text
+    """
+    replacements = {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        '"': "&quot;",
+        "'": "&apos;"
+    }
+    for orig, replacement in replacements.items():
+        txt = txt.replace(replacement, orig)
+    return txt

diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py
index 9cc0aeeeb..2e6e95f4a 100644
--- a/webapp_hashtagswarm.py
+++ b/webapp_hashtagswarm.py
@@ -11,6 +11,7 @@ import os
 from datetime import datetime
 from utils import get_nickname_from_actor
 from utils import get_config_param
+from utils import escape_text
 from categories import get_hashtag_categories
 from categories import get_hashtag_category
 from webapp_utils import set_custom_background
@@ -41,7 +42,7 @@ def get_hashtag_categories_feed(base_dir: str,
     for category_str, hashtag_list in hashtag_categories.items():
         rss_str += \
             '<item>\n' + \
-            ' <title>' + category_str + '</title>\n'
+            ' <title>' + escape_text(category_str) + '</title>\n'
         list_str = ''
         for hashtag in hashtag_list:
             if ':' in hashtag:
                 continue
             list_str += hashtag + ' '
         rss_str += \
-            ' <description>' + list_str.strip() + '</description>\n' + \
+            ' <description>' + \
+            escape_text(list_str.strip()) + '</description>\n' + \
             ' <link/>\n' + \
             ' <pubDate>' + rss_date_str + '</pubDate>\n' + \
             '</item>\n'

diff --git a/webapp_search.py b/webapp_search.py
index 077cb99cb..41628549b 100644
--- a/webapp_search.py
+++ b/webapp_search.py
@@ -26,6 +26,7 @@ from utils import search_box_posts
 from utils import get_alt_path
 from utils import acct_dir
 from utils import local_actor_url
+from utils import escape_text
 from skills import no_of_actor_skills
 from skills import get_skills_from_list
 from categories import get_hashtag_category
@@ -1133,12 +1134,13 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
         if post_json_object['object'].get('summary'):
             hashtag_feed += \
                 ' <title>' + \
-                post_json_object['object']['summary'] + \
+                escape_text(post_json_object['object']['summary']) + \
                 '</title>'
         description = \
             get_base_content_from_post(post_json_object, system_language)
         description = first_paragraph_from_string(description)
+        description = escape_text(description)
         hashtag_feed += \
             ' <description>' + description + '</description>'
         hashtag_feed += \

From bca431c95d64d9f6a966c8e0d087bae092a60c05 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 10:37:57 +0000
Subject: [PATCH 4/9] Don't apply first post to blogs

---
 person.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/person.py b/person.py
index 6badac74a..16abd7561 100644
--- a/person.py
+++ b/person.py
@@ -1071,7 +1071,7 @@ def person_box_json(recent_posts_cache: {},
     if boxname == 'tlblogs':
         return create_blogs_timeline(base_dir, nickname, domain, port,
                                      http_prefix, no_of_items, header_only,
-                                     page_number, first_post_id)
+                                     page_number)
     if boxname == 'outbox':
         return create_outbox(base_dir, nickname, domain, port, http_prefix,

From e930eb3aaf1ee62c10fbd1db2d6ed17bda6b057b Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 10:41:42 +0000
Subject: [PATCH 5/9] Don't apply first post to blogs

---
 person.py | 2 +-
 posts.py  | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/person.py b/person.py
index 16abd7561..8372d573a 100644
--- a/person.py
+++ b/person.py
@@ -1071,7 +1071,7 @@ def person_box_json(recent_posts_cache: {},
     if boxname == 'tlblogs':
         return create_blogs_timeline(base_dir, nickname, domain, port,
                                      http_prefix, no_of_items, header_only,
-                                     page_number)
+                                     page_number, '')
     if boxname == 'outbox':
         return create_outbox(base_dir, nickname, domain, port, http_prefix,

diff --git a/posts.py b/posts.py
index 3d11250b2..82997dc9d 100644
--- a/posts.py
+++ b/posts.py
@@ -3626,12 +3626,11 @@ def create_replies_timeline(recent_posts_cache: {},
 def create_blogs_timeline(base_dir: str, nickname: str,
                           domain: str, port: int, http_prefix: str,
                           items_per_page: int,
-                          header_only: bool, page_number: int,
-                          first_post_id: str) -> {}:
+                          header_only: bool, page_number: int) -> {}:
     return _create_box_indexed({}, base_dir, 'tlblogs', nickname,
                                domain, port, http_prefix, items_per_page,
                                header_only, True,
-                               0, False, 0, page_number, first_post_id)
+                               0, False, 0, page_number)
 
 
 def create_features_timeline(base_dir: str,

From 71c291abd18d6c0a5b3f390628df60158be2e311 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 10:44:49 +0000
Subject: [PATCH 6/9] Remove unused argument

---
 blog.py   | 6 +++---
 person.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/blog.py b/blog.py
index 84379d48a..168078764 100644
--- a/blog.py
+++ b/blog.py
@@ -544,7 +544,7 @@ def html_blog_page(authorized: bool, session,
     timeline_json = \
         create_blogs_timeline(base_dir, nickname, domain, port, http_prefix,
-                              no_of_items, False, page_number, '')
+                              no_of_items, False, page_number)
 
     if not timeline_json:
         return blog_str + html_footer()
@@ -633,7 +633,7 @@ def html_blog_page_rss2(base_dir: str, http_prefix: str, translate: {},
                               nickname, domain, port, http_prefix,
                               no_of_items, False,
-                              page_number, '')
+                              page_number)
 
     if not timeline_json:
         if include_header:
@@ -672,7 +672,7 @@ def html_blog_page_rss3(base_dir: str, http_prefix: str,
     timeline_json = \
         create_blogs_timeline(base_dir, nickname, domain, port, http_prefix,
-                              no_of_items, False, page_number, '')
+                              no_of_items, False, page_number)
 
     if not timeline_json:
         return blog_rss3

diff --git a/person.py b/person.py
index 8372d573a..16abd7561 100644
--- a/person.py
+++ b/person.py
@@ -1071,7 +1071,7 @@ def person_box_json(recent_posts_cache: {},
     if boxname == 'tlblogs':
         return create_blogs_timeline(base_dir, nickname, domain, port,
                                      http_prefix, no_of_items, header_only,
-                                     page_number, '')
+                                     page_number)
     if boxname == 'outbox':
         return create_outbox(base_dir, nickname, domain, port, http_prefix,

From 58971bb3b7d4886c090700e35dcd30662cd17fc6 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 11:23:05 +0000
Subject: [PATCH 7/9] Check for single posts on timelines

---
 webapp_timeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/webapp_timeline.py b/webapp_timeline.py
index 8208c7ecf..f4a929cee 100644
--- a/webapp_timeline.py
+++ b/webapp_timeline.py
@@ -1072,7 +1072,7 @@ def html_timeline(default_timeline: str,
     if item_ctr > 0:
         # if showing the page down icon then remove the last item so that
         # firstpost does not overlap on the next timeline
-        if last_item_str:
+        if last_item_str and first_post_id != last_post_id:
             tl_str = tl_str.replace(last_item_str, '')
     tl_str += text_mode_separator
     first_post = ''

From 5257ba4f55b33c277f4a04b3f9a102e9f74b8e41 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 11:27:52 +0000
Subject: [PATCH 8/9] Only remove the last post if there are enough posts

---
 webapp_timeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/webapp_timeline.py b/webapp_timeline.py
index f4a929cee..e1ce92329 100644
--- a/webapp_timeline.py
+++ b/webapp_timeline.py
@@ -1073,7 +1073,8 @@ def html_timeline(default_timeline: str,
         # if showing the page down icon then remove the last item so that
         # firstpost does not overlap on the next timeline
         if last_item_str and first_post_id != last_post_id:
-            tl_str = tl_str.replace(last_item_str, '')
+            if item_ctr > items_per_page / 2:
+                tl_str = tl_str.replace(last_item_str, '')
     tl_str += text_mode_separator
     first_post = ''
     if last_post_id:

From 586c482bea38a75fe71bd616f7023fbd16f477b7 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Mon, 2 Jan 2023 11:41:48 +0000
Subject: [PATCH 9/9] Tidying

---
 utils.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/utils.py b/utils.py
index da8702791..dad2c098e 100644
--- a/utils.py
+++ b/utils.py
@@ -4228,17 +4228,22 @@ def license_link_from_name(license: str) -> str:
     return value
 
 
-def escape_text(txt: str) -> str:
-    """Escape text for inclusion in xml/rss
+def _get_escaped_chars() -> {}:
+    """Returns escaped characters
     """
-    replacements = {
+    return {
         "&": "&amp;",
         "<": "&lt;",
         ">": "&gt;",
         '"': "&quot;",
         "'": "&apos;"
     }
-    for orig, replacement in replacements.items():
+
+
+def escape_text(txt: str) -> str:
+    """Escape text for inclusion in xml/rss
+    """
+    for orig, replacement in _get_escaped_chars().items():
         txt = txt.replace(orig, replacement)
     return txt
 
@@ -4246,13 +4251,6 @@ def unescaped_text(txt: str) -> str:
     """Removes xml/rss escaping from text
     """
-    replacements = {
-        "&": "&amp;",
-        "<": "&lt;",
-        ">": "&gt;",
-        '"': "&quot;",
-        "'": "&apos;"
-    }
-    for orig, replacement in replacements.items():
+    for orig, replacement in _get_escaped_chars().items():
         txt = txt.replace(replacement, orig)
     return txt
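
A minimal usage sketch of the escape_text/unescaped_text helpers added in patches 2, 3 and 9, assuming the definitions shown above; the sample title string is hypothetical and not taken from the patches:

    from utils import escape_text, unescaped_text

    # escape_text substitutes each special character with its xml entity,
    # so a post summary can be embedded safely in an rss title or description
    title = 'Fish & Chips are <better> than "winkles"'
    escaped = escape_text(title)
    print(escaped)
    # Fish &amp; Chips are &lt;better&gt; than &quot;winkles&quot;

    # unescaped_text reverses the substitution when a feed is parsed back in
    assert unescaped_text(escaped) == title

Because '&' is the first entry in the escaped characters dict, it is substituted before the other entities, so the ampersands produced by the later '&lt;', '&gt;', '&quot;' and '&apos;' substitutions are not escaped twice.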