Merge branch 'main' of gitlab.com:bashrc2/epicyon

2022-01-13 17:08:38 +00:00 · 2022-01-13 17:08:38 +00:00 · 3c1866c40b
parent 1a3586b58d 3aab275360
commit 3c1866c40b
8 changed files with 139 additions and 45 deletions
--- a/content.py
+++ b/content.py
@ -11,9 +11,9 @@ import os
 import email.parser
 import urllib.parse
 from shutil import copyfile
+from utils import valid_hash_tag
 from utils import dangerous_svg
 from utils import remove_domain_port
-from utils import is_valid_language
 from utils import get_image_extensions
 from utils import load_json
 from utils import save_json
@ -33,17 +33,6 @@ MUSIC_SITES = ('soundcloud.com', 'bandcamp.com')

 MAX_LINK_LENGTH = 40

-VALID_HASHTAG_CHARS = \
-    set('0123456789' +
-        'abcdefghijklmnopqrstuvwxyz' +
-        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
-        '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
-        'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
-        'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
-        'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
-        'ŔŕŘřẞßŚśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
-        'ŴŵÝýŸÿŶŷŹźŽžŻż')
-
 REMOVE_MARKUP = (
    'b', 'i', 'ul', 'ol', 'li', 'em', 'strong',
    'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5'
@ -497,19 +486,6 @@ def add_web_links(content: str) -> str:
    return content


-def valid_hash_tag(hashtag: str) -> bool:
-    """Returns true if the give hashtag contains valid characters
-    """
-    # long hashtags are not valid
-    if len(hashtag) >= 32:
-        return False
-    if set(hashtag).issubset(VALID_HASHTAG_CHARS):
-        return True
-    if is_valid_language(hashtag):
-        return True
-    return False
-
-
 def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
                   replace_hashtags: {}, post_hashtags: {}) -> bool:
    """Detects hashtags and adds them to the replacements dict
--- a/epicyon-podcast.css
+++ b/epicyon-podcast.css
@ -71,6 +71,10 @@ body, html {
    image-rendering: var(--rendering);
 }

+audio {
+    width: 90%;
+}
+
 a, u {
    color: var(--options-fg-color);
 }
--- a/inbox.py
+++ b/inbox.py
@ -61,6 +61,7 @@ from utils import undo_reaction_collection_entry
 from utils import has_group_type
 from utils import local_actor_url
 from utils import has_object_stringType
+from utils import valid_hash_tag
 from categories import get_hashtag_categories
 from categories import set_hashtag_category
 from httpsig import get_digest_algorithm_from_headers
@ -119,7 +120,6 @@ from announce import is_self_announce
 from announce import create_announce
 from notifyOnPost import notify_when_person_posts
 from conversation import update_conversation
-from content import valid_hash_tag
 from webapp_hashtagswarm import html_hash_tag_swarm
 from person import valid_sending_actor

--- a/newsdaemon.py
+++ b/newsdaemon.py
@ -24,7 +24,7 @@ from newswire import get_dict_from_newswire
 # from posts import send_signed_json
 from posts import create_news_post
 from posts import archive_posts_for_person
-from content import valid_hash_tag
+from utils import valid_hash_tag
 from utils import get_base_content_from_post
 from utils import remove_html
 from utils import get_full_domain
--- a/newswire.py
+++ b/newswire.py
@ -18,6 +18,7 @@ from datetime import timezone
 from collections import OrderedDict
 from utils import valid_post_date
 from categories import set_hashtag_category
+from utils import valid_hash_tag
 from utils import dangerous_svg
 from utils import get_fav_filename_from_url
 from utils import get_base_content_from_post
@ -225,6 +226,10 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
    # extract hashtags from the text of the feed post
    post_tags = get_newswire_tags(all_text, max_tags)

+    # Include tags from podcast categories
+    if podcast_properties:
+        post_tags += podcast_properties['categories']
+
    # combine the tags into a single list
    for tag in tags:
        if tag in post_tags:
@ -384,13 +389,59 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
                                     False, force)


-def xml_podcast_to_dict(xml_str: str) -> {}:
+def _get_podcast_categories(xml_item: str, xml_str: str) -> []:
+    """Returns the podcast categories as a list of hashtags.
+    For each category tag the text of the individual item (xml_item)
+    is searched first, and the whole feed (xml_str) is used as a
+    fallback if the item does not contain that tag.
+    Both the attribute form <itunes:category text="Name"> and the
+    element form <category>Name</category> are recognised.
+    Each name is lowercased, has spaces and '#' characters removed
+    and is returned with a '#' prefix if valid_hash_tag accepts it.
+    Empty names and duplicates are skipped.
+    """
+    podcast_categories = []
+    episode_category_tags = ('<itunes:category', '<category')
+
+    for category_tag in episode_category_tags:
+        item_str = xml_item
+        if category_tag not in xml_item:
+            if category_tag not in xml_str:
+                continue
+            # no categories on the item, so use the feed level ones
+            item_str = xml_str
+
+        # the text before the first tag is not a category, so skip it
+        for category_section in item_str.split(category_tag)[1:]:
+            if 'text="' in category_section:
+                # attribute form: <itunes:category text="Name" />
+                remainder = category_section.split('text="')[1]
+                if '"' not in remainder:
+                    continue
+                category_name = remainder.split('"')[0]
+            elif '>' in category_section:
+                # element form: <category>Name</category>
+                remainder = category_section.split('>')[1]
+                if '<' not in remainder:
+                    continue
+                category_name = remainder.split('<')[0]
+            else:
+                continue
+
+            category_name = category_name.lower().replace(' ', '')
+            category_name = category_name.replace('#', '')
+            if not category_name:
+                # avoid producing a bare '#' from an empty category
+                continue
+
+            # entries are stored with their '#' prefix, so the
+            # duplicate check has to use the prefixed form
+            hashtag = '#' + category_name
+            if hashtag in podcast_categories:
+                continue
+            if valid_hash_tag(category_name):
+                podcast_categories.append(hashtag)
+
+    return podcast_categories
+
+
+def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
    """podcasting extensions for RSS feeds
    See https://github.com/Podcastindex-org/podcast-namespace/
    blob/main/docs/1.0.md
    """
-    if '<podcast:' not in xml_str:
-        if '<itunes:' not in xml_str:
+    if '<podcast:' not in xml_item:
+        if '<itunes:' not in xml_item:
            return {}

    podcast_properties = {
@ -402,7 +453,7 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
        "trailers": []
    }

-    pod_lines = xml_str.split('<podcast:')
+    pod_lines = xml_item.split('<podcast:')
    ctr = 0
    for pod_line in pod_lines:
        if ctr == 0 or '>' not in pod_line:
@ -453,9 +504,13 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
    podcast_episode_image = None
    episode_image_tags = ['<itunes:image']
    for image_tag in episode_image_tags:
-        if image_tag not in xml_str:
-            continue
-        episode_image = xml_str.split(image_tag)[1]
+        item_str = xml_item
+        if image_tag not in xml_item:
+            if image_tag not in xml_str:
+                continue
+            item_str = xml_str
+
+        episode_image = item_str.split(image_tag)[1]
        if 'href="' in episode_image:
            episode_image = episode_image.split('href="')[1]
            if '"' in episode_image:
@ -471,17 +526,21 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
                        podcast_episode_image = episode_image
                        break

+    # get categories if they exist. These can be turned into hashtags
+    podcast_categories = _get_podcast_categories(xml_item, xml_str)
+
    if podcast_episode_image:
        podcast_properties['image'] = podcast_episode_image
+        podcast_properties['categories'] = podcast_categories

-        if '<itunes:explicit>Y' in xml_str or \
-           '<itunes:explicit>T' in xml_str or \
-           '<itunes:explicit>1' in xml_str:
+        if '<itunes:explicit>Y' in xml_item or \
+           '<itunes:explicit>T' in xml_item or \
+           '<itunes:explicit>1' in xml_item:
            podcast_properties['explicit'] = True
        else:
            podcast_properties['explicit'] = False
    else:
-        if '<podcast:' not in xml_str:
+        if '<podcast:' not in xml_item:
            return {}

    return podcast_properties
@ -537,7 +596,11 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
    rss_items = xml_str.split('<item>')
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
+    first_item = True
    for rss_item in rss_items:
+        if first_item:
+            first_item = False
+            continue
        if not rss_item:
            continue
        if len(rss_item) > max_bytes:
@ -589,7 +652,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
            if _valid_feed_date(pub_date_str):
                post_filename = ''
                votes_status = []
-                podcast_properties = xml_podcast_to_dict(rss_item)
+                podcast_properties = xml_podcast_to_dict(rss_item, xml_str)
                if podcast_properties:
                    podcast_properties['linkMimeType'] = link_mime_type
                _add_newswire_dict_entry(base_dir, domain,
@ -630,7 +693,11 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
    rss_items = xml_str.split(item_str)
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
+    first_item = True
    for rss_item in rss_items:
+        if first_item:
+            first_item = False
+            continue
        if not rss_item:
            continue
        if len(rss_item) > max_bytes:
@ -682,7 +749,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
            if _valid_feed_date(pub_date_str):
                post_filename = ''
                votes_status = []
-                podcast_properties = xml_podcast_to_dict(rss_item)
+                podcast_properties = xml_podcast_to_dict(rss_item, xml_str)
                if podcast_properties:
                    podcast_properties['linkMimeType'] = link_mime_type
                _add_newswire_dict_entry(base_dir, domain,
@ -713,7 +780,11 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
    atom_items = xml_str.split('<entry>')
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
+    first_item = True
    for atom_item in atom_items:
+        if first_item:
+            first_item = False
+            continue
        if not atom_item:
            continue
        if len(atom_item) > max_bytes:
@ -763,7 +834,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
            if _valid_feed_date(pub_date_str):
                post_filename = ''
                votes_status = []
-                podcast_properties = xml_podcast_to_dict(atom_item)
+                podcast_properties = xml_podcast_to_dict(atom_item, xml_str)
                if podcast_properties:
                    podcast_properties['linkMimeType'] = link_mime_type
                _add_newswire_dict_entry(base_dir, domain,
--- a/tests.py
+++ b/tests.py
@ -82,6 +82,7 @@ from utils import copytree
 from utils import load_json
 from utils import save_json
 from utils import get_status_number
+from utils import valid_hash_tag
 from utils import get_followers_of_person
 from utils import remove_html
 from utils import dangerous_markup
@ -132,7 +133,6 @@ from content import get_price_from_string
 from content import limit_repeated_words
 from content import switch_words
 from content import extract_text_fields_in_post
-from content import valid_hash_tag
 from content import html_replace_email_quote
 from content import html_replace_quote_marks
 from content import dangerous_css
@ -6428,7 +6428,7 @@ def _test_xml_podcast_dict() -> None:
        'address="someaddress2" split="99" />\n' + \
        '</podcast:value>\n' + \
        '</rss>'
-    podcast_properties = xml_podcast_to_dict(xml_str)
+    podcast_properties = xml_podcast_to_dict(xml_str, xml_str)
    assert podcast_properties
    # pprint(podcast_properties)
    assert podcast_properties.get('valueRecipients')
--- a/utils.py
+++ b/utils.py
@ -20,6 +20,17 @@ from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import hashes
 from followingCalendar import add_person_to_calendar

+VALID_HASHTAG_CHARS = \
+    set('0123456789' +
+        'abcdefghijklmnopqrstuvwxyz' +
+        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
+        '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
+        'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
+        'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
+        'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
+        'ŔŕŘřẞßŚśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
+        'ŴŵÝýŸÿŶŷŹźŽžŻż')
+
 # posts containing these strings will always get screened out,
 # both incoming and outgoing.
 # Could include dubious clacks or admin dogwhistles
@ -1798,7 +1809,7 @@ def delete_post(base_dir: str, http_prefix: str,
                  str(post_filename))


-def is_valid_language(text: str) -> bool:
+def _is_valid_language(text: str) -> bool:
    """Returns true if the given text contains a valid
    natural language string
    """
@ -1900,7 +1911,7 @@ def valid_nickname(domain: str, nickname: str) -> bool:
        return False
    if len(nickname) > 30:
        return False
-    if not is_valid_language(nickname):
+    if not _is_valid_language(nickname):
        return False
    forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!')
    for char in forbidden_chars:
@ -3288,3 +3299,16 @@ def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str:
    if '/favicon.' in favicon_url:
        favicon_url = favicon_url.replace('/favicon.', '.')
    return base_dir + '/favicons/' + favicon_url.replace('/', '-')
+
+
+def valid_hash_tag(hashtag: str) -> bool:
+    """Returns true if the given hashtag contains valid characters.
+    Hashtags of 32 or more characters are always rejected.
+    Otherwise the hashtag is accepted if every character is within
+    VALID_HASHTAG_CHARS, or failing that if _is_valid_language
+    returns true for it.
+    """
+    # long hashtags are not valid
+    if len(hashtag) >= 32:
+        return False
+    if set(hashtag).issubset(VALID_HASHTAG_CHARS):
+        return True
+    if _is_valid_language(hashtag):
+        return True
+    return False
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@ -184,6 +184,17 @@ def html_podcast_episode(css_cache: {}, translate: {},
            audio_extension.replace('.', '') + '">' + \
            translate['Your browser does not support the audio element.'] + \
            '\n  </audio>\n'
+    elif podcast_properties.get('linkMimeType'):
+        if 'video' in podcast_properties['linkMimeType']:
+            video_mime_type = podcast_properties['linkMimeType']
+            video_msg = 'Your browser does not support the video element.'
+            podcast_str += \
+                '  <figure id="videoContainer" ' + \
+                'data-fullscreen="false">\n' + \
+                '    <video id="video" controls preload="metadata">\n' + \
+                '<source src="' + link_url + '" ' + \
+                'type="' + video_mime_type + '">' + \
+                translate[video_msg] + '</video>\n  </figure>\n'

    podcast_title = \
        remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
@ -210,6 +221,14 @@ def html_podcast_episode(css_cache: {}, translate: {},
                '"><button class="donateButton">' + translate['Donate'] + \
                '</button></a></p>\n'

+    if podcast_properties['categories']:
+        podcast_str += '<p>'
+        tags_str = ''
+        for tag in podcast_properties['categories']:
+            tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '')
+            tags_str += '<a href="' + tag_link + '">' + tag + '</a> '
+        podcast_str += tags_str.strip() + '</p>\n'
+
    podcast_str += _html_podcast_performers(podcast_properties)

    podcast_str += '  </center>\n'