diff --git a/content.py b/content.py index a0412f5a9..2394e21a1 100644 --- a/content.py +++ b/content.py @@ -11,9 +11,9 @@ import os import email.parser import urllib.parse from shutil import copyfile +from utils import valid_hash_tag from utils import dangerous_svg from utils import remove_domain_port -from utils import is_valid_language from utils import get_image_extensions from utils import load_json from utils import save_json @@ -33,17 +33,6 @@ MUSIC_SITES = ('soundcloud.com', 'bandcamp.com') MAX_LINK_LENGTH = 40 -VALID_HASHTAG_CHARS = \ - set('0123456789' + - 'abcdefghijklmnopqrstuvwxyz' + - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + - '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + - 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + - 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + - 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + - 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + - 'ŴŵÝýŸÿŶŷŹźŽžŻż') - REMOVE_MARKUP = ( 'b', 'i', 'ul', 'ol', 'li', 'em', 'strong', 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5' @@ -497,19 +486,6 @@ def add_web_links(content: str) -> str: return content -def valid_hash_tag(hashtag: str) -> bool: - """Returns true if the give hashtag contains valid characters - """ - # long hashtags are not valid - if len(hashtag) >= 32: - return False - if set(hashtag).issubset(VALID_HASHTAG_CHARS): - return True - if is_valid_language(hashtag): - return True - return False - - def _add_hash_tags(word_str: str, http_prefix: str, domain: str, replace_hashtags: {}, post_hashtags: {}) -> bool: """Detects hashtags and adds them to the replacements dict diff --git a/epicyon-podcast.css b/epicyon-podcast.css index 5510098c3..b15f9718e 100644 --- a/epicyon-podcast.css +++ b/epicyon-podcast.css @@ -71,6 +71,10 @@ body, html { image-rendering: var(--rendering); } +audio { + width: 90%; +} + a, u { color: var(--options-fg-color); } diff --git a/inbox.py b/inbox.py index a4acdf093..51692f5e0 100644 --- a/inbox.py +++ b/inbox.py @@ -61,6 +61,7 @@ from utils import undo_reaction_collection_entry from utils import has_group_type from utils import local_actor_url from utils import has_object_stringType +from utils import valid_hash_tag from categories import get_hashtag_categories from categories import set_hashtag_category from httpsig import get_digest_algorithm_from_headers @@ -119,7 +120,6 @@ from announce import is_self_announce from announce import create_announce from notifyOnPost import notify_when_person_posts from conversation import update_conversation -from content import valid_hash_tag from webapp_hashtagswarm import html_hash_tag_swarm from person import valid_sending_actor diff --git a/newsdaemon.py b/newsdaemon.py index cddb460d4..7233eab66 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -24,7 +24,7 @@ from newswire import get_dict_from_newswire # from posts import send_signed_json from posts import create_news_post from posts import archive_posts_for_person -from content import valid_hash_tag +from utils import valid_hash_tag from utils import get_base_content_from_post from utils import remove_html from utils import get_full_domain diff --git a/newswire.py b/newswire.py index c63dd710d..308e34c4f 100644 --- a/newswire.py +++ b/newswire.py @@ -18,6 +18,7 @@ from datetime import timezone from collections import OrderedDict from utils import valid_post_date from categories import set_hashtag_category +from utils import valid_hash_tag from utils import dangerous_svg from utils import get_fav_filename_from_url from utils import get_base_content_from_post @@ -225,6 +226,10 @@ def _add_newswire_dict_entry(base_dir: str, domain: str, # extract hashtags from the text of the feed post post_tags = get_newswire_tags(all_text, max_tags) + # Include tags from podcast categories + if podcast_properties: + post_tags += podcast_properties['categories'] + # combine the tags into a single list for tag in tags: if tag in post_tags: @@ -384,13 +389,59 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str, False, force) -def xml_podcast_to_dict(xml_str: str) -> {}: +def _get_podcast_categories(xml_item: str, xml_str: str) -> str: + """ get podcast categories if they exist. These can be turned into hashtags + """ + podcast_categories = [] + episode_category_tags = ['' in episode_category: + episode_category = episode_category.split('>')[1] + if '<' in episode_category: + episode_category = episode_category.split('<')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) + + return podcast_categories + + +def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: """podcasting extensions for RSS feeds See https://github.com/Podcastindex-org/podcast-namespace/ blob/main/docs/1.0.md """ - if ' {}: "trailers": [] } - pod_lines = xml_str.split('' not in pod_line: @@ -453,9 +504,13 @@ def xml_podcast_to_dict(xml_str: str) -> {}: podcast_episode_image = None episode_image_tags = [' {}: podcast_episode_image = episode_image break + # get categories if they exist. These can be turned into hashtags + podcast_categories = _get_podcast_categories(xml_item, xml_str) + if podcast_episode_image: podcast_properties['image'] = podcast_episode_image + podcast_properties['categories'] = podcast_categories - if 'Y' in xml_str or \ - 'T' in xml_str or \ - '1' in xml_str: + if 'Y' in xml_item or \ + 'T' in xml_item or \ + '1' in xml_item: podcast_properties['explicit'] = True else: podcast_properties['explicit'] = False else: - if '') post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for rss_item in rss_items: + if first_item: + first_item = False + continue if not rss_item: continue if len(rss_item) > max_bytes: @@ -589,7 +652,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str, if _valid_feed_date(pub_date_str): post_filename = '' votes_status = [] - podcast_properties = xml_podcast_to_dict(rss_item) + podcast_properties = xml_podcast_to_dict(rss_item, xml_str) if podcast_properties: podcast_properties['linkMimeType'] = link_mime_type _add_newswire_dict_entry(base_dir, domain, @@ -630,7 +693,11 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str, rss_items = xml_str.split(item_str) post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for rss_item in rss_items: + if first_item: + first_item = False + continue if not rss_item: continue if len(rss_item) > max_bytes: @@ -682,7 +749,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str, if _valid_feed_date(pub_date_str): post_filename = '' votes_status = [] - podcast_properties = xml_podcast_to_dict(rss_item) + podcast_properties = xml_podcast_to_dict(rss_item, xml_str) if podcast_properties: podcast_properties['linkMimeType'] = link_mime_type _add_newswire_dict_entry(base_dir, domain, @@ -713,7 +780,11 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, atom_items = xml_str.split('') post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for atom_item in atom_items: + if first_item: + first_item = False + continue if not atom_item: continue if len(atom_item) > max_bytes: @@ -763,7 +834,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, if _valid_feed_date(pub_date_str): post_filename = '' votes_status = [] - podcast_properties = xml_podcast_to_dict(atom_item) + podcast_properties = xml_podcast_to_dict(atom_item, xml_str) if podcast_properties: podcast_properties['linkMimeType'] = link_mime_type _add_newswire_dict_entry(base_dir, domain, diff --git a/tests.py b/tests.py index f549b8184..4d086b1f8 100644 --- a/tests.py +++ b/tests.py @@ -82,6 +82,7 @@ from utils import copytree from utils import load_json from utils import save_json from utils import get_status_number +from utils import valid_hash_tag from utils import get_followers_of_person from utils import remove_html from utils import dangerous_markup @@ -132,7 +133,6 @@ from content import get_price_from_string from content import limit_repeated_words from content import switch_words from content import extract_text_fields_in_post -from content import valid_hash_tag from content import html_replace_email_quote from content import html_replace_quote_marks from content import dangerous_css @@ -6428,7 +6428,7 @@ def _test_xml_podcast_dict() -> None: 'address="someaddress2" split="99" />\n' + \ '\n' + \ '' - podcast_properties = xml_podcast_to_dict(xml_str) + podcast_properties = xml_podcast_to_dict(xml_str, xml_str) assert podcast_properties # pprint(podcast_properties) assert podcast_properties.get('valueRecipients') diff --git a/utils.py b/utils.py index 3e1930464..74e6d2b39 100644 --- a/utils.py +++ b/utils.py @@ -20,6 +20,17 @@ from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import hashes from followingCalendar import add_person_to_calendar +VALID_HASHTAG_CHARS = \ + set('0123456789' + + 'abcdefghijklmnopqrstuvwxyz' + + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + + 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + + 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + + 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + + 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + + 'ŴŵÝýŸÿŶŷŹźŽžŻż') + # posts containing these strings will always get screened out, # both incoming and outgoing. # Could include dubious clacks or admin dogwhistles @@ -1798,7 +1809,7 @@ def delete_post(base_dir: str, http_prefix: str, str(post_filename)) -def is_valid_language(text: str) -> bool: +def _is_valid_language(text: str) -> bool: """Returns true if the given text contains a valid natural language string """ @@ -1900,7 +1911,7 @@ def valid_nickname(domain: str, nickname: str) -> bool: return False if len(nickname) > 30: return False - if not is_valid_language(nickname): + if not _is_valid_language(nickname): return False forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!') for char in forbidden_chars: @@ -3288,3 +3299,16 @@ def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str: if '/favicon.' in favicon_url: favicon_url = favicon_url.replace('/favicon.', '.') return base_dir + '/favicons/' + favicon_url.replace('/', '-') + + +def valid_hash_tag(hashtag: str) -> bool: + """Returns true if the give hashtag contains valid characters + """ + # long hashtags are not valid + if len(hashtag) >= 32: + return False + if set(hashtag).issubset(VALID_HASHTAG_CHARS): + return True + if _is_valid_language(hashtag): + return True + return False diff --git a/webapp_podcast.py b/webapp_podcast.py index def903fbb..1f30c36d2 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -184,6 +184,17 @@ def html_podcast_episode(css_cache: {}, translate: {}, audio_extension.replace('.', '') + '">' + \ translate['Your browser does not support the audio element.'] + \ '\n \n' + elif podcast_properties.get('linkMimeType'): + if 'video' in podcast_properties['linkMimeType']: + video_mime_type = podcast_properties['linkMimeType'] + video_msg = 'Your browser does not support the video element.' + podcast_str += \ + '
\n' + \ + ' \n
\n' podcast_title = \ remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0]))) @@ -210,6 +221,14 @@ def html_podcast_episode(css_cache: {}, translate: {}, '">

\n' + if podcast_properties['categories']: + podcast_str += '

' + tags_str = '' + for tag in podcast_properties['categories']: + tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '') + tags_str += '' + tag + ' ' + podcast_str += tags_str.strip() + '

\n' + podcast_str += _html_podcast_performers(podcast_properties) podcast_str += ' \n'