Mirror of https://gitlab.com/bashrc2/epicyon

Commit 3c1866c40b: Merge branch 'main' of gitlab.com:bashrc2/epicyon

content.py (26 changed lines)
@@ -11,9 +11,9 @@ import os
import email.parser
import urllib.parse
from shutil import copyfile
from utils import valid_hash_tag
from utils import dangerous_svg
from utils import remove_domain_port
from utils import is_valid_language
from utils import get_image_extensions
from utils import load_json
from utils import save_json
@@ -33,17 +33,6 @@ MUSIC_SITES = ('soundcloud.com', 'bandcamp.com')
MAX_LINK_LENGTH = 40

VALID_HASHTAG_CHARS = \
    set('0123456789' +
        'abcdefghijklmnopqrstuvwxyz' +
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
        '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
        'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
        'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
        'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
        'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
        'ŴŵÝýŸÿŶŷŹźŽžŻż')

REMOVE_MARKUP = (
    'b', 'i', 'ul', 'ol', 'li', 'em', 'strong',
    'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5'
@@ -497,19 +486,6 @@ def add_web_links(content: str) -> str:
    return content


def valid_hash_tag(hashtag: str) -> bool:
    """Returns true if the give hashtag contains valid characters
    """
    # long hashtags are not valid
    if len(hashtag) >= 32:
        return False
    if set(hashtag).issubset(VALID_HASHTAG_CHARS):
        return True
    if is_valid_language(hashtag):
        return True
    return False


def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
                   replace_hashtags: {}, post_hashtags: {}) -> bool:
    """Detects hashtags and adds them to the replacements dict
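The function removed above is re-added essentially unchanged in utils.py (see the utils.py hunks further down), so callers only need to switch their import. A minimal sketch of the new call pattern, assuming an Epicyon source tree on the import path:

    # minimal sketch of the new import path, assuming an Epicyon checkout on sys.path
    from utils import valid_hash_tag   # previously: from content import valid_hash_tag

    print(valid_hash_tag('epicyon'))   # True: plain ASCII letters are accepted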
@@ -71,6 +71,10 @@ body, html {
    image-rendering: var(--rendering);
}

audio {
    width: 90%;
}

a, u {
    color: var(--options-fg-color);
}
inbox.py (2 changed lines)

@@ -61,6 +61,7 @@ from utils import undo_reaction_collection_entry
from utils import has_group_type
from utils import local_actor_url
from utils import has_object_stringType
from utils import valid_hash_tag
from categories import get_hashtag_categories
from categories import set_hashtag_category
from httpsig import get_digest_algorithm_from_headers

@@ -119,7 +120,6 @@ from announce import is_self_announce
from announce import create_announce
from notifyOnPost import notify_when_person_posts
from conversation import update_conversation
from content import valid_hash_tag
from webapp_hashtagswarm import html_hash_tag_swarm
from person import valid_sending_actor
@@ -24,7 +24,7 @@ from newswire import get_dict_from_newswire
# from posts import send_signed_json
from posts import create_news_post
from posts import archive_posts_for_person
from content import valid_hash_tag
from utils import valid_hash_tag
from utils import get_base_content_from_post
from utils import remove_html
from utils import get_full_domain
newswire.py (99 changed lines)

@@ -18,6 +18,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import valid_post_date
from categories import set_hashtag_category
from utils import valid_hash_tag
from utils import dangerous_svg
from utils import get_fav_filename_from_url
from utils import get_base_content_from_post
@@ -225,6 +226,10 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
    # extract hashtags from the text of the feed post
    post_tags = get_newswire_tags(all_text, max_tags)

    # Include tags from podcast categories
    if podcast_properties:
        post_tags += podcast_properties['categories']

    # combine the tags into a single list
    for tag in tags:
        if tag in post_tags:
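A standalone sketch of the tag merge performed here, using hypothetical lists in place of the get_newswire_tags() result and the podcast categories:

    # hypothetical inputs standing in for get_newswire_tags() output and
    # podcast_properties['categories']
    post_tags = ['#politics', '#radio']
    podcast_categories = ['#technology', '#radio']

    post_tags += podcast_categories        # include tags from podcast categories
    combined = []                          # then combine into a single de-duplicated list
    for tag in post_tags:
        if tag not in combined:
            combined.append(tag)
    print(combined)                        # ['#politics', '#radio', '#technology']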
@@ -384,13 +389,59 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
                                   False, force)


def xml_podcast_to_dict(xml_str: str) -> {}:
def _get_podcast_categories(xml_item: str, xml_str: str) -> str:
    """ get podcast categories if they exist. These can be turned into hashtags
    """
    podcast_categories = []
    episode_category_tags = ['<itunes:category', '<category']

    for category_tag in episode_category_tags:
        item_str = xml_item
        if category_tag not in xml_item:
            if category_tag not in xml_str:
                continue
            item_str = xml_str

        category_list = item_str.split(category_tag)
        first_category = True
        for episode_category in category_list:
            if first_category:
                first_category = False
                continue

            if 'text="' in episode_category:
                episode_category = episode_category.split('text="')[1]
                if '"' in episode_category:
                    episode_category = episode_category.split('"')[0]
                    episode_category = \
                        episode_category.lower().replace(' ', '')
                    episode_category = episode_category.replace('#', '')
                    if episode_category not in podcast_categories:
                        if valid_hash_tag(episode_category):
                            podcast_categories.append('#' + episode_category)
                continue

            if '>' in episode_category:
                episode_category = episode_category.split('>')[1]
                if '<' in episode_category:
                    episode_category = episode_category.split('<')[0]
                    episode_category = \
                        episode_category.lower().replace(' ', '')
                    episode_category = episode_category.replace('#', '')
                    if episode_category not in podcast_categories:
                        if valid_hash_tag(episode_category):
                            podcast_categories.append('#' + episode_category)

    return podcast_categories


def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
    """podcasting extensions for RSS feeds
    See https://github.com/Podcastindex-org/podcast-namespace/
    blob/main/docs/1.0.md
    """
    if '<podcast:' not in xml_str:
        if '<itunes:' not in xml_str:
    if '<podcast:' not in xml_item:
        if '<itunes:' not in xml_item:
            return {}

    podcast_properties = {
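A rough illustration of what the new _get_podcast_categories helper extracts; the XML fragment and the expected output are assumptions based on the parsing logic above, and the helper is private to newswire.py:

    # exploratory sketch only; _get_podcast_categories is a private helper and the
    # XML fragment is invented for illustration
    from newswire import _get_podcast_categories

    xml_item = ('<item><title>Episode 1</title>'
                '<itunes:category text="Technology" />'
                '<category>Politics</category></item>')
    print(_get_podcast_categories(xml_item, xml_item))
    # expected: ['#technology', '#politics']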
@@ -402,7 +453,7 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
        "trailers": []
    }

    pod_lines = xml_str.split('<podcast:')
    pod_lines = xml_item.split('<podcast:')
    ctr = 0
    for pod_line in pod_lines:
        if ctr == 0 or '>' not in pod_line:
@@ -453,9 +504,13 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
    podcast_episode_image = None
    episode_image_tags = ['<itunes:image']
    for image_tag in episode_image_tags:
        if image_tag not in xml_str:
            continue
        episode_image = xml_str.split(image_tag)[1]
        item_str = xml_item
        if image_tag not in xml_item:
            if image_tag not in xml_str:
                continue
            item_str = xml_str

        episode_image = item_str.split(image_tag)[1]
        if 'href="' in episode_image:
            episode_image = episode_image.split('href="')[1]
            if '"' in episode_image:
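The change above looks for the artwork on the episode itself first and only falls back to the channel-level image. A standalone sketch of that fallback with invented strings:

    # standalone sketch of the episode-first image lookup; the strings are invented
    xml_item = '<item><title>Ep 2</title></item>'   # no image on the item itself
    xml_str = '<channel><itunes:image href="https://pod.example/cover.jpg" /></channel>'

    image_tag = '<itunes:image'
    item_str = xml_item
    if image_tag not in xml_item:
        if image_tag in xml_str:
            item_str = xml_str                      # fall back to the channel artwork

    episode_image = item_str.split(image_tag)[1]
    episode_image = episode_image.split('href="')[1].split('"')[0]
    print(episode_image)                            # https://pod.example/cover.jpg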
@@ -471,17 +526,21 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
            podcast_episode_image = episode_image
            break

    # get categories if they exist. These can be turned into hashtags
    podcast_categories = _get_podcast_categories(xml_item, xml_str)

    if podcast_episode_image:
        podcast_properties['image'] = podcast_episode_image
        podcast_properties['categories'] = podcast_categories

        if '<itunes:explicit>Y' in xml_str or \
           '<itunes:explicit>T' in xml_str or \
           '<itunes:explicit>1' in xml_str:
        if '<itunes:explicit>Y' in xml_item or \
           '<itunes:explicit>T' in xml_item or \
           '<itunes:explicit>1' in xml_item:
            podcast_properties['explicit'] = True
        else:
            podcast_properties['explicit'] = False
    else:
        if '<podcast:' not in xml_str:
        if '<podcast:' not in xml_item:
            return {}

    return podcast_properties
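For orientation, a hypothetical example of the dictionary shape returned after this change; only keys visible in this diff are shown and the values are invented:

    # hypothetical result shape; keys taken from the diff above, values invented
    podcast_properties = {
        # ...other list-valued entries initialised earlier in the function...
        "trailers": [],
        "image": "https://pod.example/cover.jpg",
        "categories": ["#technology", "#politics"],
        "explicit": False,
    }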
@@ -537,7 +596,11 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
    rss_items = xml_str.split('<item>')
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
    first_item = True
    for rss_item in rss_items:
        if first_item:
            first_item = False
            continue
        if not rss_item:
            continue
        if len(rss_item) > max_bytes:
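The new first_item flag skips the text before the first <item> tag (the channel header), which should never be treated as a post; the same guard is added to the RSS 1.0 and Atom parsers below. A minimal sketch:

    # minimal sketch of the first-chunk skip, using an invented two-item feed
    xml_str = '<channel><title>Feed</title><item>one</item><item>two</item></channel>'
    rss_items = xml_str.split('<item>')

    first_item = True
    for rss_item in rss_items:
        if first_item:               # '<channel><title>Feed</title>' is not a post
            first_item = False
            continue
        if not rss_item:
            continue
        print(rss_item.split('</item>')[0])   # prints "one", then "two"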
@@ -589,7 +652,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
        if _valid_feed_date(pub_date_str):
            post_filename = ''
            votes_status = []
            podcast_properties = xml_podcast_to_dict(rss_item)
            podcast_properties = xml_podcast_to_dict(rss_item, xml_str)
            if podcast_properties:
                podcast_properties['linkMimeType'] = link_mime_type
            _add_newswire_dict_entry(base_dir, domain,
@@ -630,7 +693,11 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
    rss_items = xml_str.split(item_str)
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
    first_item = True
    for rss_item in rss_items:
        if first_item:
            first_item = False
            continue
        if not rss_item:
            continue
        if len(rss_item) > max_bytes:
@@ -682,7 +749,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
        if _valid_feed_date(pub_date_str):
            post_filename = ''
            votes_status = []
            podcast_properties = xml_podcast_to_dict(rss_item)
            podcast_properties = xml_podcast_to_dict(rss_item, xml_str)
            if podcast_properties:
                podcast_properties['linkMimeType'] = link_mime_type
            _add_newswire_dict_entry(base_dir, domain,
@@ -713,7 +780,11 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
    atom_items = xml_str.split('<entry>')
    post_ctr = 0
    max_bytes = max_feed_item_size_kb * 1024
    first_item = True
    for atom_item in atom_items:
        if first_item:
            first_item = False
            continue
        if not atom_item:
            continue
        if len(atom_item) > max_bytes:
@@ -763,7 +834,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
        if _valid_feed_date(pub_date_str):
            post_filename = ''
            votes_status = []
            podcast_properties = xml_podcast_to_dict(atom_item)
            podcast_properties = xml_podcast_to_dict(atom_item, xml_str)
            if podcast_properties:
                podcast_properties['linkMimeType'] = link_mime_type
            _add_newswire_dict_entry(base_dir, domain,
tests.py (4 changed lines)

@@ -82,6 +82,7 @@ from utils import copytree
from utils import load_json
from utils import save_json
from utils import get_status_number
from utils import valid_hash_tag
from utils import get_followers_of_person
from utils import remove_html
from utils import dangerous_markup
@@ -132,7 +133,6 @@ from content import get_price_from_string
from content import limit_repeated_words
from content import switch_words
from content import extract_text_fields_in_post
from content import valid_hash_tag
from content import html_replace_email_quote
from content import html_replace_quote_marks
from content import dangerous_css
@@ -6428,7 +6428,7 @@ def _test_xml_podcast_dict() -> None:
        'address="someaddress2" split="99" />\n' + \
        '</podcast:value>\n' + \
        '</rss>'
    podcast_properties = xml_podcast_to_dict(xml_str)
    podcast_properties = xml_podcast_to_dict(xml_str, xml_str)
    assert podcast_properties
    # pprint(podcast_properties)
    assert podcast_properties.get('valueRecipients')
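The test now passes the same string for both the item and the whole feed, since the fixture is a single XML document. A hypothetical follow-on assertion for the new categories field (not part of the actual test) could look like this, assuming the fixture contained an <itunes:category text="Technology" /> element:

    # hypothetical extra check, not in the actual test suite
    assert '#technology' in podcast_properties['categories']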
utils.py (28 changed lines)

@@ -20,6 +20,17 @@ from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from followingCalendar import add_person_to_calendar

VALID_HASHTAG_CHARS = \
    set('0123456789' +
        'abcdefghijklmnopqrstuvwxyz' +
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
        '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
        'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
        'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
        'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
        'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
        'ŴŵÝýŸÿŶŷŹźŽžŻż')

# posts containing these strings will always get screened out,
# both incoming and outgoing.
# Could include dubious clacks or admin dogwhistles
@@ -1798,7 +1809,7 @@ def delete_post(base_dir: str, http_prefix: str,
              str(post_filename))


def is_valid_language(text: str) -> bool:
def _is_valid_language(text: str) -> bool:
    """Returns true if the given text contains a valid
    natural language string
    """
@@ -1900,7 +1911,7 @@ def valid_nickname(domain: str, nickname: str) -> bool:
        return False
    if len(nickname) > 30:
        return False
    if not is_valid_language(nickname):
    if not _is_valid_language(nickname):
        return False
    forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!')
    for char in forbidden_chars:
@@ -3288,3 +3299,16 @@ def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str:
    if '/favicon.' in favicon_url:
        favicon_url = favicon_url.replace('/favicon.', '.')
    return base_dir + '/favicons/' + favicon_url.replace('/', '-')


def valid_hash_tag(hashtag: str) -> bool:
    """Returns true if the give hashtag contains valid characters
    """
    # long hashtags are not valid
    if len(hashtag) >= 32:
        return False
    if set(hashtag).issubset(VALID_HASHTAG_CHARS):
        return True
    if _is_valid_language(hashtag):
        return True
    return False
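A brief behaviour sketch of the relocated function, assuming an Epicyon checkout on the import path:

    # behaviour sketch, assuming an Epicyon checkout on sys.path
    from utils import valid_hash_tag

    print(valid_hash_tag('Fediverse'))     # True: every character is in VALID_HASHTAG_CHARS
    print(valid_hash_tag('ŚwiętaWojna'))   # True: accented Latin characters are allowed
    print(valid_hash_tag('x' * 32))        # False: 32 or more characters is too long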
@@ -184,6 +184,17 @@ def html_podcast_episode(css_cache: {}, translate: {},
            audio_extension.replace('.', '') + '">' + \
            translate['Your browser does not support the audio element.'] + \
            '\n </audio>\n'
    elif podcast_properties.get('linkMimeType'):
        if 'video' in podcast_properties['linkMimeType']:
            video_mime_type = podcast_properties['linkMimeType']
            video_msg = 'Your browser does not support the video element.'
            podcast_str += \
                ' <figure id="videoContainer" ' + \
                'data-fullscreen="false">\n' + \
                ' <video id="video" controls preload="metadata">\n' + \
                '<source src="' + link_url + '" ' + \
                'type="' + video_mime_type + '">' + \
                translate[video_msg] + '</video>\n </figure>\n'

    podcast_title = \
        remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
@@ -210,6 +221,14 @@ def html_podcast_episode(css_cache: {}, translate: {},
        '"><button class="donateButton">' + translate['Donate'] + \
        '</button></a></p>\n'

    if podcast_properties['categories']:
        podcast_str += '<p>'
        tags_str = ''
        for tag in podcast_properties['categories']:
            tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '')
            tags_str += '<a href="' + tag_link + '">' + tag + '</a> '
        podcast_str += tags_str.strip() + '</p>\n'

    podcast_str += _html_podcast_performers(podcast_properties)

    podcast_str += ' </center>\n'
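The loop added here turns each category hashtag into a link to the local tag timeline. A standalone sketch of the markup it produces, with an invented nickname and category list:

    # standalone sketch of the generated markup; nickname and categories are invented
    nickname = 'alice'
    categories = ['#technology', '#politics']

    tags_str = ''
    for tag in categories:
        tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '')
        tags_str += '<a href="' + tag_link + '">' + tag + '</a> '
    print('<p>' + tags_str.strip() + '</p>')
    # output (shown wrapped over two lines):
    # <p><a href="/users/alice/tags/technology">#technology</a>
    #  <a href="/users/alice/tags/politics">#politics</a></p>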