epicyon/utils.py

5407 lines
183 KiB
Python
Raw Normal View History

2020-04-04 13:44:49 +00:00
__filename__ = "utils.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-12-22 23:37:30 +00:00
__version__ = "1.6.0"
2020-04-04 13:44:49 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-04-04 13:44:49 +00:00
__status__ = "Production"
2021-06-26 11:16:41 +00:00
__module_group__ = "Core"
__accounts_data_path__ = None
__accounts_data_path_tests__ = False
2019-07-02 09:25:29 +00:00
import os
import re
2019-10-11 18:03:58 +00:00
import time
2019-09-29 18:48:34 +00:00
import shutil
2019-07-02 09:25:29 +00:00
import datetime
2019-11-23 10:20:30 +00:00
import json
2021-03-18 17:27:46 +00:00
import locale
2020-06-06 18:16:16 +00:00
from pprint import pprint
2023-11-01 20:38:04 +00:00
import idna
from dateutil.tz import tz
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
2021-12-27 16:18:52 +00:00
from followingCalendar import add_person_to_calendar
2022-01-13 15:10:41 +00:00
VALID_HASHTAG_CHARS = \
2022-03-02 15:13:59 +00:00
set('_0123456789' +
2022-01-13 15:10:41 +00:00
'abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
'¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
'ŴŵÝýŸÿŶŷŹźŽžŻż')
2021-02-11 10:33:56 +00:00
# posts containing these strings will always get screened out,
# both incoming and outgoing.
# Could include dubious clacks or admin dogwhistles
2021-12-26 10:11:18 +00:00
INVALID_CHARACTERS = (
'', '', '', '', '', '', 'ϟϟ', '🏳️‍🌈🚫', '⚡⚡', ''
2021-02-11 10:33:56 +00:00
)
2022-12-26 10:49:41 +00:00
INVALID_ACTOR_URL_CHARACTERS = (
'', '', '<', '>', '%', '{', '}', '|', '\\', '^', '`',
2022-12-26 15:41:21 +00:00
'?', '#', '[', ']', '!', '$', '&', "'", '(', ')', '*',
'+', ',', ';', '='
2022-12-26 10:49:41 +00:00
)
def is_account_dir(dir_name: str) -> bool:
    """Returns true if the given directory name looks like an account
    directory within /accounts, which means that it contains an @ but
    is not one of the inbox, news or Actor directories
    """
    if '@' not in dir_name:
        return False
    non_account_handles = ('inbox@', 'news@', 'Actor@')
    return not any(handle in dir_name for handle in non_account_handles)
2023-11-29 11:37:44 +00:00
def remove_zero_length_strings(text: str) -> str:
    """Removes zero width space characters (U+200B) from the text
    """
    # The character is written as an escape sequence so that it remains
    # visible and survives editors or tools which drop invisible
    # characters. Replacing a literal empty string leaves text unchanged.
    return text.replace('\u200b', '')
2023-11-20 22:27:58 +00:00
def _utc_mktime(utc_tuple):
"""Returns number of seconds elapsed since epoch
Note that no timezone are taken into consideration.
utc tuple must be: (year, month, day, hour, minute, second)
"""
if len(utc_tuple) == 6:
utc_tuple += (0, 0, 0)
return time.mktime(utc_tuple) - time.mktime((1970, 1, 1, 0, 0, 0, 0, 0, 0))
def _datetime_to_timestamp(dtime):
    """Converts a datetime object into a whole number of seconds
    since the epoch, using the fields of its time tuple
    """
    seconds = _utc_mktime(dtime.timetuple())
    return int(seconds)
def date_utcnow():
    """Returns the current time as an offset-aware UTC datetime
    """
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
def date_from_numbers(year: int, month: int, day: int,
                      hour: int, mins: int):
    """Returns an offset-aware UTC datetime for the given date and
    time, with the seconds set to zero
    """
    seconds = 0
    utc = datetime.timezone.utc
    return datetime.datetime(year, month, day, hour, mins, seconds,
                             tzinfo=utc)
def date_from_string_format(date_str: str, formats: []):
    """Returns an offset-aware datetime from a string date.
    Each of the given formats is tried in turn and the first one which
    matches is used. If no formats are given then a default set is tried.
    Returns None if the string does not match any of the formats.
    A parsed date which has no timezone is given the UTC timezone
    """
    if not formats:
        formats = ("%a, %d %b %Y %H:%M:%S %Z",
                   "%a, %d %b %Y %H:%M:%S %z",
                   "%Y-%m-%dT%H:%M:%S%z")
    dtime = None
    for date_format in formats:
        try:
            dtime = \
                datetime.datetime.strptime(date_str, date_format)
        except (ValueError, TypeError):
            # ValueError: the string does not match this format
            # TypeError: the date is not a string
            # Anything else, such as KeyboardInterrupt, is not swallowed
            continue
        break
    if dtime is None:
        return None
    if dtime.tzinfo is None:
        dtime = dtime.replace(tzinfo=datetime.timezone.utc)
    return dtime
def date_epoch():
    """Returns the epoch (start of 1970) as an offset-aware
    UTC datetime
    """
    return datetime.datetime(1970, 1, 1, 0, 0, 0,
                             tzinfo=datetime.timezone.utc)
2023-11-20 22:27:58 +00:00
2023-12-09 14:18:24 +00:00
def get_url_from_post(url_field) -> str:
    """Returns a url from the url field of a post object.
    A string is returned as it is. From a list, the href of the first
    dict having a text/html mediaType and containing :// is returned.
    Otherwise an empty string is returned
    """
    if isinstance(url_field, str):
        return url_field
    if not isinstance(url_field, list):
        return ''
    for url_dict in url_field:
        if not isinstance(url_dict, dict):
            continue
        # missing keys give None, which fails the string checks
        href = url_dict.get('href')
        media_type = url_dict.get('mediaType')
        if not isinstance(href, str) or \
           not isinstance(media_type, str):
            continue
        if media_type == 'text/html' and '://' in href:
            return href
    return ''
def get_attributed_to(field) -> str:
    """Returns the actor from the given field.
    A string is returned as it is. From a list, the id of the first
    Person entry whose id resembles a url is returned, falling back to
    the first list entry if that is a string.
    Returns None if no actor was found
    """
    if isinstance(field, str):
        return field
    if not isinstance(field, list):
        return None
    for attrib in field:
        if not isinstance(attrib, dict):
            continue
        attrib_type = attrib.get('type')
        attrib_id = attrib.get('id')
        if not (attrib_type and attrib_id):
            continue
        if not (isinstance(attrib_type, str) and
                isinstance(attrib_id, str)):
            continue
        if attrib_type == 'Person' and \
           resembles_url(attrib_id):
            return attrib_id
    # An empty list has no first entry, so check before indexing
    if field and isinstance(field[0], str):
        return field[0]
    return None
2024-02-02 12:04:09 +00:00
def uninvert_text(text: str) -> str:
    """Uninverts inverted (upside down) text.
    If more than half of the characters are known inverted characters
    then they are converted back to their upright equivalents, the
    character order is reversed, and brackets and the digits 6 and 9
    are swapped. Otherwise the text is returned unchanged
    """
    if len(text) < 4:
        return text

    # upright character -> inverted character
    flip_table = {
        '\u0021': '\u00A1',
        '\u0022': '\u201E',
        '\u0026': '\u214B',
        '\u002E': '\u02D9',
        '\u0033': '\u0190',
        '\u0034': '\u152D',
        '\u0037': '\u2C62',
        '\u003B': '\u061B',
        '\u003F': '\u00BF',
        '\u0041': '\u2200',
        # This code point is above U+FFFF and so needs the eight digit
        # escape. '\u10412' would be U+1041 followed by the digit 2
        '\u0042': '\U00010412',
        '\u0043': '\u2183',
        '\u0044': '\u25D6',
        '\u0045': '\u018E',
        '\u0046': '\u2132',
        '\u0047': '\u2141',
        '\u004A': '\u017F',
        '\u004B': '\u22CA',
        '\u004C': '\u2142',
        '\u004D': '\u0057',
        '\u004E': '\u1D0E',
        '\u0050': '\u0500',
        '\u0051': '\u038C',
        '\u0052': '\u1D1A',
        '\u0054': '\u22A5',
        '\u0055': '\u2229',
        '\u0056': '\u1D27',
        '\u0059': '\u2144',
        '\u005F': '\u203E',
        '\u0061': '\u0250',
        '\u0062': '\u0071',
        '\u0063': '\u0254',
        '\u0064': '\u0070',
        '\u0065': '\u01DD',
        '\u0066': '\u025F',
        '\u0067': '\u0183',
        '\u0068': '\u0265',
        '\u0069': '\u0131',
        '\u006A': '\u027E',
        '\u006B': '\u029E',
        '\u006C': '\u0283',
        '\u006D': '\u026F',
        '\u006E': '\u0075',
        '\u0072': '\u0279',
        '\u0074': '\u0287',
        '\u0076': '\u028C',
        '\u0077': '\u028D',
        '\u0079': '\u028E',
        '\u203F': '\u2040',
        '\u2234': '\u2235'
    }
    # inverted character -> upright character.
    # Every inverted character appears only once within the table,
    # so reversing it does not lose any entries
    unflip_table = {ch_inv: ch1 for ch1, ch_inv in flip_table.items()}

    matches = 0
    upright_chars = []
    for ch_test in text:
        ch_result = unflip_table.get(ch_test)
        if ch_result is None:
            ch_result = ch_test
        else:
            matches += 1
        upright_chars.append(ch_result)

    if matches <= len(text)/2:
        # not enough inverted characters for this to be inverted text
        return text

    # inverted text reads backwards, so reverse the character order
    possible_result = ''.join(reversed(upright_chars))

    # characters which become each other when turned upside down
    extra_replace = str.maketrans('[]()<>96', '][)(><69')
    return possible_result.translate(extra_replace)
def _standardize_text_range(text: str,
range_start: int, range_end: int,
offset: str) -> str:
"""Convert any fancy characters within the given range into ordinary ones
"""
offset = ord(offset)
ctr = 0
text = list(text)
while ctr < len(text):
val = ord(text[ctr])
if val in range(range_start, range_end):
text[ctr] = chr(val - range_start + offset)
ctr += 1
return "".join(text)
def standardize_text(text: str) -> str:
    """Converts fancy unicode letters into ordinary letters
    and then uninverts the text if it is inverted
    """
    if not text:
        return text

    # code points at which fancy lower case alphabets begin
    lowercase_starts = (
        65345, 119886, 119990, 120042, 120094,
        120146, 120198, 120302, 120354, 120406
    )
    # code points at which fancy upper case alphabets begin
    uppercase_starts = (
        65313, 119912, 119964, 120016, 120068,
        120120, 120172, 120224, 120328, 120380,
        120432, 127344, 127312, 127280, 127248
    )
    alphabet_length = 26

    for first_letter, starts in (('a', lowercase_starts),
                                 ('A', uppercase_starts)):
        for range_start in starts:
            text = _standardize_text_range(text, range_start,
                                           range_start + alphabet_length,
                                           first_letter)

    return uninvert_text(text)
2024-09-13 15:11:02 +00:00
def remove_eol(line: str) -> str:
    """Removes line ending characters, together with any other
    trailing whitespace, from the given line
    """
    trimmed = line.rstrip()
    return trimmed
2022-06-21 11:58:50 +00:00
2022-06-10 13:01:39 +00:00
def text_in_file(text: str, filename: str,
                 case_sensitive: bool = True) -> bool:
    """Returns true if the given text appears within the given file.
    A file which can't be read, or which is empty, gives false
    """
    search_text = text if case_sensitive else text.lower()

    file_text = None
    try:
        with open(filename, 'r', encoding='utf-8') as fp_file:
            file_text = fp_file.read()
    except OSError:
        print('EX: unable to find text in missing file ' + filename)

    if not file_text:
        return False
    if not case_sensitive:
        file_text = file_text.lower()
    return search_text in file_text
2021-12-26 10:19:59 +00:00
def local_actor_url(http_prefix: str, nickname: str, domain_full: str) -> str:
    """Returns the actor url for the given nickname on this instance
    """
    url_parts = (http_prefix, '://', domain_full, '/users/', nickname)
    return ''.join(url_parts)
2021-08-14 11:13:39 +00:00
2021-12-26 10:29:52 +00:00
def get_actor_languages_list(actor_json: {}) -> []:
    """Returns a sorted list of the languages used by the given actor,
    taken from the first valid attachment whose name begins
    with "languages"
    """
    attachments = actor_json.get('attachment')
    if not attachments:
        return []

    for property_value in attachments:
        name_value = \
            property_value.get('name') or property_value.get('schema:name')
        if not name_value:
            continue
        if not name_value.lower().startswith('languages'):
            continue
        if not property_value.get('type'):
            continue
        prop_value_name, _ = \
            get_attachment_property_value(property_value)
        if not prop_value_name:
            continue
        if not property_value['type'].endswith('PropertyValue'):
            continue

        languages = property_value[prop_value_name]
        if isinstance(languages, list):
            # note that the list within the actor is sorted in place
            languages.sort()
            return languages
        if not isinstance(languages, str):
            continue

        # the first separator found within the string is
        # the one which is used to split it
        for separator in (',', ';', '/', '+', ' '):
            if separator in languages:
                break
        else:
            # no separator, so this is a single language
            return [languages]

        lang_list: list[str] = []
        for lang in languages.split(separator):
            lang = lang.strip()
            if lang not in lang_list:
                lang_list.append(lang)
        lang_list.sort()
        return lang_list
    return []
def has_object_dict(post_json_object: {}) -> bool:
    """Returns true if the given post has an object field
    which is a non-empty dict
    """
    post_object = post_json_object.get('object')
    if not post_object:
        return False
    return isinstance(post_object, dict)
def _remove_tag_markers(html: str, marker: str) -> str:
    """Removes each occurrence of the given tag marker, such as '<pre'
    or '</pre', together with everything up to the next '>'
    """
    starts_with_marker = html.startswith(marker)
    result = ''
    for text in html.split(marker):
        if not result:
            if starts_with_marker and '>' in text:
                result = text.split('>', 1)[1]
            else:
                result = text
            continue
        # partition gives an empty string when there is no '>',
        # so an unterminated tag is dropped rather than
        # causing an IndexError
        result += text.partition('>')[2]
    return result


def remove_markup_tag(html: str, tag: str) -> str:
    """Removes the opening and closing versions of the given tag from
    the given html markup, keeping the text in between them.
    Markup which does not contain the tag is returned unchanged
    """
    if '<' + tag not in html and \
       '</' + tag not in html:
        return html

    html = _remove_tag_markers(html, '<' + tag)
    return _remove_tag_markers(html, '</' + tag)
2024-09-24 19:40:30 +00:00
def remove_header_tags(html: str) -> str:
    """Removes the h1 to h5 header tags from the given html text
    """
    for header_level in ('h1', 'h2', 'h3', 'h4', 'h5'):
        html = remove_markup_tag(html, header_level)
    return html
2021-12-26 10:50:49 +00:00
def get_content_from_post(post_json_object: {}, system_language: str,
                          languages_understood: [],
                          content_type: str) -> str:
    """Returns the content from the post in the given language,
    including searching for a matching entry within the map for the
    content type (eg. contentMap or summaryMap).
    If the map has no entry for the system language then the first
    entry for one of the understood languages is used
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']

    lang_map = this_post_json.get(content_type + 'Map')
    has_contentmap_dict = bool(lang_map) and isinstance(lang_map, dict)
    if not has_contentmap_dict and \
       not this_post_json.get(content_type):
        return ''

    replacements = {
        '&amp;': '&',
        '<u>': '',
        '</u>': ''
    }
    content = ''
    if not has_contentmap_dict:
        # no language map, so use the content field directly
        plain_content = this_post_json[content_type]
        if isinstance(plain_content, str):
            content = replace_strings(plain_content, replacements)
            content = remove_markup_tag(content, 'pre')
    elif lang_map.get(system_language):
        sys_lang = lang_map[system_language]
        if isinstance(sys_lang, str):
            content = remove_markup_tag(sys_lang, 'pre')
            content = replace_strings(content, replacements)
    else:
        # is there a contentMap/summaryMap entry for one of
        # the understood languages?
        for lang in languages_understood:
            map_lang = lang_map.get(lang)
            if not map_lang or not isinstance(map_lang, str):
                continue
            content = remove_markup_tag(map_lang, 'pre')
            content = replace_strings(content, replacements)
            break
    return standardize_text(content)
def get_language_from_post(post_json_object: {}, system_language: str,
                           languages_understood: [],
                           content_type: str) -> str:
    """Returns the content language from the post,
    including searching for a matching entry within the map for the
    content type (eg. contentMap). The system language is returned
    unless the map only has an entry for an understood language
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']
    if not this_post_json.get(content_type):
        return system_language

    lang_map = this_post_json.get(content_type + 'Map')
    if not lang_map or not isinstance(lang_map, dict):
        return system_language

    if lang_map.get(system_language):
        # whatever the type of the entry, the result
        # is the system language
        return system_language

    # is there a contentMap/summaryMap entry for one of
    # the understood languages?
    for lang in languages_understood:
        if lang_map.get(lang):
            return lang
    return system_language
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns the names of all the media attached to a post, each
    followed by its url if there is one, as a single space separated
    text. This is used for filtering
    """
    post_attachments = get_post_attachments(post_json_object)
    if not post_attachments:
        return ''

    description_parts = []
    for attach in post_attachments:
        if not isinstance(attach, dict):
            print('WARN: attachment is not a dict ' + str(attach))
            continue
        if not attach.get('name'):
            continue
        description_parts.append(attach['name'])
        if attach.get('url'):
            description_parts.append(get_url_from_post(attach['url']))
    return ' '.join(description_parts).strip()
2024-04-23 19:46:30 +00:00
def _valid_summary(possible_summary: str) -> bool:
"""Returns true if the given summary field is valid
"""
if not isinstance(possible_summary, str):
return False
if len(possible_summary) < 2:
return False
return True
2022-01-28 10:07:35 +00:00
def get_summary_from_post(post_json_object: {}, system_language: str,
                          languages_understood: []) -> str:
    """Returns the summary from the post in the given language,
    including searching for a matching entry within summaryMap.
    For an Article without a summary the name is used instead.
    A summary which is not valid is returned as an empty string
    """
    summary_str = \
        get_content_from_post(post_json_object, system_language,
                              languages_understood, 'summary')
    if not summary_str:
        # Also try the "name" field if summary is not available.
        # See https://codeberg.org/
        # fediverse/fep/src/branch/main/fep/b2b8/fep-b2b8.md
        obj = post_json_object
        if has_object_dict(post_json_object):
            obj = post_json_object['object']
        if obj.get('type') == 'Article':
            summary_str = \
                get_content_from_post(post_json_object,
                                      system_language,
                                      languages_understood, 'name')
    if not summary_str:
        return summary_str
    summary_str = summary_str.strip()
    if _valid_summary(summary_str):
        return summary_str
    return ''
2022-01-28 10:07:35 +00:00
2021-12-26 11:29:40 +00:00
def get_base_content_from_post(post_json_object: {},
                               system_language: str) -> str:
    """Returns the content from the post, preferring the contentMap
    entry for the given language if there is one
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']

    content_map = this_post_json.get('contentMap')
    if isinstance(content_map, dict):
        lang_content = content_map.get(system_language)
        if lang_content:
            return lang_content
    return this_post_json.get('content', '')
2021-07-19 19:40:04 +00:00
def data_dir_testing(base_dir: str) -> None:
    """Switches the data directory into testing mode, in which the
    accounts data path is the accounts directory within the given
    base directory. During unit tests __accounts_data_path__ should
    not be retained
    """
    global __accounts_data_path__, __accounts_data_path_tests__
    __accounts_data_path__ = base_dir + '/accounts'
    __accounts_data_path_tests__ = True
    print('Data directory is in testing mode')
def set_accounts_data_dir(base_dir: str, accounts_data_path: str) -> None:
    """Sets the directory used to store instance accounts data, by
    writing it to data_path.txt within the base directory.
    Nothing is written if no path is given, or if the file already
    contains the given path
    """
    if not accounts_data_path:
        return

    accounts_data_path_filename = base_dir + '/data_path.txt'
    if os.path.isfile(accounts_data_path_filename):
        # read the existing path
        path = None
        try:
            with open(accounts_data_path_filename, 'r',
                      encoding='utf-8') as fp_accounts:
                path = fp_accounts.read()
        except OSError:
            print('EX: unable to read ' + accounts_data_path_filename)
        # path is still None if the file could not be read,
        # in which case try to write the new path
        if path is not None and path.strip() == accounts_data_path:
            # path is already set, so avoid writing it again
            return

    try:
        with open(accounts_data_path_filename, 'w+',
                  encoding='utf-8') as fp_accounts:
            fp_accounts.write(accounts_data_path)
    except OSError:
        print('EX: unable to write ' + accounts_data_path_filename)
def data_dir(base_dir: str) -> str:
    """Returns the directory where account data is stored.
    This is the accounts directory within the base directory, unless
    data_path.txt within the base directory gives an alternative.
    Once obtained the path is kept for subsequent calls, except in
    testing mode
    """
    global __accounts_data_path__
    global __accounts_data_path_tests__
    default_path = base_dir + '/accounts'

    if __accounts_data_path_tests__:
        __accounts_data_path__ = default_path
        return __accounts_data_path__

    if __accounts_data_path__:
        # the path was obtained by a previous call
        return __accounts_data_path__

    # the default path for accounts data
    __accounts_data_path__ = default_path

    # is an alternative path set?
    accounts_data_path_filename = base_dir + '/data_path.txt'
    if not os.path.isfile(accounts_data_path_filename):
        return __accounts_data_path__

    stored_path = None
    try:
        with open(accounts_data_path_filename, 'r',
                  encoding='utf-8') as fp_accounts:
            stored_path = fp_accounts.read()
    except OSError:
        print('EX: unable to read ' + accounts_data_path_filename)
    if stored_path:
        __accounts_data_path__ = stored_path.strip()
        print('Accounts data path set to ' + __accounts_data_path__)
    return __accounts_data_path__
2024-05-12 12:35:26 +00:00
2021-12-26 12:02:29 +00:00
def acct_dir(base_dir: str, nickname: str, domain: str) -> str:
    """Returns the directory for the account with the given nickname
    and domain on this instance
    """
    accounts_dir = data_dir(base_dir)
    return accounts_dir + '/' + nickname + '@' + domain
2021-07-13 21:59:53 +00:00
2022-12-18 15:29:54 +00:00
def acct_handle_dir(base_dir: str, handle: str) -> str:
    """Returns the directory for the account with the given handle
    on this instance
    """
    accounts_dir = data_dir(base_dir)
    return accounts_dir + '/' + handle
2022-12-18 15:29:54 +00:00
2024-09-13 15:11:02 +00:00
def refresh_newswire(base_dir: str) -> None:
    """Causes the newswire to be updated after a change to user
    accounts, by creating a .refresh_newswire file within the data
    directory if it doesn't already exist
    """
    refresh_filename = data_dir(base_dir) + '/.refresh_newswire'
    if not os.path.isfile(refresh_filename):
        try:
            with open(refresh_filename, 'w+',
                      encoding='utf-8') as fp_refresh:
                fp_refresh.write('\n')
        except OSError:
            print('EX: refresh_newswire unable to write ' +
                  refresh_filename)
2021-12-26 12:13:46 +00:00
def get_sha_256(msg: bytes) -> bytes:
    """Returns the SHA256 hash of the given bytes, as raw bytes.
    The hash is updated with bytes rather than a string, so text
    needs to be encoded by the caller
    """
    digest = hashes.Hash(hashes.SHA256(), backend=default_backend())
    digest.update(msg)
    return digest.finalize()
2021-09-08 10:05:45 +00:00
2021-12-26 12:13:46 +00:00
def get_sha_512(msg: bytes) -> bytes:
    """Returns the SHA512 hash of the given bytes, as raw bytes.
    The hash is updated with bytes rather than a string, so text
    needs to be encoded by the caller
    """
    digest = hashes.Hash(hashes.SHA512(), backend=default_backend())
    digest.update(msg)
    return digest.finalize()
2019-07-02 09:25:29 +00:00
2020-04-04 13:44:49 +00:00
def local_network_host(host: str) -> bool:
    """Returns true if the given host begins with localhost or with
    one of the address prefixes treated as being on the local network
    """
    local_prefixes = ('localhost', '192.', '127.', '10.')
    return host.startswith(local_prefixes)
2021-12-26 12:21:31 +00:00
def decoded_host(host: str) -> str:
    """Convert hostname to internationalized domain
    https://en.wikipedia.org/wiki/Internationalized_domain_name
    Hosts with a port number (eg. mydomain:8000), local network hosts
    and onion or i2p addresses are returned unchanged
    """
    if ':' in host:
        return host
    if local_network_host(host):
        return host
    if host.endswith('.onion') or host.endswith('.i2p'):
        return host
    return idna.decode(host)
2021-12-26 12:16:36 +00:00
def get_locked_account(actor_json: {}) -> bool:
    """Returns true if the given account requires follower approval,
    which means that manuallyApprovesFollowers is set to True
    """
    approves_followers = actor_json.get('manuallyApprovesFollowers')
    return approves_followers is True
2021-12-26 12:31:47 +00:00
def has_users_path(path_str: str) -> bool:
    """Whether there is a /users/ path (or equivalent) in the given
    string. A url consisting only of a domain followed by a single
    name without any dots also counts
    """
    if not path_str:
        return False

    if any(users_str in path_str for users_str in get_user_paths()):
        return True

    if '://' not in path_str:
        return False

    # is this of the form https://domain/nickname ?
    domain = path_str.split('://')[1]
    if '/' in domain:
        domain = domain.split('/')[0]
    domain_prefix = '://' + domain + '/'
    if domain_prefix not in path_str:
        return False
    nickname = path_str.split(domain_prefix)[1]
    return not ('/' in nickname or '.' in nickname)
2021-12-26 12:37:53 +00:00
def valid_post_date(published: str, max_age_days: int, debug: bool) -> bool:
    """Returns true if the published date can be parsed, is not on a
    future day and is less than the given number of days old
    """
    baseline_time = date_epoch()
    now_days_since_epoch = (date_utcnow() - baseline_time).days

    post_time_object = \
        date_from_string_format(published, ["%Y-%m-%dT%H:%M:%S%z"])
    if not post_time_object:
        if debug:
            print('EX: valid_post_date invalid published date ' +
                  str(published))
        return False

    post_days_since_epoch = (post_time_object - baseline_time).days
    age_days = now_days_since_epoch - post_days_since_epoch

    if age_days < 0:
        if debug:
            print("Inbox post has a published date in the future!")
        return False

    if age_days >= max_age_days:
        if debug:
            print("Inbox post is not recent enough")
        return False
    return True
2021-12-26 12:45:03 +00:00
def get_full_domain(domain: str, port: int) -> str:
    """Returns the full domain name, including the port number.
    The port is not added if there isn't one, if the domain already
    contains a colon, or if it is port 80 or 443
    """
    if not port or ':' in domain or port in (80, 443):
        return domain
    return ':'.join((domain, str(port)))
2021-12-26 14:20:09 +00:00
def get_video_extensions() -> []:
    """Returns the possible video file extensions,
    without any leading dot
    """
    video_extensions = ('mp4', 'webm', 'ogv')
    return video_extensions
2021-12-26 14:24:03 +00:00
def get_audio_extensions() -> []:
    """Returns the possible audio file extensions,
    without any leading dot
    """
    audio_extensions = ('mp3', 'ogg', 'flac', 'opus', 'spx', 'wav')
    return audio_extensions
2021-08-03 09:09:04 +00:00
2021-12-26 14:26:16 +00:00
def get_image_extensions() -> []:
    """Returns the possible image file extensions,
    without any leading dot
    """
    image_extensions = (
        'jpg', 'jpeg', 'gif', 'webp', 'avif', 'heic',
        'svg', 'ico', 'jxl', 'png'
    )
    return image_extensions
2020-11-21 11:21:05 +00:00
2024-02-05 20:05:00 +00:00
def image_mime_types_dict() -> {}:
    """Returns a dict which maps image file extensions to mime
    subtypes, which are the part of the mime type after image/
    """
    extension_to_mime_subtype = {
        'png': 'png',
        'jpg': 'jpeg',
        'jpeg': 'jpeg',
        'jxl': 'jxl',
        'gif': 'gif',
        'avif': 'avif',
        'heic': 'heic',
        'svg': 'svg+xml',
        'webp': 'webp',
        'ico': 'x-icon'
    }
    return extension_to_mime_subtype
2024-02-05 20:05:00 +00:00
def get_image_mime_type(image_filename: str) -> str:
    """Returns the mime type for the given image filename, based upon
    its extension. An unknown extension gives image/png
    """
    matching_mime_types = (
        'image/' + mime_ext
        for ext, mime_ext in image_mime_types_dict().items()
        if image_filename.endswith('.' + ext)
    )
    return next(matching_mime_types, 'image/png')
2021-12-27 16:02:54 +00:00
def get_image_extension_from_mime_type(content_type: str) -> str:
    """Returns the image file extension for a mime type, such as
    image/jpeg. An unknown mime type gives png
    """
    mime_subtype_to_extension = {
        'png': 'png',
        'jpeg': 'jpg',
        'jxl': 'jxl',
        'gif': 'gif',
        'svg+xml': 'svg',
        'webp': 'webp',
        'avif': 'avif',
        'heic': 'heic',
        'x-icon': 'ico'
    }
    matching_extensions = (
        ext
        for mime_ext, ext in mime_subtype_to_extension.items()
        if content_type.endswith(mime_ext)
    )
    return next(matching_extensions, 'png')
2021-12-26 14:39:49 +00:00
def get_media_extensions() -> []:
    """Returns the possible media file extensions, which are the
    image extensions followed by the video and audio extensions
    """
    image_ext = get_image_extensions()
    video_ext = get_video_extensions()
    audio_ext = get_audio_extensions()
    return image_ext + video_ext + audio_ext
2020-11-21 11:54:29 +00:00
2021-12-26 15:44:28 +00:00
def get_image_formats() -> str:
    """Returns a comma separated string of permissible image formats,
    each with a leading dot,
    used when selecting an image for a new post
    """
    return ', '.join('.' + ext for ext in get_image_extensions())
2020-11-21 11:21:05 +00:00
2021-12-27 15:56:15 +00:00
def get_media_formats() -> str:
    """Returns a comma separated string of permissible media formats,
    each with a leading dot,
    used when selecting an attachment for a new post
    """
    return ', '.join('.' + ext for ext in get_media_extensions())
2020-11-21 11:54:29 +00:00
2021-12-27 15:43:22 +00:00
def remove_html(content: str) -> str:
    """Removes html markup from the given content, leaving plain text.
    Quote tags become quotation marks, and paragraph endings and line
    breaks become newlines.
    Used to ensure that profile descriptions don't contain dubious content
    """
    if '<' not in content:
        return content

    replacements = {
        '<a href': ' <a href',
        '<q>': '"',
        '</q>': '"',
        '</p>': '\n\n',
        '<br>': '\n'
    }
    content = replace_strings(content, replacements)

    # drop everything which is between < and >
    removing = False
    visible_chars = []
    for char in content:
        if char == '<':
            removing = True
        elif char == '>':
            removing = False
        elif not removing:
            visible_chars.append(char)

    # Collapse double spaces, such as those created by the extra space
    # added before links. Replacing a single space with a single space
    # would do nothing.
    plain_text = ''.join(visible_chars).replace('  ', ' ')

    # insert spaces after full stops
    str_len = len(plain_text)
    spaced_chars = []
    for i, char in enumerate(plain_text):
        spaced_chars.append(char)
        if char == '.' and i < str_len - 1:
            if 'A' <= plain_text[i + 1] <= 'Z':
                spaced_chars.append(' ')

    return ''.join(spaced_chars).replace('  ', ' ').strip()
def remove_style_within_html(content: str) -> str:
    """Removes style="something" within html post content.
    An unterminated style, without a closing quote, is removed
    through to the end of the content
    """
    if '<' not in content:
        return content
    if ' style="' not in content:
        return content

    first_section, *styled_sections = content.split(' style="')
    result = first_section
    for section_text in styled_sections:
        # keep what follows the closing quote of the style.
        # partition gives an empty string if there is no closing
        # quote, rather than causing an IndexError
        result += section_text.partition('"')[2]
    return result
2021-12-27 15:52:08 +00:00
def first_paragraph_from_string(content: str) -> str:
    """Returns the first paragraph from a blog post, with html
    removed, to be used as a summary in the newswire feed.
    Content without paragraph tags is used in its entirety
    """
    has_paragraph = '<p>' in content and '</p>' in content
    if not has_paragraph:
        return remove_html(content)

    after_opening_tag = content.split('<p>')[1]
    # if there is no closing tag then this leaves the text unchanged
    paragraph = after_opening_tag.split('</p>')[0]
    return remove_html(paragraph)
2020-11-08 10:52:07 +00:00
2023-08-31 22:29:10 +00:00
def get_memorials(base_dir: str) -> str:
    """Returns the nicknames for memorial accounts, as the text of
    the memorial file within the data directory. An empty string is
    returned if the file doesn't exist or can't be read
    """
    memorial_file = data_dir(base_dir) + '/memorial'
    if not os.path.isfile(memorial_file):
        return ''

    try:
        with open(memorial_file, 'r', encoding='utf-8') as fp_memorial:
            return fp_memorial.read()
    except OSError:
        print('EX: unable to read ' + memorial_file)
    return ''
2023-08-31 22:38:09 +00:00
def set_memorials(base_dir: str, domain: str, memorial_str) -> None:
    """Saves the nicknames for memorial accounts, one per line.
    Only nicknames which have an account directory for the given
    domain are saved
    """
    # check that the accounts exist
    existing_nicks = []
    for memorial_item in memorial_str.split('\n'):
        memorial_nick = memorial_item.strip()
        if os.path.isdir(acct_dir(base_dir, memorial_nick, domain)):
            existing_nicks.append(memorial_nick)
    memorial_str = ''.join(nick + '\n' for nick in existing_nicks)

    # save the accounts
    memorial_file = data_dir(base_dir) + '/memorial'
    try:
        with open(memorial_file, 'w+', encoding='utf-8') as fp_memorial:
            fp_memorial.write(memorial_str)
    except OSError:
        print('EX: unable to write ' + memorial_file)
2021-12-26 14:37:28 +00:00
def _create_config(base_dir: str) -> None:
    """Creates an empty config.json within the base directory,
    unless that file already exists
    """
    config_filename = base_dir + '/config.json'
    if not os.path.isfile(config_filename):
        save_json({}, config_filename)
2020-10-06 08:58:44 +00:00
2021-12-27 20:38:02 +00:00
def set_config_param(base_dir: str, variable_name: str,
                     variable_value) -> None:
    """Stores a configuration value within config.json, creating
    the file first if it does not exist.
    The variable name is passed through _convert_to_camel_case
    before being used as the key
    """
    _create_config(base_dir)
    config_filename = base_dir + '/config.json'
    if os.path.isfile(config_filename):
        config_json = load_json(config_filename)
    else:
        config_json = {}
    config_key = _convert_to_camel_case(variable_name)
    config_json[config_key] = variable_value
    save_json(config_json, config_filename)
2020-10-06 08:58:44 +00:00
2024-09-13 15:11:02 +00:00
def get_config_param(base_dir: str, variable_name: str) -> str:
    """Looks up a configuration value within config.json.
    The variable name is passed through _convert_to_camel_case
    before being used as the key.
    Returns None if the config could not be loaded, is empty,
    or does not contain the variable
    """
    _create_config(base_dir)
    config_json = load_json(base_dir + '/config.json')
    if not config_json:
        return None
    config_key = _convert_to_camel_case(variable_name)
    if config_key not in config_json:
        return None
    return config_json[config_key]
2021-12-27 13:58:17 +00:00
def get_followers_list(base_dir: str,
                       nickname: str, domain: str,
                       follow_file='following.txt') -> []:
    """Returns the lines of the follow file for the given account,
    each with surrounding whitespace stripped.
    An empty list is returned if the file is missing or unreadable
    """
    filename = acct_dir(base_dir, nickname, domain) + '/' + follow_file
    if not os.path.isfile(filename):
        return []
    raw_lines: list[str] = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_foll:
            raw_lines = fp_foll.readlines()
    except OSError:
        print('EX: get_followers_list unable to read ' + filename)
    return [entry.strip() for entry in raw_lines]
2021-12-27 11:31:04 +00:00
def get_followers_of_person(base_dir: str,
                            nickname: str, domain: str,
                            follow_file='following.txt') -> []:
    """Returns a list containing the followers of the given person,
    as the names of the account directories whose follow file
    contains nickname@domain.
    Used by the shared inbox to know who to send incoming mail to
    """
    followers: list[str] = []
    handle = nickname + '@' + remove_domain_port(domain)
    if not os.path.isdir(acct_handle_dir(base_dir, handle)):
        return followers
    skipped_prefixes = ('inbox@', 'Actor@', 'news@')
    for subdir, dirs, _ in os.walk(data_dir(base_dir)):
        for account in dirs:
            # the person themselves and the inbox@, Actor@ and news@
            # directories are never counted as followers
            if account == handle or account.startswith(skipped_prefixes):
                continue
            filename = os.path.join(subdir, account) + '/' + follow_file
            if not os.path.isfile(filename):
                continue
            try:
                with open(filename, 'r', encoding='utf-8') as fp_following:
                    # stops reading at the first matching line
                    is_following = any(
                        remove_eol(following_handle) == handle
                        for following_handle in fp_following)
            except OSError as exc:
                print('EX: get_followers_of_person unable to read ' +
                      filename + ' ' + str(exc))
                continue
            if is_following and account not in followers:
                followers.append(account)
        # only the first directory level yielded by os.walk is examined
        break
    return followers
2021-12-27 11:31:04 +00:00
def remove_id_ending(id_str: str) -> str:
    """Removes endings such as /activity and /undo, and fragments
    such as #Create and #update, from an id
    """
    # at most one trailing path ending is removed
    for path_ending in ('/activity', '/undo', '/event',
                        '/replies', '/delete', '/update'):
        if id_str.endswith(path_ending):
            id_str = id_str[:-len(path_ending)]
            break
    # these fragments only count when they end the id
    for end_fragment in ('#Create', '#delete'):
        if id_str.endswith(end_fragment):
            return id_str.split(end_fragment)[0]
    # these fragments count anywhere within the id
    for fragment in ('#update', '#moved', '#primary', '#reciprocal'):
        if fragment in id_str:
            return id_str.split(fragment)[0]
    return id_str
2020-08-23 11:13:35 +00:00
2021-12-27 17:16:57 +00:00
def remove_hash_from_post_id(post_id: str) -> str:
    """Removes any hash, and everything following it, from a post id
    """
    return post_id.split('#', 1)[0]
2021-12-06 12:42:05 +00:00
2021-12-27 17:20:01 +00:00
def get_protocol_prefixes() -> []:
    """Returns the valid protocol prefixes, as a tuple of strings
    """
    schemes = ('https', 'http', 'ftp',
               'dat', 'i2p', 'gnunet',
               'ipfs', 'ipns',
               'hyper', 'gemini', 'gopher')
    return tuple(scheme + '://' for scheme in schemes)
2021-12-27 17:32:34 +00:00
def get_link_prefixes() -> []:
    """Returns the valid web link prefixes, as a tuple of strings
    """
    schemes = ('https', 'http', 'ftp',
               'dat', 'i2p', 'gnunet', 'payto',
               'hyper', 'gemini', 'gopher')
    # briar links have no double slash after the colon
    return tuple(scheme + '://' for scheme in schemes) + ('briar:',)
2021-12-27 20:43:15 +00:00
def remove_avatar_from_cache(base_dir: str, actor_str: str) -> None:
    """Deletes any cached avatar files for the given actor string,
    for each of the extensions returned by get_image_extensions.
    This avoids duplicate entries with differing extensions
    """
    avatar_prefix = base_dir + '/cache/avatars/' + actor_str + '.'
    for extension in get_image_extensions():
        avatar_filename = avatar_prefix + extension
        if os.path.isfile(avatar_filename):
            try:
                os.remove(avatar_filename)
            except OSError:
                print('EX: remove_avatar_from_cache ' +
                      'unable to delete cached avatar ' +
                      str(avatar_filename))
2020-04-04 13:44:49 +00:00
2021-12-26 15:13:34 +00:00
def save_json(json_object: {}, filename: str) -> bool:
    """Saves a dict or list to a file as json.
    Returns True if the file was written.
    Returns False if the object is neither a dict nor a list, or if
    the file could not be written after 5 attempts. A filename which
    is too long is not retried.
    If the object cannot be serialized then the exception from
    json.dumps is raised before the file is opened, so that any
    existing file is left untouched
    """
    import errno

    if not isinstance(json_object, (dict, list)):
        print('EX: save_json object is not json ' + str(json_object))
        return False

    # serialize first, because opening with 'w+' truncates the file
    json_str = json.dumps(json_object)

    max_tries = 5
    for tries in range(1, max_tries + 1):
        try:
            with open(filename, 'w+', encoding='utf-8') as fp_json:
                fp_json.write(json_str)
            return True
        except OSError as exc:
            print('EX: save_json ' + str(tries) + ' ' + str(filename) +
                  ' ' + str(exc))
            if exc.errno == errno.ENAMETOOLONG:
                # filename too long, so another attempt cannot succeed
                break
            if tries < max_tries:
                # wait before the next attempt
                time.sleep(1)
    return False
2020-04-04 13:44:49 +00:00
2024-06-20 10:47:58 +00:00
def load_json(filename: str) -> {}:
    """Loads a json formatted file and returns the decoded object.
    Any '/Actor@' within the filename is replaced by '/inbox@'
    before the file is opened.
    Returns None if the file cannot be read, is not valid utf-8,
    is empty, or does not contain valid json
    """
    if '/Actor@' in filename:
        filename = filename.replace('/Actor@', '/inbox@')

    # load from file
    try:
        with open(filename, 'r', encoding='utf-8') as fp_json:
            data = fp_json.read()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError, so it is named explicitly
        print('EX: load_json exception ' + str(filename) + ' ' + str(exc))
        return None

    # check that something was loaded
    if not data:
        print('EX: load_json no data ' + str(filename))
        return None

    # convert to json
    try:
        return json.loads(data)
    except (ValueError, RecursionError) as exc:
        # ValueError includes json.JSONDecodeError. RecursionError can
        # occur with very deeply nested content
        print('EX: load_json exception ' + str(filename) + ' ' + str(exc))
    return None
2019-10-22 11:55:06 +00:00
2020-04-04 13:44:49 +00:00
2021-12-26 15:13:34 +00:00
def load_json_onionify(filename: str, domain: str, onion_domain: str,
                       delay_sec: int = 2) -> {}:
    """Makes up to 5 attempts to load a json formatted file.
    Before decoding, the domain name is replaced by the onion domain
    and 'https:' is replaced by 'http:' throughout the file text.
    Any '/Actor@' within the filename is replaced by '/inbox@'.
    delay_sec is the number of seconds to wait between attempts.
    Returns the decoded object, or None if no attempt succeeded
    """
    if '/Actor@' in filename:
        filename = filename.replace('/Actor@', '/inbox@')
    max_tries = 5
    for tries in range(1, max_tries + 1):
        data = None
        try:
            with open(filename, 'r', encoding='utf-8') as fp_json:
                data = fp_json.read()
        except (OSError, UnicodeDecodeError):
            print('EX: load_json_onionify exception ' + str(filename))
        if data:
            data = data.replace(domain, onion_domain)
            data = data.replace('https:', 'http:')
            try:
                return json.loads(data)
            except (ValueError, RecursionError):
                # ValueError includes json.JSONDecodeError
                print('EX: load_json_onionify exception ' + str(filename))
        # an unreadable, empty or undecodable file all count as a failed
        # attempt, so the number of attempts is always bounded
        if delay_sec > 0 and tries < max_tries:
            time.sleep(delay_sec)
    return None
2020-03-02 14:35:44 +00:00
2020-04-04 13:44:49 +00:00
2021-12-27 17:46:27 +00:00
def get_status_number(published_str: str = None) -> (str, str):
    """Returns the status number and published date, both as strings.
    The current time is used if no published date string is given
    """
    if published_str:
        curr_time = \
            date_from_string_format(published_str, ['%Y-%m-%dT%H:%M:%S%z'])
    else:
        curr_time = date_utcnow()
    days_since_epoch = (curr_time - date_epoch()).days
    # whole seconds since epoch
    seconds = \
        (days_since_epoch * 24 * 60 * 60) + \
        (curr_time.hour * 60 * 60) + \
        (curr_time.minute * 60) + \
        curr_time.second
    # milliseconds since epoch
    milliseconds = seconds * 1000 + int(curr_time.microsecond / 1000)
    # See https://github.com/tootsuite/mastodon/blob/
    # 995f8b389a66ab76ec92d9a240de376f1fc13a38/lib/mastodon/snowflake.rb
    # use the leftover microseconds as the sequence number
    sequence_id = curr_time.microsecond % 1000
    # shift by 16bits "sequence data"
    status_number = str((milliseconds << 16) + sequence_id)
    published = curr_time.strftime("%Y-%m-%dT%H:%M:%SZ")
    return status_number, published
2020-04-04 13:44:49 +00:00
2019-07-02 09:25:29 +00:00
2021-12-27 17:46:27 +00:00
def evil_incarnate() -> []:
    """Returns the hardcoded blocked domains, as a tuple of strings
    """
    blocked_domains = (
        'fedilist.com',
        'gab.com',
        'gabfed.com',
        'spinster.xyz'
    )
    return blocked_domains
2020-04-04 13:44:49 +00:00
2020-03-28 10:33:04 +00:00
2021-12-27 17:53:41 +00:00
def contains_invalid_chars(json_str: str) -> bool:
    """Returns true if the given json string contains any of
    the strings within INVALID_CHARACTERS
    """
    return any(is_invalid in json_str for is_invalid in INVALID_CHARACTERS)
2022-12-26 10:49:41 +00:00
def contains_invalid_actor_url_chars(url: str) -> bool:
    """Returns true if the given actor url contains any of the strings
    within INVALID_ACTOR_URL_CHARACTERS, or any invalid characters
    as detected by contains_invalid_chars
    """
    if any(is_invalid in url for is_invalid in INVALID_ACTOR_URL_CHARACTERS):
        return True
    return contains_invalid_chars(url)
2021-12-27 19:33:45 +00:00
def remove_invalid_chars(text: str) -> str:
    """Returns the given text with every occurrence of the strings
    within INVALID_CHARACTERS removed
    """
    cleaned = text
    for is_invalid in INVALID_CHARACTERS:
        # replace leaves the text unchanged when there is nothing to remove
        cleaned = cleaned.replace(is_invalid, '')
    return cleaned
2021-12-27 19:26:54 +00:00
def create_person_dir(nickname: str, domain: str, base_dir: str,
                      dir_name: str) -> str:
    """Creates the named directory within the account directory of
    nickname@domain, creating the account directory too if needed.
    Returns the path of the named directory
    """
    handle = nickname + '@' + domain
    account_path = acct_handle_dir(base_dir, handle)
    if not os.path.isdir(account_path):
        os.mkdir(account_path)
    box_dir = acct_handle_dir(base_dir, handle) + '/' + dir_name
    if os.path.isdir(box_dir):
        return box_dir
    os.mkdir(box_dir)
    return box_dir
2019-07-04 10:02:56 +00:00
2020-04-04 13:44:49 +00:00
2021-12-27 17:57:27 +00:00
def create_outbox_dir(nickname: str, domain: str, base_dir: str) -> str:
    """Creates the 'outbox' directory for a person, via
    create_person_dir, and returns its path
    """
    outbox_dir = create_person_dir(nickname, domain, base_dir, 'outbox')
    return outbox_dir
2019-07-04 10:02:56 +00:00
2020-04-04 13:44:49 +00:00
2021-12-27 18:00:51 +00:00
def create_inbox_queue_dir(nickname: str, domain: str, base_dir: str) -> str:
    """Creates the 'queue' directory for a person, via
    create_person_dir, and returns its path
    """
    queue_dir = create_person_dir(nickname, domain, base_dir, 'queue')
    return queue_dir
2020-04-04 13:44:49 +00:00
2019-07-02 10:39:55 +00:00
2021-12-27 19:26:54 +00:00
def domain_permitted(domain: str, federation_list: []) -> bool:
    """Is the given domain permitted according to the federation list?
    An empty federation list permits every domain. Otherwise the
    domain, with any port removed, must appear within the list
    """
    if len(federation_list) == 0:
        return True
    return remove_domain_port(domain) in federation_list
2020-04-04 13:44:49 +00:00
2021-12-27 20:43:15 +00:00
def get_local_network_addresses() -> []:
    """Returns the patterns used for local network address detection,
    as a tuple of strings
    """
    local_patterns = (
        'localhost',
        '127.0.',
        '192.168',
        '10.0.'
    )
    return local_patterns
2022-03-22 18:22:09 +00:00
def _is_dangerous_string_tag(content: str, allow_local_network_access: bool,
separators: [], invalid_strings: []) -> bool:
"""Returns true if the given string is dangerous
"""
2021-12-27 21:42:08 +00:00
for separator_style in separators:
start_char = separator_style[0]
end_char = separator_style[1]
if start_char not in content:
continue
2021-12-27 21:42:08 +00:00
if end_char not in content:
2021-05-19 11:29:37 +00:00
continue
2021-12-27 21:42:08 +00:00
content_sections = content.split(start_char)
invalid_partials = ()
2021-12-25 18:54:50 +00:00
if not allow_local_network_access:
2021-12-27 21:42:08 +00:00
invalid_partials = get_local_network_addresses()
for markup in content_sections:
if end_char not in markup:
2021-05-19 11:29:37 +00:00
continue
2021-12-27 21:42:08 +00:00
markup = markup.split(end_char)[0].strip()
for partial_match in invalid_partials:
if partial_match in markup:
return True
2021-05-19 11:29:37 +00:00
if ' ' not in markup:
2021-12-27 21:42:08 +00:00
for bad_str in invalid_strings:
2022-03-22 18:22:09 +00:00
if not bad_str.endswith('-'):
if bad_str in markup:
return True
else:
if markup.startswith(bad_str):
return True
2021-05-19 11:29:37 +00:00
else:
2021-12-27 21:42:08 +00:00
for bad_str in invalid_strings:
2022-03-22 18:22:09 +00:00
if not bad_str.endswith('-'):
if bad_str + ' ' in markup:
return True
else:
if markup.startswith(bad_str):
return True
return False
def _is_dangerous_string_simple(content: str, allow_local_network_access: bool,
separators: [], invalid_strings: []) -> bool:
"""Returns true if the given string is dangerous
"""
for separator_style in separators:
start_char = separator_style[0]
end_char = separator_style[1]
if start_char not in content:
continue
if end_char not in content:
continue
content_sections = content.split(start_char)
invalid_partials = ()
if not allow_local_network_access:
invalid_partials = get_local_network_addresses()
for markup in content_sections:
if end_char not in markup:
continue
markup = markup.split(end_char)[0].strip()
for partial_match in invalid_partials:
if partial_match in markup:
return True
for bad_str in invalid_strings:
if bad_str in markup:
return True
return False
2023-01-19 15:17:20 +00:00
def html_tag_has_closing(tag_name: str, content: str) -> bool:
    """Does every opening of the given tag have a closing tag after it?
    The content is lowercased before checking.
    For 'code' and 'pre' tags false is also returned if any line
    within the tag, split on newlines and <br>, has 60 or more
    characters. True is returned if the tag does not appear at all
    """
    open_tag = '<' + tag_name
    content_lower = content.lower()
    if open_tag not in content_lower:
        return True
    end_tag = '</' + tag_name + '>'
    check_line_length = tag_name in ('code', 'pre')
    # the first section precedes the first opening tag, so is skipped
    for section in content_lower.split(open_tag)[1:]:
        # check that an ending tag exists
        if end_tag not in section:
            return False
        if not check_line_length:
            continue
        # check that lines are not too long
        tag_body = section.split(end_tag)[0].replace('<br>', '\n')
        if any(len(line) >= 60 for line in tag_body.split('\n')):
            print('<code> or <pre> line too long')
            return False
    return True
def dangerous_markup(content: str, allow_local_network_access: bool,
                     allow_tags: []) -> bool:
    """Returns true if the given content contains dangerous html markup.
    Any content mentioning '.svg' is treated as dangerous, as are
    'code' or 'pre' tags rejected by html_tag_has_closing.
    Tag names within allow_tags are removed from the list of
    invalid tag strings
    """
    if '.svg' in content.lower():
        return True
    separators = [['<', '>'], ['&lt;', '&gt;']]
    simple_invalid_strings = [
        'ampproject', 'googleapis', '_exec(', ' id=', ' name='
    ]
    if _is_dangerous_string_simple(content, allow_local_network_access,
                                   separators, simple_invalid_strings):
        return True
    if not all(html_tag_has_closing(closing_tag, content)
               for closing_tag in ('code', 'pre')):
        return True
    candidate_strings = (
        'script', 'noscript', 'canvas', 'style', 'abbr', 'input',
        'frame', 'iframe', 'html', 'body', 'hr', 'allow-popups',
        'allow-scripts', 'amp-', '?php', 'pre'
    )
    invalid_strings = [bad_str for bad_str in candidate_strings
                       if bad_str not in allow_tags]
    return _is_dangerous_string_tag(content, allow_local_network_access,
                                    separators, invalid_strings)
2021-12-27 21:44:48 +00:00
def dangerous_svg(content: str, allow_local_network_access: bool) -> bool:
    """Returns true if the given svg file content contains dangerous
    scripts, as detected by _is_dangerous_string_tag
    """
    tag_separators = [['<', '>'], ['&lt;', '&gt;']]
    script_strings = ['script']
    is_dangerous = \
        _is_dangerous_string_tag(content, allow_local_network_access,
                                 tag_separators, script_strings)
    return is_dangerous
2024-05-26 12:24:08 +00:00
def _get_statuses_list() -> []:
2023-08-14 19:01:15 +00:00
"""Returns a list of statuses path strings
"""
2024-05-26 10:43:57 +00:00
return ('/statuses/', '/objects/', '/honk/', '/p/', '/h/', '/api/posts/',
2024-05-30 08:30:05 +00:00
'/note/', '/notes/', '/comment/', '/post/', '/item/', '/videos/',
2024-06-26 19:06:43 +00:00
'/button/', '/x/', '/o/', '/posts/', '/items/', '/object/', '/r/',
2024-08-31 10:31:11 +00:00
'/content/', '/federation/', '/elsewhere/', '/article/',
2024-09-19 09:49:40 +00:00
'/activity/', '/blog/', '/app.bsky.feed.post/')
2023-08-14 19:01:15 +00:00
def contains_statuses(url: str) -> bool:
    """Whether the given url contains /statuses/ or one of the other
    path strings from _get_statuses_list, or contains the current
    or previous year as a path section
    """
    if any(status_str in url for status_str in _get_statuses_list()):
        return True
    # wordpress-style blog post
    this_year = datetime.date.today().year
    return any('/' + str(year) + '/' in url
               for year in (this_year, this_year - 1))
2023-08-03 17:31:47 +00:00
def get_actor_from_post_id(post_id: str) -> str:
    """Returns an actor url from a post id containing /statuses/ or equivalent
    eg. https://somedomain/users/nick/statuses/123 becomes
    https://somedomain/users/nick
    The post id is returned unchanged if no actor can be extracted
    """
    pixelfed_style_statuses = ['/p/']
    for status_str in _get_statuses_list():
        if status_str not in post_id:
            continue
        if status_str in pixelfed_style_statuses:
            # pixelfed style post id, where the nickname follows
            # the statuses path
            sections = post_id.split(status_str)
            nick = sections[1].split('/')[0]
            return sections[0] + '/users/' + nick
        if has_users_path(post_id):
            return post_id.split(status_str)[0]
    return post_id
2021-12-27 21:59:07 +00:00
def get_display_name(base_dir: str, actor: str, person_cache: {}) -> str:
    """Returns the display name for the given actor, after passing it
    through standardize_text.
    The name is taken from the person cache, or otherwise from the
    cached actor file. A name containing dangerous markup is replaced.
    None is returned if the actor is not within the person cache
    """
    actor = get_actor_from_post_id(actor)
    cache_entry = person_cache.get(actor)
    if not cache_entry:
        return None
    name_found = None
    cached_actor = cache_entry.get('actor')
    if cached_actor:
        if cached_actor.get('name'):
            name_found = cached_actor['name']
    else:
        # Try to obtain from the cached actors
        cached_actor_filename = \
            base_dir + '/cache/actors/' + (actor.replace('/', '#')) + '.json'
        if os.path.isfile(cached_actor_filename):
            actor_json = load_json(cached_actor_filename)
            if actor_json and actor_json.get('name'):
                name_found = actor_json['name']
    if name_found and dangerous_markup(name_found, False, []):
        name_found = "*ADVERSARY*"
    return standardize_text(name_found)
2019-08-22 12:41:16 +00:00
2020-04-04 13:44:49 +00:00
def display_name_is_emoji(display_name: str) -> bool:
    """Returns true if the given display name consists only of emoji
    shortcodes, such as ':smile:' or ':cat: :dog:'.
    Every space separated word must be at least two characters long
    and must both start and end with a colon. The minimum length
    applies to each word, so that a lone ':' is never accepted
    """
    return all(len(wrd) >= 2 and
               wrd.startswith(':') and
               wrd.endswith(':')
               for wrd in display_name.split(' '))
2021-12-27 22:12:29 +00:00
def _gender_from_string(translate: {}, text: str) -> str:
2021-06-24 19:28:26 +00:00
"""Given some text, does it contain a gender description?
"""
2021-06-24 19:25:39 +00:00
gender = None
2021-07-23 14:32:21 +00:00
if not text:
return None
2021-12-27 22:12:29 +00:00
text_orig = text
2021-06-24 19:25:39 +00:00
text = text.lower()
if translate['He/Him'].lower() in text or \
translate['boy'].lower() in text:
gender = 'He/Him'
elif (translate['She/Her'].lower() in text or
translate['girl'].lower() in text):
gender = 'She/Her'
elif 'him' in text or 'male' in text:
gender = 'He/Him'
elif 'her' in text or 'she' in text or \
'fem' in text or 'woman' in text:
gender = 'She/Her'
2021-12-27 22:12:29 +00:00
elif 'man' in text or 'He' in text_orig:
2021-06-24 19:25:39 +00:00
gender = 'He/Him'
return gender
2021-12-27 22:12:29 +00:00
def get_gender_from_bio(base_dir: str, actor: str, person_cache: {},
                        translate: {}) -> str:
    """Tries to ascertain gender from bio description
    This is for use by text-to-speech for pitch setting

    The actor is looked up within the person cache, falling back to
    the cached actor file under base_dir/cache/actors.
    The profile attachment fields are examined first and then the
    actor summary.
    Returns 'They/Them' if the actor is unknown or no gender is
    recognised, otherwise the result of _gender_from_string
    """
    default_gender = 'They/Them'
    actor = get_actor_from_post_id(actor)
    if not person_cache.get(actor):
        return default_gender
    bio_found = None
    # NOTE(review): translate is guarded here, but is indexed
    # unconditionally further down (translate['gender']) - confirm
    # whether an empty translate is ever passed
    if translate:
        pronoun_str = translate['pronoun'].lower()
    else:
        pronoun_str = 'pronoun'
    actor_json = None
    if person_cache[actor].get('actor'):
        actor_json = person_cache[actor]['actor']
    else:
        # Try to obtain from the cached actors
        cached_actor_filename = \
            base_dir + '/cache/actors/' + (actor.replace('/', '#')) + '.json'
        if os.path.isfile(cached_actor_filename):
            actor_json = load_json(cached_actor_filename)
    if not actor_json:
        return default_gender
    # is gender defined as a profile tag?
    if actor_json.get('attachment'):
        tags_list = actor_json['attachment']
        if isinstance(tags_list, list):
            # look for a gender field name
            for tag in tags_list:
                if not isinstance(tag, dict):
                    continue
                # 'schema:name' takes precedence over 'name'
                # when both are present
                name_value = None
                if tag.get('name'):
                    name_value = tag['name']
                if tag.get('schema:name'):
                    name_value = tag['schema:name']
                if not name_value:
                    continue
                prop_value_name, _ = get_attachment_property_value(tag)
                if not prop_value_name:
                    continue
                # the field is named as the translated word for gender
                if name_value.lower() == \
                   translate['gender'].lower():
                    bio_found = tag[prop_value_name]
                    break
                # the field name begins with the word for pronoun
                if name_value.lower().startswith(pronoun_str):
                    bio_found = tag[prop_value_name]
                    break
            # the field name could be anything,
            # just look at the value
            if not bio_found:
                for tag in tags_list:
                    if not isinstance(tag, dict):
                        continue
                    if not tag.get('name') and not tag.get('schema:name'):
                        continue
                    prop_value_name, _ = get_attachment_property_value(tag)
                    if not prop_value_name:
                        continue
                    gender = \
                        _gender_from_string(translate, tag[prop_value_name])
                    # the first field value which yields a gender wins
                    if gender:
                        return gender
    # if not then use the bio
    if not bio_found and actor_json.get('summary'):
        bio_found = actor_json['summary']
    if not bio_found:
        return default_gender
    gender = _gender_from_string(translate, bio_found)
    if not gender:
        gender = default_gender
    return gender
2021-12-27 22:19:18 +00:00
def get_nickname_from_actor(actor: str) -> str:
    """Returns the nickname from an actor url or handle,
    or None if no nickname could be extracted
    """
    if actor.startswith('@'):
        actor = actor[1:]

    # handle brid.gy urls
    actor = actor.replace('at://did:', 'did:')

    # the nickname follows the first user path found within the actor
    for possible_path in get_user_paths():
        if possible_path in actor:
            nick_str = actor.split(possible_path)[1].replace('@', '')
            return nick_str.split('/')[0]

    if '/@/' not in actor:
        if '/@' in actor:
            # https://domain/@nick
            return actor.split('/@')[1].split('/')[0]
        if '@' in actor:
            # nick@domain
            return actor.split('@')[0]

    if '://' not in actor:
        return None
    # https://domain/nick
    domain = actor.split('://')[1].split('/')[0]
    domain_prefix = '://' + domain + '/'
    if domain_prefix not in actor:
        return None
    nick_str = actor.split(domain_prefix)[1]
    if '/' in nick_str or '.' in nick_str:
        return None
    return nick_str
2019-07-06 15:17:21 +00:00
2020-04-04 13:44:49 +00:00
2021-12-26 12:24:40 +00:00
def get_user_paths() -> []:
    """Returns the possible user paths, as a tuple of strings
    e.g. /users/nickname, /channel/nickname
    """
    user_paths = (
        '/users/', '/profile/', '/accounts/', '/channel/',
        '/u/', '/c/', '/m/', '/a/',
        '/video-channels/', '/nieuws/author/', '/author/',
        '/federation/user/', '/activitypub/', '/actors/',
        '/snac/', '/@/', '/~/', '/fediverse/blog/',
        '/user/', '/@', '/api/collections/',
        '/feed/', '/actor/', '/ap/'
    )
    return user_paths
2021-07-04 22:58:01 +00:00
2021-12-26 17:53:07 +00:00
def get_group_paths() -> []:
    """Returns the possible group paths, as a new list on each call
    e.g. https://lemmy/c/groupname
    """
    group_paths = ('/c/', '/video-channels/', '/m/')
    return list(group_paths)
2021-07-30 13:00:23 +00:00
2021-12-27 19:05:25 +00:00
def get_domain_from_actor(actor: str) -> (str, int):
    """Returns the domain name and port number from an actor url
    or handle. The port remains None if the domain contains no ':'

    Previously a domain was first computed from the user paths, but
    that value was always overwritten by the checks below, so that
    step has been removed without changing the result
    """
    if actor.startswith('@'):
        actor = actor[1:]
    port = None
    prefixes = get_protocol_prefixes()
    if '/@' in actor and '/@/' not in actor:
        # https://domain/@nick
        domain = actor.split('/@')[0]
        for prefix in prefixes:
            domain = domain.replace(prefix, '')
    elif '@' in actor and '/@/' not in actor:
        # nick@domain
        domain = actor.split('@')[1].strip()
    else:
        # https://domain/path
        domain = actor
        for prefix in prefixes:
            domain = domain.replace(prefix, '')
        if '/' in actor:
            domain = domain.split('/')[0]
    if ':' in domain:
        port = get_port_from_domain(domain)
        domain = remove_domain_port(domain)
    return domain, port
2021-12-27 19:26:54 +00:00
def _set_default_pet_name(base_dir: str, nickname: str, domain: str,
                          follow_nickname: str, follow_domain: str) -> None:
    """Sets a default petname
    This helps especially when using onion or i2p address

    The default petname is the nickname of the followed account. It is
    stored as a "petname nickname@domain" line within petnames.txt in
    the account directory of nickname@domain. If a line beginning with
    the petname already exists then the file is left unchanged.
    """
    domain = remove_domain_port(domain)
    user_path = acct_dir(base_dir, nickname, domain)
    petnames_filename = user_path + '/petnames.txt'

    petname_lookup_entry = follow_nickname + ' ' + \
        follow_nickname + '@' + follow_domain + '\n'
    if not os.path.isfile(petnames_filename):
        # if there is no existing petnames lookup file
        try:
            with open(petnames_filename, 'w+',
                      encoding='utf-8') as fp_petnames:
                fp_petnames.write(petname_lookup_entry)
        except OSError:
            print('EX: _set_default_pet_name unable to write ' +
                  petnames_filename)
        return

    petnames_str = ''
    try:
        with open(petnames_filename, 'r', encoding='utf-8') as fp_petnames:
            petnames_str = fp_petnames.read()
    except OSError:
        print('EX: _set_default_pet_name unable to read 1 ' +
              petnames_filename)

    petname_prefix = follow_nickname + ' '
    for pet in petnames_str.split('\n'):
        if pet.startswith(petname_prefix):
            # petname already exists
            return

    # petname doesn't already exist
    try:
        with open(petnames_filename, 'a+', encoding='utf-8') as fp_petnames:
            fp_petnames.write(petname_lookup_entry)
    except OSError:
        # this is a failure to append, not a failure to read
        print('EX: _set_default_pet_name unable to append ' +
              petnames_filename)
2020-11-23 15:07:55 +00:00
2021-12-27 17:08:19 +00:00
def follow_person(base_dir: str, nickname: str, domain: str,
                  follow_nickname: str, follow_domain: str,
                  federation_list: [], debug: bool,
                  group_account: bool,
                  follow_file: str) -> bool:
    """Adds a person to the follow list

    nickname@domain is the local account and
    follow_nickname@follow_domain is the account being added to
    follow_file within the local account directory. Group accounts
    are stored with a leading '!' character.

    Returns False if the followed domain is not permitted by
    domain_permitted or if the local account directory does not exist,
    otherwise True, including when the follow already exists.

    When follow_file ends with 'following.txt' the followed account is
    also added to the calendar and given a default petname.
    """
    follow_domain_str_lower1 = follow_domain.lower()
    follow_domain_str_lower = remove_eol(follow_domain_str_lower1)
    if not domain_permitted(follow_domain_str_lower,
                            federation_list):
        if debug:
            print('DEBUG: follow of domain ' +
                  follow_domain + ' not permitted')
        return False
    if debug:
        print('DEBUG: follow of domain ' + follow_domain)

    if ':' in domain:
        domain_only = remove_domain_port(domain)
        handle = nickname + '@' + domain_only
    else:
        handle = nickname + '@' + domain

    handle_dir = acct_handle_dir(base_dir, handle)
    if not os.path.isdir(handle_dir):
        print('WARN: account for ' + handle + ' does not exist')
        return False

    # the unfollowed file is searched using the domain without its port
    if ':' in follow_domain:
        follow_domain_only = remove_domain_port(follow_domain)
        handle_to_follow = follow_nickname + '@' + follow_domain_only
    else:
        handle_to_follow = follow_nickname + '@' + follow_domain

    if group_account:
        handle_to_follow = '!' + handle_to_follow

    # was this person previously unfollowed?
    unfollowed_filename = acct_handle_dir(base_dir, handle) + '/unfollowed.txt'
    if os.path.isfile(unfollowed_filename):
        if text_in_file(handle_to_follow, unfollowed_filename):
            # remove them from the unfollowed file
            _follow_person_remove_unfollowed(unfollowed_filename,
                                             handle_to_follow)

    dir_str = data_dir(base_dir)
    if not os.path.isdir(dir_str):
        os.mkdir(dir_str)
    # the follow file entry uses the followed domain exactly as given
    handle_to_follow = follow_nickname + '@' + follow_domain
    if group_account:
        handle_to_follow = '!' + handle_to_follow
    filename = acct_handle_dir(base_dir, handle) + '/' + follow_file
    if os.path.isfile(filename):
        if text_in_file(handle_to_follow, filename):
            if debug:
                print('DEBUG: follow already exists')
            return True
        # prepend to follow file
        try:
            with open(filename, 'r+', encoding='utf-8') as fp_foll:
                content = fp_foll.read()
                if handle_to_follow + '\n' not in content:
                    fp_foll.seek(0, 0)
                    fp_foll.write(handle_to_follow + '\n' + content)
                    print('DEBUG: follow added')
        except OSError as ex:
            print('WARN: Failed to write entry to follow file ' +
                  filename + ' ' + str(ex))
    else:
        # first follow
        if debug:
            print('DEBUG: ' + handle +
                  ' creating new following file to follow ' +
                  handle_to_follow +
                  ', filename is ' + filename)
        try:
            with open(filename, 'w+', encoding='utf-8') as fp_foll:
                fp_foll.write(handle_to_follow + '\n')
        except OSError:
            print('EX: follow_person unable to write ' + filename)

    if follow_file.endswith('following.txt'):
        # Default to adding new follows to the calendar.
        # Possibly this could be made optional

        # if following a person add them to the list of
        # calendar follows
        print('DEBUG: adding ' +
              follow_nickname + '@' + follow_domain + ' to calendar of ' +
              nickname + '@' + domain)
        add_person_to_calendar(base_dir, nickname, domain,
                               follow_nickname, follow_domain)
        # add a default petname
        _set_default_pet_name(base_dir, nickname, domain,
                              follow_nickname, follow_domain)
    return True


def _follow_person_remove_unfollowed(unfollowed_filename: str,
                                     handle_to_follow: str) -> None:
    """Removes every line containing the given handle from the
    unfollowed file
    """
    try:
        with open(unfollowed_filename, 'r',
                  encoding='utf-8') as fp_unfoll:
            lines = fp_unfoll.readlines()
    except OSError:
        print('EX: follow_person unable to read ' +
              unfollowed_filename)
        # without the existing entries the file can't be rewritten,
        # because doing so would erase every other unfollowed handle
        return
    new_lines = ''.join(line for line in lines
                        if handle_to_follow not in line)
    try:
        with open(unfollowed_filename, 'w+',
                  encoding='utf-8') as fp_unfoll:
            fp_unfoll.write(new_lines)
    except OSError:
        print('EX: follow_person unable to write ' +
              unfollowed_filename)
2019-07-11 12:29:31 +00:00
2020-04-04 13:44:49 +00:00
2021-12-27 22:32:59 +00:00
def votes_on_newswire_item(status: []) -> int:
    """Returns the number of votes on a newswire item

    Each entry of status which contains the text 'vote:' counts as
    exactly one vote, however many times the text occurs within it.
    """
    return sum(1 for entry in status if 'vote:' in entry)
2020-10-08 19:47:23 +00:00
2021-12-27 22:38:48 +00:00
def locate_news_votes(base_dir: str, domain: str,
                      post_url: str) -> str:
    """Returns the votes filename for a news post
    within the news user account

    The filename is searched for within the outbox of the news@domain
    account. None is returned if no such file exists.
    """
    post_url = remove_eol(post_url.strip())

    # convert the url into the form used for post filenames
    post_id = remove_id_ending(post_url.strip()).replace('/', '#')

    if post_id.endswith('.json'):
        votes_suffix = '.votes'
    else:
        votes_suffix = '.json.votes'

    news_account_dir = data_dir(base_dir) + '/news@' + domain + '/'
    votes_filename = news_account_dir + 'outbox/' + post_id + votes_suffix
    if not os.path.isfile(votes_filename):
        return None
    return votes_filename
2021-12-27 22:46:10 +00:00
def locate_news_arrival(base_dir: str, domain: str,
                        post_url: str) -> str:
    """Returns the arrival time for a news post
    within the news user account

    The arrival time is read from the .arrived file for the post within
    the outbox of the news@domain account and is converted using
    date_from_string_format. None is returned if the file does not
    exist, is empty or can't be read.
    NOTE(review): the returned value is the result of
    date_from_string_format rather than a string - confirm the
    return annotation against callers
    """
    post_url = remove_eol(post_url.strip())

    # convert the url into the form used for post filenames
    post_id = remove_id_ending(post_url.strip()).replace('/', '#')

    if post_id.endswith('.json'):
        arrived_suffix = '.arrived'
    else:
        arrived_suffix = '.json.arrived'

    news_account_dir = data_dir(base_dir) + '/news@' + domain + '/'
    arrived_filename = news_account_dir + 'outbox/' + post_id + arrived_suffix
    if not os.path.isfile(arrived_filename):
        return None

    try:
        with open(arrived_filename, 'r', encoding='utf-8') as fp_arrival:
            arrival = fp_arrival.read()
            if arrival:
                return date_from_string_format(arrival,
                                               ["%Y-%m-%dT%H:%M:%S%z"])
    except OSError:
        print('EX: locate_news_arrival unable to read ' + arrived_filename)

    return None
2021-12-28 10:17:58 +00:00
def clear_from_post_caches(base_dir: str, recent_posts_cache: {},
                           post_id: str) -> None:
    """Clears cached html for the given post, so that edits
    to news will appear

    The cached html file for the post is removed from the postcache
    directory of every account within the data directory, other than
    the inbox@ and Actor@ accounts, and the post is also removed from
    the 'index', 'json' and 'html' sections of recent_posts_cache.
    """
    filename = '/postcache/' + post_id + '.html'
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for acct in dirs:
            if '@' not in acct:
                continue
            if acct.startswith(('inbox@', 'Actor@')):
                continue
            post_filename = os.path.join(dir_str, acct) + filename
            if os.path.isfile(post_filename):
                try:
                    os.remove(post_filename)
                except OSError:
                    print('EX: clear_from_post_caches file not removed ' +
                          str(post_filename))
        # only the top level of the data directory contains accounts
        break

    # if the post is in the recent posts cache then remove it.
    # This doesn't depend upon any account, so it is done once
    if not recent_posts_cache:
        return
    cache_index = recent_posts_cache.get('index')
    if cache_index and post_id in cache_index:
        cache_index.remove(post_id)
    for section in ('json', 'html'):
        section_cache = recent_posts_cache.get(section)
        if section_cache:
            section_cache.pop(post_id, None)
2020-10-18 16:19:28 +00:00
2021-12-26 20:36:08 +00:00
def locate_post(base_dir: str, nickname: str, domain: str,
                post_url: str, replies: bool = False) -> str:
    """Returns the filename for the given status post url

    If replies is True then the .replies file for the post is located
    rather than its .json file. The inbox, outbox and tlblogs of the
    account are searched first, then the outbox of the news account
    and lastly the announce cache. None is returned if nothing is found.
    """
    extension = 'replies' if replies else 'json'

    # convert the url into the form used for post filenames
    post_file = \
        remove_id_ending(post_url.strip()).replace('/', '#') + \
        '.' + extension

    # search boxes
    account_dir = acct_dir(base_dir, nickname, domain) + '/'
    for box_name in ('inbox', 'outbox', 'tlblogs'):
        box_filename = account_dir + box_name + '/' + post_file
        if os.path.isfile(box_filename):
            return box_filename

    # check news posts
    news_dir = data_dir(base_dir) + '/news' + '@' + domain + '/'
    news_filename = news_dir + 'outbox/' + post_file
    if os.path.isfile(news_filename):
        return news_filename

    # is it in the announce cache?
    announce_filename = \
        base_dir + '/cache/announce/' + nickname + '/' + post_file
    if os.path.isfile(announce_filename):
        return announce_filename

    return None
2019-07-14 16:37:01 +00:00
2020-04-04 13:44:49 +00:00
2021-12-28 10:25:50 +00:00
def _get_published_date(post_json_object: {}) -> str:
    """Returns the published date on the given post

    The 'published' field of the post itself takes priority, followed
    by the 'published' field of its object. None is returned if neither
    exists or if the value found is not a string.
    """
    published = post_json_object.get('published')
    if not published:
        published = None
        if has_object_dict(post_json_object):
            published = post_json_object['object'].get('published')
    if published and isinstance(published, str):
        return published
    return None
2021-12-28 10:25:50 +00:00
def get_reply_interval_hours(base_dir: str, nickname: str, domain: str,
                             default_reply_interval_hrs: int) -> int:
    """Returns the reply interval for the given account.
    The reply interval is the number of hours after a post being made
    during which replies are allowed

    The value is read from the .reply_interval_hours file within the
    account directory. default_reply_interval_hrs is returned if that
    file doesn't exist, can't be read or doesn't contain a whole number.
    """
    reply_interval_filename = \
        acct_dir(base_dir, nickname, domain) + '/.reply_interval_hours'
    if not os.path.isfile(reply_interval_filename):
        return default_reply_interval_hrs

    try:
        with open(reply_interval_filename, 'r',
                  encoding='utf-8') as fp_interval:
            # surrounding whitespace, such as a trailing newline added
            # by a text editor, should not invalidate the value
            hours_str = fp_interval.read().strip()
    except OSError:
        print('EX: get_reply_interval_hours unable to read ' +
              reply_interval_filename)
        return default_reply_interval_hrs

    if not hours_str.isdigit():
        return default_reply_interval_hrs
    try:
        return int(hours_str)
    except ValueError:
        # isdigit accepts some characters, such as superscript
        # digits, which int is unable to convert
        return default_reply_interval_hrs
2021-12-28 12:15:46 +00:00
def set_reply_interval_hours(base_dir: str, nickname: str, domain: str,
                             reply_interval_hours: int) -> bool:
    """Sets the reply interval for the given account.
    The reply interval is the number of hours after a post being made
    during which replies are allowed

    The value is written to the .reply_interval_hours file within the
    account directory. Returns True if it was saved, or False if the
    file could not be written.
    """
    hours_str = str(reply_interval_hours)
    reply_interval_filename = \
        acct_dir(base_dir, nickname, domain) + '/.reply_interval_hours'
    try:
        with open(reply_interval_filename, 'w+',
                  encoding='utf-8') as fp_interval:
            fp_interval.write(hours_str)
    except OSError:
        print('EX: set_reply_interval_hours unable to save reply interval ' +
              str(reply_interval_filename) + ' ' +
              hours_str)
        return False
    return True
2021-12-28 12:15:46 +00:00
def can_reply_to(base_dir: str, nickname: str, domain: str,
                 post_url: str, reply_interval_hours: int,
                 curr_date_str: str = None,
                 post_json_object: {} = None) -> bool:
    """Is replying to the given local post permitted?
    This is a spam mitigation feature, so that spammers can't
    add a lot of replies to old post which you don't notice.

    Replies are always permitted if the url doesn't contain /statuses/
    or if the post isn't stored locally. Otherwise a reply is permitted
    only while the number of whole hours since the post was published
    is at least zero and less than reply_interval_hours.
    curr_date_str can be used to supply the current date, otherwise
    date_utcnow is used.
    """
    if '/statuses/' not in post_url:
        return True

    if not post_json_object:
        post_filename = locate_post(base_dir, nickname, domain, post_url)
        if not post_filename:
            # the post is not stored locally
            return True
        post_json_object = load_json(post_filename)
        if not post_json_object:
            return False

    published = _get_published_date(post_json_object)
    if not published:
        return False

    pub_date = date_from_string_format(published, ['%Y-%m-%dT%H:%M:%S%z'])
    if not pub_date:
        print('EX: can_reply_to unrecognized published date ' + str(published))
        return False

    if curr_date_str:
        curr_date = \
            date_from_string_format(curr_date_str, ['%Y-%m-%dT%H:%M:%S%z'])
    else:
        curr_date = date_utcnow()
    if not curr_date:
        print('EX: can_reply_to unrecognized current date ' +
              str(curr_date_str))
        return False

    elapsed_seconds = (curr_date - pub_date).total_seconds()
    hours_since_publication = int(elapsed_seconds / 3600)
    return 0 <= hours_since_publication < reply_interval_hours
2021-12-28 13:49:44 +00:00
def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
                       post_json: {}) -> None:
    """Removes media files for an attachment

    The url of the first attachment on the post is converted into a
    filename beneath base_dir, then that file and its .vtt and .etag
    companions are deleted if they exist. The attachment list of the
    post is then emptied.
    Nothing is deleted if the resulting filename is outside of base_dir.
    """
    post_attachments = get_post_attachments(post_json)
    if not post_attachments:
        return
    if not post_attachments[0].get('url'):
        return
    attachment_url = get_url_from_post(post_attachments[0]['url'])
    if not attachment_url:
        return
    attachment_url = remove_html(attachment_url)
    media_filename = base_dir + '/' + \
        attachment_url.replace(http_prefix + '://' + domain + '/', '')

    # The url comes from the post, so it could contain '..' segments.
    # Make sure that the file being deleted is really within base_dir
    base_path = os.path.abspath(base_dir).rstrip(os.sep) + os.sep
    if os.path.abspath(media_filename).startswith(base_path):
        removals = (
            (media_filename, 'media file '),
            (media_filename + '.vtt', 'media transcript '),
            (media_filename + '.etag', 'etag file ')
        )
        for filename, description in removals:
            if not os.path.isfile(filename):
                continue
            try:
                os.remove(filename)
            except OSError:
                print('EX: _remove_attachment unable to delete ' +
                      description + str(filename))
    else:
        print('WARN: _remove_attachment media file is outside of ' +
              'the base directory ' + str(media_filename))

    post_json['attachment'] = []
2020-04-04 13:44:49 +00:00
2019-07-14 16:57:06 +00:00
2024-10-23 12:20:07 +00:00
def remove_post_from_index(post_url: str, debug: bool,
                           index_file: str) -> None:
    """Removes a url from a box index

    Every line of the index file which is equal to the post id, after
    trailing end of line characters are ignored, is dropped and the
    other lines are written back in their existing order. Nothing
    happens if the index file doesn't exist or doesn't contain the id.
    """
    if not os.path.isfile(index_file):
        return
    post_id = remove_id_ending(post_url)
    if not text_in_file(post_id, index_file):
        return

    index_lines: list[str] = []
    try:
        with open(index_file, 'r', encoding='utf-8') as fp_index:
            index_lines = fp_index.readlines()
    except OSError as exc:
        print('EX: remove_post_from_index unable to read ' +
              index_file + ' ' + str(exc))
    if not index_lines:
        return

    try:
        with open(index_file, 'w+',
                  encoding='utf-8') as fp_index:
            for index_line in index_lines:
                if index_line.strip("\n").strip("\r") == post_id:
                    if debug:
                        print('DEBUG: removed ' + post_id +
                              ' from index ' + index_file)
                else:
                    fp_index.write(index_line)
    except OSError as exc:
        print('EX: ' +
              'remove_post_from_index unable to write ' +
              index_file + ' ' + str(exc))
2024-07-18 12:55:47 +00:00
2024-10-23 12:20:07 +00:00
def remove_moderation_post_from_index(base_dir: str, post_url: str,
                                      debug: bool) -> None:
    """Removes a url from the moderation index

    The moderation index is the moderation.txt file within the
    data directory.
    """
    remove_post_from_index(post_url, debug,
                           data_dir(base_dir) + '/moderation.txt')
2020-04-04 13:44:49 +00:00
2019-08-12 18:02:29 +00:00
2021-12-26 19:36:40 +00:00
def _is_reply_to_blog_post(base_dir: str, nickname: str, domain: str,
                           post_json_object: {}) -> bool:
    """Is the given post a reply to a blog post?

    Returns True if the id which the post replies to is found within
    the tlblogs.index file of the given account. post_json_object is
    the post as a dictionary.
    """
    if not has_object_dict(post_json_object):
        return False
    reply_id = get_reply_to(post_json_object['object'])
    if not reply_id or not isinstance(reply_id, str):
        return False
    blogs_index_filename = \
        acct_dir(base_dir, nickname, domain) + '/tlblogs.index'
    if not os.path.isfile(blogs_index_filename):
        return False
    # index entries use # in place of / within the post id
    post_id = remove_id_ending(reply_id).replace('/', '#')
    return bool(text_in_file(post_id, blogs_index_filename))
2021-12-28 14:55:45 +00:00
def _delete_post_remove_replies(base_dir: str, nickname: str, domain: str,
                                http_prefix: str, post_filename: str,
                                recent_posts_cache: {}, debug: bool,
                                manual: bool) -> None:
    """Removes replies when deleting a post

    Each line of the .replies file belonging to the post is treated as
    the id of a reply. Every reply which can be located is deleted
    using delete_post, then the .replies file itself is removed.
    """
    replies_filename = post_filename.replace('.json', '.replies')
    if not os.path.isfile(replies_filename):
        return
    if debug:
        print('DEBUG: removing replies to ' + post_filename)

    try:
        with open(replies_filename, 'r', encoding='utf-8') as fp_replies:
            for reply_id in fp_replies:
                reply_filename = \
                    locate_post(base_dir, nickname, domain, reply_id)
                if reply_filename and os.path.isfile(reply_filename):
                    delete_post(base_dir, http_prefix,
                                nickname, domain, reply_filename, debug,
                                recent_posts_cache, manual)
    except OSError:
        print('EX: _delete_post_remove_replies unable to read ' +
              replies_filename)

    # remove the replies file
    try:
        os.remove(replies_filename)
    except OSError:
        print('EX: _delete_post_remove_replies ' +
              'unable to delete replies file ' + str(replies_filename))
2021-07-05 09:24:29 +00:00
2021-12-28 13:12:10 +00:00
def _is_bookmarked(base_dir: str, nickname: str, domain: str,
                   post_filename: str) -> bool:
    """Returns True if the given post is bookmarked

    The final component of post_filename, followed by a newline, is
    searched for within the bookmarks.index file of the account.
    """
    index_filename = \
        acct_dir(base_dir, nickname, domain) + '/bookmarks.index'
    if not os.path.isfile(index_filename):
        return False
    index_entry = post_filename.split('/')[-1] + '\n'
    return bool(text_in_file(index_entry, index_filename))
2021-12-27 11:05:24 +00:00
def remove_post_from_cache(post_json_object: {},
                           recent_posts_cache: {}) -> None:
    """ if the post exists in the recent posts cache then remove it

    The post id has any fragment after a # character removed and is
    converted into the form used for cache keys. Nothing is changed
    unless that key appears within the 'index' section of the cache,
    in which case it is removed from the 'index', 'json' and 'html'
    sections.
    """
    if not recent_posts_cache:
        return
    if not post_json_object.get('id'):
        return
    cache_index = recent_posts_cache.get('index')
    if not cache_index:
        return

    post_id = post_json_object['id']
    if '#' in post_id:
        post_id = post_id.split('#', 1)[0]
    post_id = remove_id_ending(post_id).replace('/', '#')
    if post_id not in cache_index:
        return

    cache_index.remove(post_id)

    for section in ('json', 'html'):
        section_cache = recent_posts_cache.get(section)
        if section_cache and section_cache.get(post_id):
            del section_cache[post_id]
2021-07-05 09:45:55 +00:00
2022-04-09 15:11:22 +00:00
def delete_cached_html(base_dir: str, nickname: str, domain: str,
                       post_json_object: {}) -> None:
    """Removes cached html file for the given post

    The .ssml file alongside the cached html and the corresponding
    .ssml file within the outbox are also removed if they exist.
    """
    html_filename = \
        get_cached_post_filename(base_dir, nickname, domain, post_json_object)
    if not html_filename:
        return

    ssml_filename = html_filename.replace('.html', '.ssml')
    outbox_ssml_filename = \
        ssml_filename.replace('/postcache/', '/outbox/')

    removals = (
        (html_filename, 'unable to delete cached post file '),
        (ssml_filename, 'unable to delete cached ssml post file '),
        (outbox_ssml_filename,
         'unable to delete cached outbox ssml post file ')
    )
    for filename, problem in removals:
        if not os.path.isfile(filename):
            continue
        try:
            os.remove(filename)
        except OSError:
            print('EX: delete_cached_html ' +
                  problem +
                  str(filename))
2021-07-05 09:45:55 +00:00
def _remove_post_id_from_tag_index(tag_index_filename: str,
                                   post_id: str) -> None:
    """Remove post_id from the tag index file

    Every line containing post_id is dropped. If nothing other than
    whitespace remains then the index file is deleted, otherwise it is
    rewritten with the remaining lines.
    Nothing is changed if the file is empty or can't be read.
    """
    lines = None
    try:
        with open(tag_index_filename, 'r', encoding='utf-8') as fp_index:
            lines = fp_index.readlines()
    except OSError:
        print('EX: _remove_post_id_from_tag_index unable to read ' +
              tag_index_filename)
    if not lines:
        return

    # skip over the deleted post
    newlines = ''.join(file_line for file_line in lines
                       if post_id not in file_line)

    if not newlines.strip():
        # if there are no lines then remove the hashtag file
        try:
            os.remove(tag_index_filename)
        except OSError:
            print('EX: _remove_post_id_from_tag_index ' +
                  'unable to delete tag index ' + str(tag_index_filename))
        return

    # write the new hashtag index without the given post in it
    try:
        with open(tag_index_filename, 'w+',
                  encoding='utf-8') as fp_index:
            fp_index.write(newlines)
    except OSError:
        print('EX: _remove_post_id_from_tag_index unable to write ' +
              tag_index_filename)
2021-12-28 14:55:45 +00:00
def _delete_hashtags_on_post(base_dir: str, post_json_object: {}) -> None:
    """Removes hashtags when a post is deleted

    For each Hashtag within the tags of the post, the post id is
    removed from the index files for that hashtag within the tagmaps
    and tags directories.
    Nothing is done unless the post content contains a # character.
    """
    if not has_object_dict(post_json_object):
        return
    post_obj = post_json_object['object']
    content = post_obj.get('content')
    if not content or '#' not in content:
        return

    if not post_obj.get('id') or \
       not post_obj.get('tag'):
        return

    # get the id of the post
    post_id = remove_id_ending(post_obj['id'])
    for tag in post_obj['tag']:
        # tags arrive within posts, so don't assume their structure
        if not isinstance(tag, dict):
            continue
        if not tag.get('type'):
            continue
        if tag['type'] != 'Hashtag':
            continue
        tag_name = tag.get('name')
        if not tag_name or not isinstance(tag_name, str):
            continue
        # remove the leading # character
        tag_name = tag_name[1:]
        # the name becomes part of a filename, so it must not be able
        # to refer to anything outside of the tags directories
        if '/' in tag_name or '\\' in tag_name or '..' in tag_name:
            continue
        # find the index files for this tag
        tag_filenames = (
            base_dir + '/tagmaps/' + tag_name + '.txt',
            base_dir + '/tags/' + tag_name + '.txt'
        )
        for tag_filename in tag_filenames:
            if os.path.isfile(tag_filename):
                _remove_post_id_from_tag_index(tag_filename, post_id)
2021-07-05 09:45:55 +00:00
2021-12-29 21:55:09 +00:00
def _delete_conversation_post(base_dir: str, nickname: str, domain: str,
                              post_json_object: {}) -> None:
    """Deletes a post from a conversation

    The id of the post is removed from the conversation file of the
    account. If that leaves the conversation empty then the
    conversation file, and its .muted file if there is one, is deleted.
    False is returned when there is nothing to remove, otherwise there
    is no return value.
    """
    if not has_object_dict(post_json_object):
        return False
    post_obj = post_json_object['object']
    # Due to lack of AP specification maintenance, a conversation can also be
    # referred to as a thread or (confusingly) "context"
    if not (post_obj.get('conversation') or
            post_obj.get('thread') or
            post_obj.get('context')):
        return False
    if not post_obj.get('id'):
        return False
    conversation_dir = \
        acct_dir(base_dir, nickname, domain) + '/conversation'
    # in order of preference: conversation, context, thread
    conversation_id = \
        post_obj.get('conversation') or \
        post_obj.get('context') or \
        post_obj['thread']
    if not isinstance(conversation_id, str):
        return False
    conversation_filename = \
        conversation_dir + '/' + conversation_id.replace('/', '#')
    post_id = post_obj['id']
    if not os.path.isfile(conversation_filename):
        return False

    conversation_str = ''
    try:
        with open(conversation_filename, 'r', encoding='utf-8') as fp_conv:
            conversation_str = fp_conv.read()
    except OSError:
        print('EX: _delete_conversation_post unable to read ' +
              conversation_filename)

    post_entry = post_id + '\n'
    if post_entry not in conversation_str:
        return False
    conversation_str = conversation_str.replace(post_entry, '')

    if conversation_str:
        # other posts remain within the conversation
        try:
            with open(conversation_filename, 'w+',
                      encoding='utf-8') as fp_conv:
                fp_conv.write(conversation_str)
        except OSError:
            print('EX: _delete_conversation_post unable to write ' +
                  conversation_filename)
        return

    # the conversation is now empty
    muted_filename = conversation_filename + '.muted'
    if os.path.isfile(muted_filename):
        try:
            os.remove(muted_filename)
        except OSError:
            print('EX: _delete_conversation_post ' +
                  'unable to remove conversation ' +
                  str(conversation_filename) + '.muted')
    try:
        os.remove(conversation_filename)
    except OSError:
        print('EX: _delete_conversation_post ' +
              'unable to remove conversation ' +
              str(conversation_filename))
2021-08-12 10:22:04 +00:00
def is_dm(post_json_object: {}) -> bool:
    """Returns true if the given post is a DM

    A DM is a Create activity whose object is a ChatMessage, Note,
    Event, Page, Patch, EncryptedMessage or Article, which has no
    moderationStatus and whose 'to' and 'cc' fields contain neither a
    public address nor a followers collection. The 'to' and 'cc' fields
    may each be a single address or a list of addresses.
    """
    if post_json_object.get('type') != 'Create':
        return False
    if not has_object_dict(post_json_object):
        return False
    post_obj = post_json_object['object']
    if post_obj.get('type') not in ('ChatMessage', 'Note', 'Event',
                                    'Page', 'Patch',
                                    'EncryptedMessage',
                                    'Article'):
        return False
    if post_obj.get('moderationStatus'):
        return False
    for field_name in ('to', 'cc'):
        recipients = post_obj.get(field_name)
        if not recipients:
            continue
        if isinstance(recipients, str):
            # a single address is checked in the same way as a list
            recipients = [recipients]
        elif not isinstance(recipients, list):
            continue
        for to_address in recipients:
            if _is_dm_excluded_address(to_address):
                return False
    return True


def _is_dm_excluded_address(to_address) -> bool:
    """Returns True if a post sent to the given address can't be a DM,
    because the address is public or is a followers collection
    """
    if not isinstance(to_address, str):
        return False
    if to_address.endswith('#Public') or \
       to_address in ('as:Public', 'Public'):
        return True
    return to_address.endswith('followers')
def _is_remote_dm(domain_full: str, post_json_object: {}) -> bool:
    """Is the given post a DM from a different domain?

    Returns True only when the post is a DM and the value obtained
    from its attributedTo field does not contain '://' followed by
    domain_full.
    """
    if not is_dm(post_json_object):
        return False
    # attributedTo is read from the wrapped object when there is one,
    # otherwise from the post itself
    if has_object_dict(post_json_object):
        source_json = post_json_object['object']
    else:
        source_json = post_json_object
    if not source_json.get('attributedTo'):
        return False
    author = get_attributed_to(source_json['attributedTo'])
    if not author:
        return False
    local_marker = '://' + domain_full
    return local_marker not in author
2021-12-28 14:55:45 +00:00
def delete_post(base_dir: str, http_prefix: str,
                nickname: str, domain: str, post_filename: str,
                debug: bool, recent_posts_cache: {},
                manual: bool) -> None:
    """Recursively deletes a post and its replies and attachments

    If the post can be loaded then it is left untouched when it is a
    DM from another instance (unless manual is set), when it is
    bookmarked, or when it is a reply to a blog post. Otherwise its
    cache entries, conversation index entry, attachment, sidecar
    files, moderation index entry and hashtag index entries are
    removed first. If the post can't be loaded only its replies and
    the file itself are removed.
    """
    post_json_object = load_json(post_filename)
    if post_json_object:
        # don't allow DMs to be deleted if they came from a different
        # instance, otherwise this breaks expectations about how DMs
        # should operate
        # i.e. DMs should only be removed if they are manually deleted
        if not manual and _is_remote_dm(domain, post_json_object):
            return

        # don't allow deletion of bookmarked posts
        if _is_bookmarked(base_dir, nickname, domain, post_filename):
            return

        # don't remove replies to blog posts
        if _is_reply_to_blog_post(base_dir, nickname, domain,
                                  post_json_object):
            return

        # remove from recent posts cache in memory
        remove_post_from_cache(post_json_object, recent_posts_cache)

        # remove from conversation index
        _delete_conversation_post(base_dir, nickname, domain,
                                  post_json_object)

        # remove any attachment
        _remove_attachment(base_dir, http_prefix, domain, post_json_object)

        # remove sidecar files, which are named either
        # <post filename>.<ext> or <post filename without .json>.<ext>
        is_json_filename = post_filename.endswith('.json')
        sidecar_extensions = (
            'votes', 'arrived', 'muted', 'tts', 'reject', 'mitm', 'edits',
            'seen'
        )
        for ext in sidecar_extensions:
            ext_filename = post_filename + '.' + ext
            if not os.path.isfile(ext_filename):
                if not is_json_filename:
                    continue
                ext_filename = \
                    post_filename.replace('.json', '') + '.' + ext
                if not os.path.isfile(ext_filename):
                    continue
            try:
                os.remove(ext_filename)
            except OSError:
                print('EX: delete_post unable to remove ext ' +
                      str(ext_filename))

        # remove cached html version of the post
        delete_cached_html(base_dir, nickname, domain, post_json_object)

        if post_json_object.get('object'):
            # remove from moderation index file
            if has_object_dict(post_json_object) and \
               post_json_object['object'].get('moderationStatus') and \
               post_json_object.get('id'):
                post_id = remove_id_ending(post_json_object['id'])
                remove_moderation_post_from_index(base_dir, post_id, debug)

            # remove any hashtags index entries
            _delete_hashtags_on_post(base_dir, post_json_object)

    # remove any replies
    _delete_post_remove_replies(base_dir, nickname, domain,
                                http_prefix, post_filename,
                                recent_posts_cache, debug, manual)
    # finally, remove the post itself
    try:
        os.remove(post_filename)
    except OSError:
        if debug:
            print('EX: delete_post unable to delete post ' +
                  str(post_filename))
2019-07-27 22:48:34 +00:00
2020-04-04 13:44:49 +00:00
2022-01-13 15:10:41 +00:00
def _is_valid_language(text: str) -> bool:
"""Returns true if the given text contains a valid
natural language string
"""
2021-12-28 12:15:46 +00:00
natural_languages = {
"Latin": [65, 866],
"Greek": [880, 1280],
"isArmenian": [1328, 1424],
"isHebrew": [1424, 1536],
"Arabic": [1536, 1792],
"Syriac": [1792, 1872],
"Thaan": [1920, 1984],
"Devanagari": [2304, 2432],
"Bengali": [2432, 2560],
"Gurmukhi": [2560, 2688],
"Gujarati": [2688, 2816],
"Oriya": [2816, 2944],
"Tamil": [2944, 3072],
"Telugu": [3072, 3200],
"Kannada": [3200, 3328],
"Malayalam": [3328, 3456],
"Sinhala": [3456, 3584],
"Thai": [3584, 3712],
"Lao": [3712, 3840],
"Tibetan": [3840, 4096],
"Myanmar": [4096, 4256],
"Georgian": [4256, 4352],
"HangulJamo": [4352, 4608],
"Cherokee": [5024, 5120],
"UCAS": [5120, 5760],
"Ogham": [5760, 5792],
"Runic": [5792, 5888],
"Khmer": [6016, 6144],
2022-03-02 10:10:43 +00:00
"Hangul Syllables": [44032, 55203],
"Hangul Jamo": [4352, 4607],
"Hangul Compatibility Jamo": [12592, 12687],
"Hangul Jamo Extended-A": [43360, 43391],
"Hangul Jamo Extended-B": [55216, 55295],
2022-03-04 18:57:43 +00:00
"Mongolian": [6144, 6320],
2022-03-04 19:08:54 +00:00
"Cyrillic": [1024, 1279],
"Cyrillic Supplement": [1280, 1327],
"Cyrillic Extended A": [11744, 11775],
"Cyrillic Extended B": [42560, 42655],
"Cyrillic Extended C": [7296, 7311],
"Phonetic Extensions": [7467, 7544],
"Combining Half Marks": [65070, 65071]
}
2022-05-30 20:47:23 +00:00
for _, lang_range in natural_languages.items():
2021-12-28 12:15:46 +00:00
ok_lang = True
for char in text:
2022-03-03 11:52:55 +00:00
if char.isdigit() or char == '_':
continue
2021-12-28 12:15:46 +00:00
if ord(char) not in range(lang_range[0], lang_range[1]):
ok_lang = False
break
2021-12-28 12:15:46 +00:00
if ok_lang:
return True
return False
2021-12-28 14:55:45 +00:00
def _get_reserved_words() -> str:
2024-05-30 08:34:33 +00:00
"""Returns a list of reserved words which should not be
used for nicknames in order to avoid confusion
"""
return ('inbox', 'dm', 'outbox', 'following',
'public', 'followers', 'category',
2021-09-13 13:57:37 +00:00
'channel', 'calendar', 'video-channels',
2024-05-30 08:34:33 +00:00
'videos', 'tlreplies', 'tlmedia', 'tlblogs',
'tlblogs', 'tlfeatures',
'moderation', 'moderationaction',
'activity', 'undo', 'pinned',
2022-02-26 17:14:10 +00:00
'actor', 'Actor', 'instance.actor',
'reply', 'replies', 'question', 'like',
2024-07-26 09:47:41 +00:00
'likes', 'user', 'users', 'statuses',
'tags', 'author', 'accounts', 'headers', 'snac',
'channels', 'profile', 'u', 'c',
'updates', 'repeat', 'announce',
'shares', 'fonts', 'icons', 'avatars',
'welcome', 'helpimages',
'bookmark', 'bookmarks', 'tlbookmarks',
'ignores', 'linksmobile', 'newswiremobile',
'minimal', 'search', 'eventdelete',
'searchemoji', 'catalog', 'conversationId', 'thread',
2022-04-29 13:54:13 +00:00
'mention', 'http', 'https', 'ipfs', 'ipns',
2022-12-01 19:44:12 +00:00
'ontologies', 'data', 'postedit', 'moved',
2023-05-03 18:56:05 +00:00
'inactive', 'activitypub', 'actors',
2024-05-30 08:34:33 +00:00
'note', 'notes', 'offers', 'wanted', 'honk',
2024-06-26 19:08:07 +00:00
'button', 'post', 'item', 'comment',
2024-08-10 12:17:08 +00:00
'content', 'federation', 'elsewhere',
2024-08-31 10:31:11 +00:00
'article', 'activity')
2021-12-28 14:55:45 +00:00
def get_nickname_validation_pattern() -> str:
    """Returns a html text input validation pattern for nickname

    The pattern consists of one negative lookahead for each reserved
    word, followed by a wildcard limited to between 1 and 30
    characters.
    """
    reserved_names = _get_reserved_words()
    # each reserved word is rejected when it appears as a whole word
    lookaheads = ''.join('(?!.*\\b' + word + '\\b)'
                         for word in reserved_names)
    # the length limit has to quantify the wildcard. Applying it to
    # the end anchor, as in '.*${1,30}', is not a valid expression
    return '^' + lookaheads + '.{1,30}$'
2021-12-28 14:55:45 +00:00
def _is_reserved_name(nickname: str) -> bool:
    """Is the given nickname reserved for some special function?

    Returns True when the nickname exactly matches one of the
    reserved words.
    """
    return nickname in _get_reserved_words()
2021-12-28 14:41:10 +00:00
def valid_nickname(domain: str, nickname: str) -> bool:
    """Is the given nickname valid?

    A valid nickname has between 1 and 30 characters, passes the
    natural language check, contains no forbidden character, differs
    from the domain and is not a reserved word.
    """
    nickname_length = len(nickname)
    if nickname_length == 0 or nickname_length > 30:
        return False
    if not _is_valid_language(nickname):
        return False
    forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!')
    if any(char in nickname for char in forbidden_chars):
        return False
    # this should only apply for the shared inbox
    if nickname == domain:
        return False
    return not _is_reserved_name(nickname)
2019-08-08 11:24:26 +00:00
2020-04-04 13:44:49 +00:00
2021-12-28 14:41:10 +00:00
def no_of_accounts(base_dir: str) -> int:
    """Returns the number of accounts on the system

    Counts the directories at the top level of the data directory
    which is_account_dir accepts.
    """
    account_ctr = 0
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        account_ctr = \
            sum(1 for account in dirs if is_account_dir(account))
        # only the top level directory is of interest
        break
    return account_ctr
2019-08-10 11:31:42 +00:00
2020-04-04 13:44:49 +00:00
2021-12-28 14:41:10 +00:00
def no_of_active_accounts_monthly(base_dir: str, months: int) -> int:
    """Returns the number of accounts on the system this month

    An account is counted when its .lastUsed file holds a timestamp,
    in seconds, which is less than the given number of 30 day months
    before the current time.
    """
    account_ctr = 0
    curr_time = int(time.time())
    month_seconds = int(60*60*24*30*months)
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for account in dirs:
            if not is_account_dir(account):
                continue
            last_used_filename = \
                dir_str + '/' + account + '/.lastUsed'
            if not os.path.isfile(last_used_filename):
                continue
            try:
                with open(last_used_filename, 'r',
                          encoding='utf-8') as fp_last_used:
                    # tolerate a trailing newline or other whitespace
                    last_used = fp_last_used.read().strip()
            except OSError:
                print('EX: no_of_active_accounts_monthly unable to read ' +
                      last_used_filename)
                continue
            if not last_used.isdigit():
                continue
            if curr_time - int(last_used) < month_seconds:
                account_ctr += 1
        # only the top level directory is of interest
        break
    return account_ctr
2019-11-13 15:15:08 +00:00
2020-04-04 13:44:49 +00:00
2024-02-19 18:48:55 +00:00
def copytree(src: str, dst: str, symlinks: bool, ignore) -> None:
    """Copy a directory

    Copies every item within src into dst, creating dst if it does
    not yet exist. Subdirectories are copied with shutil.copytree,
    to which symlinks and ignore are passed, and files are copied
    with shutil.copy2.
    """
    # copying a file into a missing destination would otherwise fail
    os.makedirs(dst, exist_ok=True)
    for item in os.listdir(src):
        s_dir = os.path.join(src, item)
        d_dir = os.path.join(dst, item)
        if os.path.isdir(s_dir):
            shutil.copytree(s_dir, d_dir, symlinks, ignore)
        else:
            shutil.copy2(s_dir, d_dir)
2019-10-19 17:50:05 +00:00
2020-04-04 13:44:49 +00:00
2021-12-26 23:53:16 +00:00
def get_cached_post_directory(base_dir: str,
                              nickname: str, domain: str) -> str:
    """Returns the directory where the html post cache exists

    This is the 'postcache' subdirectory of the account directory.
    """
    return acct_dir(base_dir, nickname, domain) + '/postcache'
2019-10-19 17:50:05 +00:00
2020-04-04 13:44:49 +00:00
2021-12-26 23:41:34 +00:00
def get_cached_post_filename(base_dir: str, nickname: str, domain: str,
                             post_json_object: {}) -> str:
    """Returns the html cache filename for the given post

    Returns None if the cache directory does not exist or if its
    path contains no '@' character.
    """
    cache_dir = get_cached_post_directory(base_dir, nickname, domain)
    if not os.path.isdir(cache_dir) or '@' not in cache_dir:
        # invalid html cache directory
        return None
    # slashes within the post id become '#' within the filename
    flattened_post_id = \
        remove_id_ending(post_json_object['id']).replace('/', '#')
    return cache_dir + '/' + flattened_post_id + '.html'
2019-11-24 13:46:28 +00:00
2020-04-04 13:44:49 +00:00
2021-12-28 14:24:14 +00:00
def update_recent_posts_cache(recent_posts_cache: {}, max_recent_posts: int,
                              post_json_object: {}, html_str: str) -> None:
    """Store recent posts in memory so that they can be quickly recalled

    The cache holds an 'index' list of post ids, oldest first, and
    'json' and 'html' dicts keyed by post id. Posts without an id,
    or which are already within the index, are ignored. When the
    number of html entries exceeds max_recent_posts the oldest
    entries are evicted.
    """
    if not post_json_object.get('id'):
        return
    post_id = post_json_object['id']
    if '#' in post_id:
        post_id = post_id.split('#', 1)[0]
    post_id = remove_id_ending(post_id).replace('/', '#')
    if not recent_posts_cache.get('index'):
        # this is the first post, so create the cache
        recent_posts_cache['index'] = [post_id]
        recent_posts_cache['json'] = {}
        recent_posts_cache['html'] = {}
        recent_posts_cache['json'][post_id] = json.dumps(post_json_object)
        recent_posts_cache['html'][post_id] = html_str
        return

    if post_id in recent_posts_cache['index']:
        return
    recent_posts_cache['index'].append(post_id)
    post_json_object['muted'] = False
    recent_posts_cache['json'][post_id] = json.dumps(post_json_object)
    recent_posts_cache['html'][post_id] = html_str

    # evict the oldest posts. Entries are removed even if their value
    # is empty, and the loop stops once the index is exhausted rather
    # than reading from an empty list
    while recent_posts_cache['index'] and \
            len(recent_posts_cache['html']) > max_recent_posts:
        oldest_post_id = recent_posts_cache['index'].pop(0)
        recent_posts_cache['json'].pop(oldest_post_id, None)
        recent_posts_cache['html'].pop(oldest_post_id, None)
2020-04-04 13:44:49 +00:00
2020-02-21 10:19:02 +00:00
2021-12-28 14:01:37 +00:00
def file_last_modified(filename: str) -> str:
    """Returns the date when a file was last modified

    The modification time is expressed in UTC and formatted
    as YYYY-MM-DDTHH:MM:SSZ
    """
    utc = datetime.timezone.utc
    modified_secs = os.path.getmtime(filename)
    modified_date = datetime.datetime.fromtimestamp(modified_secs, utc)
    return modified_date.strftime("%Y-%m-%dT%H:%M:%SZ")
2020-02-22 16:00:27 +00:00
2020-04-04 13:44:49 +00:00
2022-07-12 19:03:30 +00:00
def get_css(base_dir: str, css_filename: str) -> str:
    """Retrieves the css for a given file

    Returns the text of the file, or None if the file does not
    exist or can't be read. The base_dir argument is not used.
    """
    # does the css file exist?
    if os.path.isfile(css_filename):
        try:
            with open(css_filename, 'r', encoding='utf-8') as fp_css:
                return fp_css.read()
        except OSError:
            print('EX: get_css unable to read ' + css_filename)
    return None
2021-12-28 13:07:02 +00:00
def _search_virtual_box_posts(base_dir: str, nickname: str, domain: str,
                              search_str: str, max_results: int,
                              box_name: str) -> []:
    """Searches through a virtual box, which is typically an index on the inbox

    The index file <box_name>.index lists post filenames, one per
    line. Posts are read from the box directory, with the inbox used
    for bookmarks. The search is case insensitive and words separated
    by '+' must all be present. Returns at most max_results filenames.
    """
    index_filename = \
        acct_dir(base_dir, nickname, domain) + '/' + box_name + '.index'
    if box_name == 'bookmarks':
        box_name = 'inbox'
    path = acct_dir(base_dir, nickname, domain) + '/' + box_name
    if not os.path.isdir(path):
        return []

    search_str = search_str.lower().strip()
    if '+' in search_str:
        search_words = [word.strip() for word in search_str.split('+')]
        print('SEARCH: ' + str(search_words))
    else:
        search_words = [search_str]

    res: list[str] = []
    try:
        with open(index_filename, 'r', encoding='utf-8') as fp_index:
            for index_line in fp_index:
                # stop at the first line which isn't a post filename
                if '.json' not in index_line:
                    break
                post_filename = path + '/' + index_line.strip()
                if not os.path.isfile(post_filename):
                    continue
                with open(post_filename, 'r', encoding='utf-8') as fp_post:
                    data = fp_post.read().lower()
                # every search word needs to be present
                if not all(keyword in data for keyword in search_words):
                    continue
                res.append(post_filename)
                if len(res) >= max_results:
                    return res
    except OSError as exc:
        print('EX: _search_virtual_box_posts unable to read ' +
              index_filename + ' ' + str(exc))
    return res
def _get_mutuals_of_person(base_dir: str,
                           nickname: str, domain: str) -> []:
    """Returns the mutuals of a person
    i.e. accounts which they follow and which also follow back

    The returned handles are in the same order as the following list.
    """
    followers = \
        get_followers_list(base_dir, nickname, domain, 'followers.txt')
    following = \
        get_followers_list(base_dir, nickname, domain, 'following.txt')
    # a set avoids scanning the whole followers list for each handle
    # NOTE(review): assumes that handles are hashable strings
    followers_set = set(followers or ())
    mutuals: list[str] = [handle for handle in following
                          if handle in followers_set]
    return mutuals
def _actor_in_searchable_by(searchable_by: str, following_list: []) -> bool:
"""Does the given actor within searchable_by exist within the given list?
"""
data_actor = searchable_by.split('/followers')[0]
if '"' in data_actor:
data_actor = data_actor.split('"')[-1]
if data_actor not in following_list:
data_nickname = get_nickname_from_actor(data_actor)
data_domain, data_port = get_domain_from_actor(data_actor)
if not data_nickname or not data_domain:
return False
data_domain_full = get_full_domain(data_domain, data_port)
data_handle = data_nickname + '@' + data_domain_full
if data_handle not in following_list:
return False
return True
2021-12-28 13:07:02 +00:00
def search_box_posts(base_dir: str, nickname: str, domain: str,
                     search_str: str, max_results: int,
                     box_name='outbox') -> []:
    """Search your posts and return a list of the filenames
    containing matching strings

    The search is case insensitive and words separated by '+' must
    all be present. If the box has no directory but has an index
    file then it is searched as a virtual box. For the inbox only
    posts whose searchableBy field permits it are returned. At most
    max_results filenames are returned.
    """
    path = acct_dir(base_dir, nickname, domain) + '/' + box_name
    if not os.path.isdir(path):
        # is this a virtual box, such as direct messages?
        if not os.path.isfile(path + '.index'):
            return []
        return _search_virtual_box_posts(base_dir, nickname, domain,
                                         search_str, max_results, box_name)

    search_str = search_str.lower().strip()
    if '+' in search_str:
        search_words = [word.strip() for word in search_str.split('+')]
        print('SEARCH: ' + str(search_words))
    else:
        search_words = [search_str]

    following_list: list[str] = []
    mutuals_list: list[str] = []
    check_searchable_by = box_name == 'inbox'
    if check_searchable_by:
        # https://codeberg.org/fediverse/fep/
        # src/branch/main/fep/268d/fep-268d.md
        # create a list containing all of the handles followed
        following_list = get_followers_list(base_dir, nickname, domain,
                                            'following.txt')
        # create a list containing all of the mutuals
        mutuals_list = _get_mutuals_of_person(base_dir, nickname, domain)

    res: list[str] = []
    for root, _, fnames in os.walk(path):
        for fname in fnames:
            file_path = os.path.join(root, fname)
            try:
                with open(file_path, 'r', encoding='utf-8') as fp_post:
                    data = fp_post.read()
                data_lower = data.lower()
                # every search word needs to be present
                if any(keyword not in data_lower
                       for keyword in search_words):
                    continue

                # if this is not an outbox/bookmarks search then is the
                # post marked as being searchable?
                # https://codeberg.org/fediverse/fep/
                # src/branch/main/fep/268d/fep-268d.md
                if check_searchable_by:
                    if '"searchableBy":' not in data:
                        continue
                    searchable_by = \
                        data.split('"searchableBy":')[1].strip()
                    if searchable_by.startswith('['):
                        searchable_by = searchable_by.split(']')[0]
                    # take the first quoted value
                    if '"' in searchable_by:
                        quote_char = '"'
                    elif "'" in searchable_by:
                        quote_char = "'"
                    else:
                        continue
                    searchable_by = searchable_by.split(quote_char)[1]
                    if '#Public' not in searchable_by:
                        if '/followers' in searchable_by and \
                           following_list:
                            permitted_list = following_list
                        elif '/mutuals' in searchable_by and mutuals_list:
                            permitted_list = mutuals_list
                        else:
                            continue
                        if not _actor_in_searchable_by(searchable_by,
                                                       permitted_list):
                            continue

                res.append(file_path)
                if len(res) >= max_results:
                    return res
            except OSError as exc:
                print('EX: search_box_posts unable to read ' +
                      file_path + ' ' + str(exc))
        # only the top level of the box directory is searched
        break
    return res
2020-05-04 18:24:30 +00:00
2021-12-29 21:55:09 +00:00
def get_file_case_insensitive(path: str) -> str:
    """Returns a case specific filename given a case insensitive version of it

    Returns the path itself if that file exists, otherwise the lower
    case version of the path if that file exists, otherwise None.
    """
    if os.path.isfile(path):
        return path
    lowercase_path = path.lower()
    if lowercase_path != path and os.path.isfile(lowercase_path):
        return lowercase_path
    return None
2020-06-06 18:16:16 +00:00
2021-12-27 23:23:07 +00:00
def undo_likes_collection_entry(recent_posts_cache: {},
                                base_dir: str, post_filename: str,
                                actor: str, domain: str, debug: bool,
                                post_json_object: {}) -> None:
    """Undoes a like for a particular actor

    The post is loaded from post_filename if post_json_object is not
    supplied. Any cached version of the post is removed, then the
    first like made by the actor is removed from the 'likes'
    collection and the post is saved. The collection is deleted
    when no likes remain.
    """
    if not post_json_object:
        post_json_object = load_json(post_filename)
    if not post_json_object:
        return
    # remove any cached version of this post so that the
    # like icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname,
                                 domain, post_json_object)
    if cached_post_filename:
        if os.path.isfile(cached_post_filename):
            try:
                os.remove(cached_post_filename)
            except OSError:
                print('EX: undo_likes_collection_entry ' +
                      'unable to delete cached post ' +
                      str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not post_json_object.get('type'):
        return
    if post_json_object['type'] != 'Create':
        return
    obj = post_json_object
    if has_object_dict(post_json_object):
        obj = post_json_object['object']
    if not obj.get('likes'):
        return
    if not isinstance(obj['likes'], dict):
        return
    like_items = obj['likes'].get('items')
    if not like_items or not isinstance(like_items, list):
        return
    item_found = False
    for like_item in like_items:
        # skip malformed entries rather than failing on them
        if not isinstance(like_item, dict):
            continue
        if not like_item.get('actor'):
            continue
        if like_item['actor'] != actor:
            continue
        if debug:
            print('DEBUG: like was removed for ' + actor)
        like_items.remove(like_item)
        item_found = True
        break
    if not item_found:
        return
    # decide using the items which actually remain, since the stored
    # totalItems value could be out of step with the list
    if not like_items:
        if debug:
            print('DEBUG: likes was removed from post')
        del obj['likes']
    else:
        obj['likes']['totalItems'] = len(like_items)

    save_json(post_json_object, post_filename)
2020-06-06 18:16:16 +00:00
2021-12-27 23:02:50 +00:00
def undo_reaction_collection_entry(recent_posts_cache: {},
                                   base_dir: str, post_filename: str,
                                   actor: str, domain: str, debug: bool,
                                   post_json_object: {},
                                   emoji_content: str) -> None:
    """Undoes an emoji reaction for a particular actor

    The post is loaded from post_filename if post_json_object is not
    supplied. Any cached version of the post is removed, then the
    first reaction by the actor having the given emoji content is
    removed from the 'reactions' collection and the post is saved.
    The collection is deleted when no reactions remain.
    """
    if not post_json_object:
        post_json_object = load_json(post_filename)
    if not post_json_object:
        return
    # remove any cached version of this post so that the
    # like icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname,
                                 domain, post_json_object)
    if cached_post_filename:
        if os.path.isfile(cached_post_filename):
            try:
                os.remove(cached_post_filename)
            except OSError:
                print('EX: undo_reaction_collection_entry ' +
                      'unable to delete cached post ' +
                      str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not post_json_object.get('type'):
        return
    if post_json_object['type'] != 'Create':
        return
    obj = post_json_object
    if has_object_dict(post_json_object):
        obj = post_json_object['object']
    if not obj.get('reactions'):
        return
    if not isinstance(obj['reactions'], dict):
        return
    reaction_items = obj['reactions'].get('items')
    if not reaction_items or not isinstance(reaction_items, list):
        return
    item_found = False
    for like_item in reaction_items:
        # skip malformed entries rather than failing on them
        if not isinstance(like_item, dict):
            continue
        if not like_item.get('actor'):
            continue
        # a reaction without any content can never match
        if like_item['actor'] == actor and \
           like_item.get('content') == emoji_content:
            if debug:
                print('DEBUG: emoji reaction was removed for ' + actor)
            reaction_items.remove(like_item)
            item_found = True
            break
    if not item_found:
        return
    # decide using the items which actually remain, since the stored
    # totalItems value could be out of step with the list
    if not reaction_items:
        if debug:
            print('DEBUG: emoji reaction was removed from post')
        del obj['reactions']
    else:
        obj['reactions']['totalItems'] = len(reaction_items)

    save_json(post_json_object, post_filename)
2021-11-10 12:16:03 +00:00
2021-12-27 10:55:48 +00:00
def undo_announce_collection_entry(recent_posts_cache: {},
                                   base_dir: str, post_filename: str,
                                   actor: str, domain: str,
                                   debug: bool) -> None:
    """Undoes an announce for a particular actor by removing it from
    the "shares" collection within a post. Note that the "shares"
    collection has no relation to shared items in shares.py. It's
    shares of posts, not shares of physical objects.

    Any cached version of the post is removed, then the first
    announce made by the actor is removed and the post is saved.
    The collection is deleted when no announces remain.
    """
    post_json_object = load_json(post_filename)
    if not post_json_object:
        return
    # remove any cached version of this announce so that the announce
    # icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname, domain,
                                 post_json_object)
    if cached_post_filename:
        if os.path.isfile(cached_post_filename):
            try:
                os.remove(cached_post_filename)
            except OSError:
                if debug:
                    print('EX: undo_announce_collection_entry ' +
                          'unable to delete cached post ' +
                          str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not post_json_object.get('type'):
        return
    if post_json_object['type'] != 'Create':
        return
    if not has_object_dict(post_json_object):
        if debug:
            pprint(post_json_object)
            print('DEBUG: post has no object')
        return
    post_obj = post_json_object['object']
    if not post_obj.get('shares'):
        return
    # the same type check as is made for likes and reactions
    if not isinstance(post_obj['shares'], dict):
        return
    announce_items = post_obj['shares'].get('items')
    if not announce_items or not isinstance(announce_items, list):
        return
    item_found = False
    for announce_item in announce_items:
        # skip malformed entries rather than failing on them
        if not isinstance(announce_item, dict):
            continue
        if not announce_item.get('actor'):
            continue
        if announce_item['actor'] != actor:
            continue
        if debug:
            print('DEBUG: Announce was removed for ' + actor)
        announce_items.remove(announce_item)
        item_found = True
        break
    if not item_found:
        return
    # decide using the items which actually remain, since the stored
    # totalItems value could be out of step with the list
    if not announce_items:
        if debug:
            print('DEBUG: shares (announcements) ' +
                  'was removed from post')
        del post_obj['shares']
    else:
        post_obj['shares']['totalItems'] = len(announce_items)

    save_json(post_json_object, post_filename)
2021-12-26 23:41:34 +00:00
def update_announce_collection(recent_posts_cache: {},
                               base_dir: str, post_filename: str,
                               actor: str, nickname: str, domain: str,
                               debug: bool) -> None:
    """Adds an announce by the given actor to the announcements
    collection stored within a post

    Confusingly the collection is held under the "shares" key, but it
    isn't the same as shared items within shares.py
    It's shares of posts, not shares of physical objects.
    The post is loaded from post_filename, any cached version of it is
    removed, and the updated json is saved back to the same file.
    Nothing is saved if the actor is already within the collection.
    """
    post_json = load_json(post_filename)
    if not post_json:
        return

    # remove any cached version of this announce so that the announce
    # icon is changed
    cached_filename = \
        get_cached_post_filename(base_dir, nickname, domain, post_json)
    if cached_filename and os.path.isfile(cached_filename):
        print('update_announce_collection: removing ' +
              cached_filename)
        try:
            os.remove(cached_filename)
        except OSError:
            if debug:
                print('EX: update_announce_collection ' +
                      'unable to delete cached post ' +
                      str(cached_filename))
    remove_post_from_cache(post_json, recent_posts_cache)

    if not has_object_dict(post_json):
        if debug:
            pprint(post_json)
            print('DEBUG: post ' + post_filename + ' has no object')
        return

    post_url = remove_id_ending(post_json['id']) + '/shares'
    post_obj = post_json['object']
    announce_entry = {
        'type': 'Announce',
        'actor': actor
    }
    if not post_obj.get('shares'):
        # this is the first announce of the post
        if debug:
            print('DEBUG: Adding initial shares (announcements) to ' +
                  post_url)
        post_obj['shares'] = {
            "@context": [
                'https://www.w3.org/ns/activitystreams',
                'https://w3id.org/security/v1'
            ],
            'id': post_url,
            'type': 'Collection',
            "totalItems": 1,
            'items': [announce_entry]
        }
    elif post_obj['shares'].get('items'):
        shares_items = post_obj['shares']['items']
        # an actor only appears once within the collection
        already_announced = \
            any(item.get('actor') and item['actor'] == actor
                for item in shares_items)
        if already_announced:
            return
        shares_items.append(announce_entry)
        post_obj['shares']['totalItems'] = len(shares_items)
    elif debug:
        print('DEBUG: shares (announcements) section of post ' +
              'has no items list')

    if debug:
        print('DEBUG: saving post with shares (announcements) added')
        pprint(post_json)
    save_json(post_json, post_filename)
2020-06-22 16:55:19 +00:00
2021-12-26 20:52:11 +00:00
def week_day_of_month_start(month_number: int, year: int) -> int:
    """Returns the day of the week on which the given month begins
    1=sun, 7=sat
    """
    month_start = date_from_numbers(year, month_number, 1, 0, 0)
    # strftime %w numbers the days from 0=sunday, so shift by one
    zero_based_weekday = int(month_start.strftime("%w"))
    return zero_based_weekday + 1
2020-11-13 13:34:14 +00:00
2021-12-26 20:48:15 +00:00
def media_file_mime_type(filename: str) -> str:
    """Given a media filename return its mime type

    The type is chosen using the text after the final dot within the
    filename, compared without regard to case. If there is no
    extension, or the extension isn't recognised, then image/png
    is returned.
    """
    if '.' not in filename:
        return 'image/png'
    # maps a lower case file extension to its mime type
    extensions = {
        'json': 'application/json',
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jxl': 'image/jxl',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'svg': 'image/svg+xml',
        'webp': 'image/webp',
        'avif': 'image/avif',
        'heic': 'image/heic',
        'ico': 'image/x-icon',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg',
        'wav': 'audio/vnd.wave',
        'opus': 'audio/opus',
        'spx': 'audio/speex',
        'flac': 'audio/flac',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv'
    }
    # extensions such as .JPG should match the same as .jpg
    file_ext = filename.split('.')[-1].lower()
    return extensions.get(file_ext, 'image/png')
def time_days_ago(datestr: str) -> int:
    """Returns how many days ago the given date was

    The date is parsed using the format %Y-%m-%dT%H:%M:%S%z
    Zero is returned if it could not be parsed.
    """
    date_formats = ["%Y-%m-%dT%H:%M:%S%z"]
    then = date_from_string_format(datestr, date_formats)
    if not then:
        return 0
    elapsed = date_utcnow() - then
    return elapsed.days
2021-12-26 20:39:35 +00:00
def camel_case_split(text: str) -> str:
    """ Splits CamelCase into "Camel Case"

    A word boundary is recognised where a lower case letter is followed
    by an upper case one, and before the final capital of a run of
    capitals which is followed by a lower case letter, so that
    "HTTPServer" becomes "HTTP Server".
    The words are joined by single spaces and the result is stripped.
    """
    # re.finditer always returns an iterator, which is truthy even when
    # nothing matches, so no separate "no matches" test is needed
    words = re.finditer('.+?(?:(?<=[a-z])(?=[A-Z])|' +
                        '(?<=[A-Z])(?=[A-Z][a-z])|$)', text)
    return ' '.join(word.group(0) for word in words).strip()
2021-03-05 19:00:37 +00:00
2021-12-29 10:39:46 +00:00
def convert_to_snake_case(text: str) -> str:
    """Converts camel case text into snake case

    The text is split into words by camel_case_split, lower cased,
    and the spaces between words are replaced by underscores.
    """
    spaced_words = camel_case_split(text)
    lower_words = spaced_words.lower()
    return lower_words.replace(' ', '_')
def _convert_to_camel_case(text: str) -> str:
    """Converts a snake case string to camel case
    eg. some_text_here becomes someTextHere

    Text which contains no underscores is returned unchanged.
    """
    if '_' not in text:
        return text
    first_word, *later_words = text.split('_')
    # the first word is lower case and each later word is capitalised
    return first_word.lower() + \
        ''.join(wrd.title() for wrd in later_words)
2021-12-26 20:20:36 +00:00
def reject_post_id(base_dir: str, nickname: str, domain: str,
                   post_id: str, recent_posts_cache: {},
                   debug: bool) -> None:
    """ Marks the given post as rejected,
    for example an announce which is too old

    The post is removed from the recent posts cache, a file with
    the extension .reject is written alongside the post, and the
    post is removed from the inbox index of the account.
    Nothing happens if the post can't be located.
    """
    post_filename = locate_post(base_dir, nickname, domain, post_id)
    if not post_filename:
        return

    # filename of the post without any extension or path
    # This should also correspond to any index entry in
    # the posts cache
    post_url = remove_eol(post_filename.split('/')[-1])
    post_url = post_url.replace('.json', '').strip()

    if recent_posts_cache.get('index'):
        if post_url in recent_posts_cache['index']:
            for section in ('json', 'html'):
                if recent_posts_cache[section].get(post_url):
                    del recent_posts_cache[section][post_url]

    reject_filename = post_filename + '.reject'
    try:
        with open(reject_filename, 'w+',
                  encoding='utf-8') as fp_reject:
            fp_reject.write('\n')
    except OSError:
        print('EX: reject_post_id unable to write ' +
              post_filename + '.reject')

    # if the post is in the inbox index then remove it
    index_file = \
        acct_dir(base_dir, nickname, domain) + '/inbox.index'
    index_entry = post_url.replace('/', '#') + '.json'
    remove_post_from_index(index_entry, debug, index_file)
2024-10-23 12:20:07 +00:00
2021-12-26 19:12:02 +00:00
def load_translations_from_file(base_dir: str, language: str) -> ({}, str):
    """Returns the translations dictionary together with the
    language code which was used to select it

    If no language is given then the system locale is used.
    If there is no translations file for the language then
    English is used instead.
    None, None is returned if the translations directory is missing.
    """
    translations_dir = base_dir + '/translations'
    if not os.path.isdir(translations_dir):
        print('ERROR: translations directory not found')
        return None, None

    # use the given language, otherwise the system one, otherwise English
    system_language = language or locale.getlocale()[0] or 'en'

    # reduce values such as en_GB or en.UTF-8 to the initial code
    system_language = system_language.split('_')[0]
    while '/' in system_language:
        system_language = system_language.split('/')[1]
    system_language = system_language.split('.')[0]

    translations_file = \
        translations_dir + '/' + system_language + '.json'
    if not os.path.isfile(translations_file):
        system_language = 'en'
        translations_file = \
            translations_dir + '/' + system_language + '.json'
    return load_json(translations_file), system_language
2021-04-22 09:27:20 +00:00
2021-12-26 19:09:04 +00:00
def dm_allowed_from_domain(base_dir: str,
                           nickname: str, domain: str,
                           sending_actor_domain: str) -> bool:
    """When a DM is received and the .followDMs flag file exists
    Then optionally some domains can be specified as allowed,
    regardless of individual follows.
    i.e. Mostly you only want DMs from followers, but there are
    a few particular instances that you trust

    Returns true if the sending domain, followed by a newline,
    appears within the dmAllowedInstances.txt file of the account.
    """
    allowed_filename = \
        acct_dir(base_dir, nickname, domain) + '/dmAllowedInstances.txt'
    if not os.path.isfile(allowed_filename):
        return False
    search_text = sending_actor_domain + '\n'
    return bool(text_in_file(search_text, allowed_filename))
2021-05-16 15:10:39 +00:00
2021-12-26 19:01:36 +00:00
def get_occupation_skills(actor_json: {}) -> []:
    """Returns the list of skills for an actor

    The skills are taken from the first Occupation entry within
    hasOccupation which has any skills. A single skill stored as a
    string is returned as a list containing one item.
    An empty list is returned if no skills are found.
    """
    if 'hasOccupation' not in actor_json:
        return []
    occupations = actor_json['hasOccupation']
    if not isinstance(occupations, list):
        return []
    for item in occupations:
        if not isinstance(item, dict):
            continue
        if item.get('@type') != 'Occupation':
            continue
        skills = item.get('skills')
        if not skills:
            continue
        if isinstance(skills, list):
            return skills
        if isinstance(skills, str):
            return [skills]
        # the skills are neither a list nor a string, so stop searching
        break
    return []
2021-12-26 18:58:06 +00:00
def get_occupation_name(actor_json: {}) -> str:
    """Returns the occupation name of an actor

    The name is taken from the first Occupation entry within
    hasOccupation which has a name. An empty string is returned
    if there isn't one, or if the name is not a string.
    """
    occupations = actor_json.get('hasOccupation')
    if not occupations or not isinstance(occupations, list):
        return ""
    for item in occupations:
        if not isinstance(item, dict):
            continue
        if item.get('@type') != 'Occupation':
            continue
        name = item.get('name')
        if not name:
            continue
        if isinstance(name, str):
            return name
        # the name isn't a string, so stop searching
        break
    return ""
2021-12-26 18:55:07 +00:00
def set_occupation_name(actor_json: {}, name: str) -> bool:
    """Sets the occupation name of an actor

    The name is stored on the first Occupation entry within
    hasOccupation. Returns true if an entry was updated.
    """
    occupations = actor_json.get('hasOccupation')
    if not occupations or not isinstance(occupations, list):
        return False
    for item in occupations:
        if isinstance(item, dict) and item.get('@type') == 'Occupation':
            item['name'] = name
            return True
    return False
2021-12-26 18:50:59 +00:00
def set_occupation_skills_list(actor_json: {}, skills_list: []) -> bool:
    """Sets the occupation skills for an actor

    The skills are stored on the first Occupation entry within
    hasOccupation. Returns true if an entry was updated.
    """
    if 'hasOccupation' not in actor_json:
        return False
    occupations = actor_json['hasOccupation']
    if not isinstance(occupations, list):
        return False
    for item in occupations:
        if isinstance(item, dict) and item.get('@type') == 'Occupation':
            item['skills'] = skills_list
            return True
    return False
2021-12-26 18:40:10 +00:00
def permitted_dir(path: str) -> bool:
    """Returns false for special paths which should not be accessible
    directly via GET or POST
    """
    blocked_prefixes = ('/wfendpoints', '/keys', '/accounts')
    return not path.startswith(blocked_prefixes)
2021-06-20 15:45:29 +00:00
2021-12-26 18:37:07 +00:00
def user_agent_domain(user_agent: str, debug: bool) -> str:
    """If the User-Agent string contains a domain
    then return it

    The domain is taken from the text following https:// or http://
    None is returned if there is no such url, or if the text found
    does not contain a dot.
    """
    if 'https://' in user_agent:
        scheme = 'https://'
    elif 'http://' in user_agent:
        scheme = 'http://'
    else:
        return None
    agent_domain = user_agent.split(scheme)[1].strip()
    # remove any path after the domain
    agent_domain = agent_domain.split('/')[0]
    # remove any closing bracket and whatever follows it
    if ')' in agent_domain:
        agent_domain = agent_domain.split(')')[0].strip()
    agent_domain = agent_domain.replace(' ', '').replace(';', '')
    if '.' not in agent_domain:
        return None
    if debug:
        print('User-Agent Domain: ' + agent_domain)
    return agent_domain
2021-12-26 18:32:02 +00:00
def get_alt_path(actor: str, domain_full: str, calling_domain: str) -> str:
    """Returns alternate path from the actor
    eg. https://clearnetdomain/path becomes http://oniondomain/path

    The actor is only altered if the calling domain is an onion or
    i2p domain which doesn't already appear within it.
    """
    if calling_domain in actor or domain_full not in actor:
        return actor
    if not calling_domain.endswith(('.onion', '.i2p')):
        return actor
    actor_path = actor.split(domain_full)[1]
    post_actor = 'http://' + calling_domain + actor_path
    print('Changed POST domain from ' + actor + ' to ' + post_actor)
    return post_actor
2021-06-26 11:16:41 +00:00
2021-12-26 18:22:20 +00:00
def get_actor_property_url(actor_json: {}, property_name: str) -> str:
    """Returns a url property from an actor

    The attachments of the actor are searched for a PropertyValue
    whose name begins with the given property name, ignoring case,
    and whose value begins with a known protocol prefix.
    An empty string is returned if no suitable url is found.
    Note that the 'value' field of a matching attachment is
    overwritten with its stripped text.
    """
    if not actor_json.get('attachment'):
        return ''
    property_name = property_name.lower()
    for property_value in actor_json['attachment']:
        # the name can be stored under either of two keys
        name_value = \
            property_value.get('name') or property_value.get('schema:name')
        if not name_value:
            continue
        if not name_value.lower().startswith(property_name):
            continue
        if not property_value.get('type'):
            continue
        prop_value_name, _ = \
            get_attachment_property_value(property_value)
        if not prop_value_name:
            continue
        if not property_value['type'].endswith('PropertyValue'):
            continue
        property_value['value'] = property_value[prop_value_name].strip()
        prefixes = get_protocol_prefixes()
        prop_value = remove_html(property_value[prop_value_name])
        if not any(prop_value.startswith(prefix) for prefix in prefixes):
            continue
        # the url should contain a dot but no spaces or commas
        if '.' not in prop_value or \
           ' ' in prop_value or \
           ',' in prop_value:
            continue
        return prop_value
    return ''
2021-06-26 14:21:24 +00:00
2021-12-26 18:17:37 +00:00
def remove_domain_port(domain: str) -> str:
    """If the domain has a port appended then remove it
    eg. mydomain.com:80 becomes mydomain.com

    Anything beginning with did: is returned unchanged.
    """
    if ':' not in domain or domain.startswith('did:'):
        return domain
    return domain.split(':')[0]
2021-12-26 18:14:21 +00:00
def get_port_from_domain(domain: str) -> int:
    """If the domain has a port number appended then return it
    eg. mydomain.com:80 returns 80

    None is returned if there is no port, if the text after the
    first colon isn't a number, or if the domain begins with did:
    """
    if ':' not in domain:
        return None
    if domain.startswith('did:'):
        return None
    port_str = domain.split(':')[1]
    return int(port_str) if port_str.isdigit() else None
2021-07-06 09:44:45 +00:00
2021-12-26 18:10:53 +00:00
def valid_url_prefix(url: str) -> bool:
    """Does the given url have a valid prefix?

    The url must also contain at least one slash.
    """
    if '/' not in url:
        return False
    return url.startswith(('https:', 'http:', 'hyper:', 'i2p:', 'gnunet:'))
2024-04-16 13:47:21 +00:00
def valid_password(password: str, debug: bool) -> bool:
    """Returns true if the given password contains valid characters and
    is within a range of lengths

    The password must be between 8 and 1024 characters long and must
    be unchanged by remove_eol, which is used to check for trailing
    end of line or carriage returns.
    """
    min_length = 8
    max_length = 1024
    password_length = len(password)
    if password_length < min_length or password_length > max_length:
        if debug:
            # report the same range which is actually being enforced
            print('WARN: password length out of range (' +
                  str(min_length) + '-' + str(max_length) + '): ' +
                  str(password_length))
        return False
    # check for trailing end of line or carriage returns
    if remove_eol(password) != password:
        return False
    return True
2021-07-25 13:09:39 +00:00
2021-12-26 18:01:02 +00:00
def date_string_to_seconds(date_str: str) -> int:
    """Converts a date string (eg "published") into seconds since epoch

    The date is parsed using the format %Y-%m-%dT%H:%M:%S%z
    None is returned if it could not be parsed.
    """
    date_formats = ['%Y-%m-%dT%H:%M:%S%z']
    parsed_date = date_from_string_format(date_str, date_formats)
    if parsed_date:
        return _datetime_to_timestamp(parsed_date)
    print('EX: date_string_to_seconds unable to parse date ' +
          str(date_str))
    return None
2021-07-28 09:35:21 +00:00
2021-12-26 17:55:38 +00:00
def date_seconds_to_string(date_sec: int) -> str:
    """Converts a date in seconds since epoch to a string

    The string is the UTC time in the format %Y-%m-%dT%H:%M:%SZ
    """
    # fromtimestamp with an explicit timezone always returns a timezone
    # aware UTC datetime, so no further conversion is needed
    this_date = \
        datetime.datetime.fromtimestamp(date_sec, datetime.timezone.utc)
    return this_date.strftime("%Y-%m-%dT%H:%M:%SZ")
2021-07-30 16:06:34 +00:00
2021-12-26 17:29:09 +00:00
def get_currencies() -> {}:
    """Returns a dictionary of currencies

    Each key is a currency symbol and each value is the
    corresponding three letter currency code.
    """
    # every symbol must be distinct, since repeated keys within
    # a dictionary silently replace each other
    return {
        "CA$": "CAD",
        "J$": "JMD",
        "£": "GBP",
        "€": "EUR",
        "؋": "AFN",
        "ƒ": "AWG",
        "₼": "AZN",
        "Br": "BYN",
        "BZ$": "BZD",
        "$b": "BOB",
        "KM": "BAM",
        "P": "BWP",
        "лв": "BGN",
        "R$": "BRL",
        "៛": "KHR",
        "$U": "UYU",
        "RD$": "DOP",
        "$": "USD",
        "₡": "CRC",
        "kn": "HRK",
        "₱": "CUP",
        "Kč": "CZK",
        "kr": "NOK",
        "¢": "GHS",
        "Q": "GTQ",
        "L": "HNL",
        "Ft": "HUF",
        "Rp": "IDR",
        "₹": "INR",
        "﷼": "IRR",
        "₪": "ILS",
        "¥": "JPY",
        "₩": "KRW",
        "₭": "LAK",
        "ден": "MKD",
        "RM": "MYR",
        "₨": "MUR",
        "₮": "MNT",
        "MT": "MZN",
        "C$": "NIO",
        "₦": "NGN",
        "Gs": "PYG",
        "zł": "PLN",
        "lei": "RON",
        "₽": "RUB",
        "Дин": "RSD",
        "S": "SOS",
        "R": "ZAR",
        "CHF": "CHF",
        "NT$": "TWD",
        "฿": "THB",
        "TT$": "TTD",
        "₴": "UAH",
        "Bs": "VEB",
        "₫": "VND",
        "Z$": "ZQD"
    }
2021-08-08 11:16:18 +00:00
2021-12-26 17:26:55 +00:00
def get_supported_languages(base_dir: str) -> []:
    """Returns a list of supported languages

    These are the two letter names of the json files found directly
    within the translations directory. Subdirectories aren't searched.
    """
    translations_dir = base_dir + '/translations'
    # only the top level of the directory is examined
    _, _, filenames = next(os.walk(translations_dir), (None, None, []))
    language_names = (fname.split('.')[0] for fname in filenames
                      if fname.endswith('.json'))
    return [lang for lang in language_names if len(lang) == 2]
2021-12-26 17:18:34 +00:00
def get_category_types(base_dir: str) -> []:
    """Returns the list of ontologies

    These are taken from the names of the json files found directly
    within the ontology directory which contain the text Types, with
    that text removed. Filenames which contain # or ~, or which begin
    with custom, are ignored.
    """
    ontology_dir = base_dir + '/ontology'
    categories: list[str] = []
    # only the top level of the directory is examined
    _, _, filenames = next(os.walk(ontology_dir), (None, None, []))
    for fname in filenames:
        has_excluded_char = '#' in fname or '~' in fname
        if not fname.endswith('.json') or \
           has_excluded_char or \
           fname.startswith('custom'):
            continue
        ontology_name = fname.split('.')[0]
        if 'Types' in ontology_name:
            categories.append(ontology_name.replace('Types', ''))
    return categories
2021-12-26 17:24:00 +00:00
def get_shares_files_list() -> []:
    """Returns the possible shares files

    Note that these are returned as a tuple.
    """
    shares_file_types = ('shares', 'wanted')
    return shares_file_types
2021-12-26 17:21:37 +00:00
def replace_users_with_at(actor: str) -> str:
    """ https://domain/users/nick becomes https://domain/@nick

    Only the first users path found within the actor is considered,
    and the actor is left unchanged if it already contains /@/
    """
    for users_path in get_user_paths():
        if users_path not in actor:
            continue
        if '/@/' not in actor:
            actor = actor.replace(users_path, '/@')
        break
    return actor
2024-01-10 11:04:41 +00:00
def get_actor_from_post(post_json_object: {}) -> str:
    """Gets the actor url from the given post

    An empty string is returned if the post has no actor, or if
    the actor doesn't resemble a url.
    """
    actor = post_json_object.get('actor')
    if not actor:
        return ''
    actor_id = None
    if isinstance(actor, str):
        # conventionally the actor is just a string url
        actor_id = actor
    elif isinstance(actor, dict):
        # in pixelfed/friendica the actor is sometimes a dict
        # with a lot of properties
        dict_id = actor.get('id')
        if dict_id and isinstance(dict_id, str):
            actor_id = dict_id
    # looks vaguely like a url
    if actor_id and resembles_url(actor_id):
        return actor_id
    return ''
2021-12-26 17:15:04 +00:00
def has_actor(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an actor?

    The actor url obtained via get_actor_from_post must be non-empty
    and must not contain a # character.
    """
    if not post_json_object.get('actor'):
        if debug and post_json_object.get('type'):
            msg = post_json_object['type'] + ' has missing actor'
            if post_json_object.get('id'):
                msg += ' ' + post_json_object['id']
            print(msg)
        return False
    actor_url = get_actor_from_post(post_json_object)
    if not actor_url or '#' in actor_url:
        return False
    return True
2021-10-13 10:11:02 +00:00
2022-04-09 15:11:22 +00:00
def has_object_string_type(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have a type field within an object dict?

    Returns true only if the object of the post is a dict which
    has a type field containing a string.
    """
    if not has_object_dict(post_json_object):
        if debug:
            print('has_object_string_type no object found')
        return False
    object_type = post_json_object['object'].get('type')
    if object_type:
        if isinstance(object_type, str):
            return True
        if post_json_object.get('type'):
            print('DEBUG: ' + str(post_json_object['type']) +
                  ' type within object is not a string ' +
                  str(post_json_object))
    if debug:
        # the post might not have an id, so don't index it directly
        print('No type field within object ' +
              str(post_json_object.get('id')))
    return False
2021-10-13 10:37:52 +00:00
2021-12-26 15:54:46 +00:00
def has_object_string_object(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an object string field within an object dict?

    Returns true only if the object of the post is a dict which
    itself has an object field containing a string.
    """
    if not has_object_dict(post_json_object):
        if debug:
            print('has_object_string_object no object found')
        return False
    inner_object = post_json_object['object'].get('object')
    if inner_object:
        if isinstance(inner_object, str):
            return True
        if debug:
            if post_json_object.get('type'):
                print('DEBUG: ' + str(post_json_object['type']) +
                      ' object within dict is not a string')
    if debug:
        # the post might not have an id, so don't index it directly
        print('No object field within dict ' +
              str(post_json_object.get('id')))
    return False
2021-10-13 11:15:06 +00:00
2021-12-26 17:12:07 +00:00
def has_object_string(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an object string field?

    Returns true only if the post has an object field
    which contains a string.
    """
    post_object = post_json_object.get('object')
    if post_object:
        if isinstance(post_object, str):
            return True
        if debug:
            if post_json_object.get('type'):
                print('DEBUG: ' + str(post_json_object['type']) +
                      ' object is not a string')
    if debug:
        # the post might not have an id, so don't index it directly
        print('No object field within post ' +
              str(post_json_object.get('id')))
    return False
2021-11-03 11:25:26 +00:00
2021-12-26 16:59:38 +00:00
def get_new_post_endpoints() -> []:
    """Returns the endpoints which can be used to create new posts

    Note that these are returned as a tuple.
    """
    new_post_endpoints = (
        'newpost', 'newblog', 'newunlisted', 'newfollowers', 'newdm',
        'newreminder', 'newreport', 'newquestion', 'newshare', 'newwanted',
        'editblogpost', 'newreadingstatus'
    )
    return new_post_endpoints
2021-12-17 12:01:54 +00:00
2021-12-26 16:59:38 +00:00
def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str:
    """Returns the cached filename for a favicon based upon its url

    The protocol prefix is removed, /favicon. is shortened to a dot
    and any remaining slashes are replaced by hyphens, giving
    a filename within the favicons directory.
    """
    if '://' in favicon_url:
        favicon_url = favicon_url.split('://')[1]
    favicon_url = favicon_url.replace('/favicon.', '.')
    cached_name = favicon_url.replace('/', '-')
    return base_dir + '/favicons/' + cached_name
2022-01-13 15:10:41 +00:00
def valid_hash_tag(hashtag: str) -> bool:
    """Returns true if the given hashtag contains valid characters

    Hashtags of 32 or more characters, or which consist only
    of digits, are never valid.
    """
    # long hashtags are not valid
    if len(hashtag) >= 32:
        return False
    # numbers are not permitted to be hashtags
    if hashtag.isdigit():
        return False
    if all(char in VALID_HASHTAG_CHARS for char in hashtag):
        return True
    return bool(_is_valid_language(hashtag))
2022-02-25 19:12:40 +00:00
def convert_published_to_local_timezone(published, timezone: str) -> str:
    """Converts a post published time into local time

    published is treated as a UTC time, with any existing timezone
    information being replaced, and is converted into the named
    timezone. If no timezone is given then it is returned unchanged.
    Note that the value returned is a datetime rather than a string.
    """
    if not timezone:
        return published
    # always bind to_zone, so that a failed lookup can't leave it undefined
    to_zone = None
    try:
        to_zone = tz.gettz(timezone)
    except Exception:
        print('EX: convert_published_to_local_timezone ' +
              'unable to look up timezone ' + str(timezone))
    from_zone = tz.gettz('UTC')
    utc = published.replace(tzinfo=from_zone)
    # NOTE(review): if the timezone lookup gave nothing then to_zone is
    # None and astimezone presumably uses the system local timezone
    local_time = utc.astimezone(to_zone)
    return local_time
def load_account_timezones(base_dir: str) -> {}:
    """Returns a dictionary containing the preferred timezone for each account

    The dictionary is indexed by nickname. Accounts which have no
    timezone.txt file, or whose file is empty, are not included.
    """
    account_timezone = {}
    dir_str = data_dir(base_dir)
    # only the top level of the data directory is examined
    _, acct_dirs, _ = next(os.walk(dir_str), (None, [], None))
    for acct in acct_dirs:
        if '@' not in acct:
            continue
        if acct.startswith(('inbox@', 'Actor@')):
            continue
        tz_filename = os.path.join(dir_str, acct) + '/timezone.txt'
        if not os.path.isfile(tz_filename):
            continue
        timezone = None
        try:
            with open(tz_filename, 'r', encoding='utf-8') as fp_timezone:
                timezone = fp_timezone.read().strip()
        except OSError:
            print('EX: load_account_timezones unable to read ' +
                  tz_filename)
        if not timezone:
            continue
        nickname = acct.split('@')[0]
        account_timezone[nickname] = timezone
    return account_timezone
2022-03-24 13:14:41 +00:00
def load_bold_reading(base_dir: str) -> {}:
    """Returns a dictionary containing the bold reading status for each account

    The dictionary is indexed by nickname, and only contains the
    accounts which have a .boldReading file.
    """
    bold_reading = {}
    dir_str = data_dir(base_dir)
    # only the top level of the data directory is examined
    _, acct_dirs, _ = next(os.walk(dir_str), (None, [], None))
    for acct in acct_dirs:
        if '@' not in acct:
            continue
        if acct.startswith(('inbox@', 'Actor@')):
            continue
        flag_filename = dir_str + '/' + acct + '/.boldReading'
        if not os.path.isfile(flag_filename):
            continue
        bold_reading[acct.split('@')[0]] = True
    return bold_reading
def load_hide_follows(base_dir: str) -> {}:
    """Returns a dictionary containing the hide follows status for each account

    The dictionary is indexed by nickname, and only contains the
    accounts which have a .hideFollows file.
    """
    hide_follows = {}
    dir_str = data_dir(base_dir)
    # only the top level of the data directory is examined
    _, acct_dirs, _ = next(os.walk(dir_str), (None, [], None))
    for acct in acct_dirs:
        if '@' not in acct:
            continue
        if acct.startswith(('inbox@', 'Actor@')):
            continue
        flag_filename = dir_str + '/' + acct + '/.hideFollows'
        if not os.path.isfile(flag_filename):
            continue
        hide_follows[acct.split('@')[0]] = True
    return hide_follows
2022-02-25 19:12:40 +00:00
def get_account_timezone(base_dir: str, nickname: str, domain: str) -> str:
    """Returns the timezone for the given account

    This is the stripped content of the timezone.txt file within the
    account directory. None is returned if the file doesn't exist
    or can't be read.
    """
    tz_filename = \
        acct_dir(base_dir, nickname, domain) + '/timezone.txt'
    if not os.path.isfile(tz_filename):
        return None
    try:
        with open(tz_filename, 'r', encoding='utf-8') as fp_timezone:
            return fp_timezone.read().strip()
    except OSError:
        print('EX: get_account_timezone unable to read ' + tz_filename)
    return None
2022-02-25 19:57:31 +00:00
def set_account_timezone(base_dir: str, nickname: str, domain: str,
                         timezone: str) -> None:
    """Sets the timezone for the given account

    The stripped timezone string is written to timezone.txt within the
    account directory. A failure to write is reported but not raised.
    """
    account_path = acct_dir(base_dir, nickname, domain)
    tz_filename = account_path + '/timezone.txt'
    stripped_timezone = timezone.strip()
    try:
        with open(tz_filename, 'w+', encoding='utf-8') as fp_timezone:
            fp_timezone.write(stripped_timezone)
    except OSError:
        print('EX: set_account_timezone unable to write ' + tz_filename)
2024-01-28 19:11:55 +00:00
def _is_onion_request(calling_domain: str, referer_domain: str,
                      domain: str, onion_domain: str) -> bool:
    """Do the given domains indicate that this is a request
    from an onion instance

    Always False when no onion domain is configured. Otherwise True if
    the instance domain is the onion domain, or if either the calling
    domain or the referer domain ends with .onion
    """
    if not onion_domain:
        return False
    if domain == onion_domain or calling_domain.endswith('.onion'):
        return True
    # the referer is optional
    return bool(referer_domain) and referer_domain.endswith('.onion')
2024-01-28 19:11:55 +00:00
def _is_i2p_request(calling_domain: str, referer_domain: str,
                    domain: str, i2p_domain: str) -> bool:
    """Do the given domains indicate that this is a request
    from an i2p instance

    Always False when no i2p domain is configured. Otherwise True if
    the instance domain is the i2p domain, or if either the calling
    domain or the referer domain ends with .i2p
    """
    if not i2p_domain:
        return False
    if domain == i2p_domain or calling_domain.endswith('.i2p'):
        return True
    # the referer is optional
    return bool(referer_domain) and referer_domain.endswith('.i2p')
def disallow_announce(content: str, attachment: [], capabilities: {}) -> bool:
    """Are announces/boosts not allowed for the given post?

    Returns True if any of the following holds:
    - capabilities are supplied without an 'announce' entry, or with an
      'announce' string which does not end with #Public
    - the lower cased content contains one of the "no boost" markers
    - an image attachment has no description, or has a description
      shorter than 5 characters
    """
    # pixelfed style capabilities
    if capabilities:
        if 'announce' not in capabilities:
            # capabilities exist but with no announce defined
            return True
        announce = capabilities['announce']
        if isinstance(announce, str) and not announce.endswith('#Public'):
            # TODO handle non-public announce permissions
            print('CAPABILITIES: announce ' + announce)
            return True

    # emojis and phrases asking for the post not to be boosted
    disallow_strings = (
        ':boost_no:',
        ':noboost:',
        ':noboosts:',
        ':no_boost:',
        ':no_boosts:',
        ':boosts_no:',
        'dont_repeat',
        'dont_announce',
        'dont_boost',
        'do not boost',
        "don't boost",
        'boost_denied',
        'boosts_denied',
        'boostdenied',
        'boostsdenied'
    )
    content_lower = content.lower()
    if any(diss in content_lower for diss in disallow_strings):
        return True

    # check for attached images without descriptions
    if not isinstance(attachment, list):
        return False
    for item in attachment:
        if not isinstance(item, dict):
            continue
        if not (item.get('mediaType') and item.get('url')):
            continue
        if not item['mediaType'].startswith('image/'):
            continue
        image_description = item.get('name')
        if not image_description:
            # no image description
            return True
        if isinstance(image_description, str) and \
           len(image_description) < 5:
            # not enough description
            return True
    return False
def disallow_reply(content: str) -> bool:
    """Are replies not allowed for the given post?

    Returns True if the lower cased content contains any of the
    "no reply" emoji names or phrases listed below.
    """
    disallow_strings = (
        ':reply_no:',
        ':noreply:',
        ':noreplies:',
        ':no_reply:',
        ':no_replies:',
        ':no_responses:',
        ':replies_no:',
        'dont_at_me',
        'do not reply',
        "don't reply",
        "don't @ me",
        'dont@me',
        'dontatme',
        'noresponses'
    )
    content_lower = content.lower()
    return any(diss in content_lower for diss in disallow_strings)
def get_attachment_property_value(property_value: {}) -> (str, str):
    """Returns the fieldname and value for an attachment property

    The candidate fields are tried in order and the first one having a
    non-empty value wins. (None, None) is returned if none of them do.
    """
    candidate_fields = (
        'value',
        'http://schema.org#value',
        'https://schema.org#value',
        'href'
    )
    for prop_value_name in candidate_fields:
        prop_value = property_value.get(prop_value_name)
        if prop_value:
            return prop_value_name, prop_value
    return None, None
def safe_system_string(text: str) -> str:
    """Returns a version of a string intended for use within a
    system command

    Every '$(' is replaced by '(' and every backtick is removed.
    No other characters are altered.
    """
    without_subshell = text.replace('$(', '(')
    return without_subshell.replace('`', '')
2022-09-13 09:27:43 +00:00
def get_json_content_from_accept(accept: str) -> str:
    """returns the json content type for the given accept

    This is application/ld+json if the accept string mentions it,
    and application/json otherwise, including when accept is empty.
    """
    if accept and 'application/ld+json' in accept:
        return 'application/ld+json'
    return 'application/json'
2022-09-25 17:26:11 +00:00
def remove_inverted_text(text: str, system_language: str) -> str:
    """Removes any inverted text from the given string

    Only applied when the system language is 'en'. The text is first
    passed through uninvert_text, then handled a paragraph at a time:
    known upside-down characters are replaced by the letters which they
    resemble, and a paragraph in which more than two different inverted
    characters were found is also reversed.
    Paragraphs are delimited by </p> if that appears within the text,
    and by newlines otherwise. Empty paragraphs are dropped.
    """
    if system_language != 'en':
        return text

    text = uninvert_text(text)

    # Each alphabet is listed from z down to a, with '_' marking letters
    # which have no inverted equivalent. It is paired with the code
    # point that the position within the string is subtracted from.
    inverted_alphabets = (
        ("_ʎ_ʍʌ_ʇ_ɹ____ɯʃʞɾıɥƃɟǝ_ɔ_ɐ", ord('z')),
        ("_⅄__ᴧ∩⊥_ᴚΌԀ_ᴎ_⅂⋊ſ__⅁ℲƎ◖Ↄ𐐒∀", ord('Z'))
    )

    if '</p>' in text:
        text = text.replace('<p>', '')
        start_separator = '<p>'
        separator = '</p>'
    else:
        start_separator = ''
        separator = '\n'
    separator_in_text = separator in text

    new_text = ''
    for para in text.split(separator):
        replaced_chars = 0
        for inverted_chars, z_value in inverted_alphabets:
            for index, test_ch in enumerate(inverted_chars):
                if test_ch == '_' or test_ch not in para:
                    continue
                para = para.replace(test_ch, chr(z_value - index))
                replaced_chars += 1
        if replaced_chars > 2:
            # inverted text also reads backwards
            para = para[::-1]
        if para:
            new_text += start_separator + para
            if separator_in_text:
                new_text += separator
    return new_text
2022-10-05 17:55:24 +00:00
def remove_square_capitals(text: str, system_language: str) -> str:
    """Removes any square capital text from the given string

    Only applied when the system language is 'en'. Each of the 26
    consecutive code points beginning at U+1F170 (the squared letter A)
    is replaced by the matching letter from A to Z.
    All other characters are left as they are.
    """
    if system_language != 'en':
        return text
    offset = ord('A')
    # U+1F170 is the first squared capital letter
    start_value = ord('\U0001F170')
    # There are 26 letters, so the last one is 25 places after the first.
    # Going one further would turn U+1F18A into '['
    end_value = start_value + 25
    return ''.join(
        chr(offset + ord(text_ch) - start_value)
        if start_value <= ord(text_ch) <= end_value else text_ch
        for text_ch in text)
2022-11-11 11:26:17 +00:00
def dont_speak_hashtags(content: str) -> str:
    """Ensure that hashtags aren't announced by screen readers

    A '#' which sits directly between a closing '>' and an opening
    '<span' is wrapped within a span having aria-hidden set.
    Empty content is returned unchanged.
    """
    if content:
        visible_hash = '>#<span'
        hidden_hash = '><span aria-hidden="true">#</span><span'
        return content.replace(visible_hash, hidden_hash)
    return content
def load_min_images_for_accounts(base_dir: str) -> []:
    """Loads a list of nicknames for accounts where all images should
    be minimized by default

    These are the account directories, at the top level of the
    directory returned by data_dir, which contain a
    .minimize_all_images file.
    """
    dir_str = data_dir(base_dir)
    # the first item produced by os.walk describes the top level directory
    subdir, top_level_dirs, _ = next(os.walk(dir_str), (dir_str, [], []))
    min_images_for_accounts: list[str] = []
    for account in top_level_dirs:
        if not is_account_dir(account):
            continue
        flag_filename = \
            os.path.join(subdir, account) + '/.minimize_all_images'
        if os.path.isfile(flag_filename):
            min_images_for_accounts.append(account.split('@')[0])
    return min_images_for_accounts
def set_minimize_all_images(base_dir: str,
                            nickname: str, domain: str,
                            minimize: bool,
                            min_images_for_accounts: []) -> None:
    """Add or remove a file indicating that all images for an account
    should be minimized by default

    The given list of nicknames is updated in place to match the new
    state. File errors are reported but not raised.
    """
    filename = acct_dir(base_dir, nickname, domain) + '/.minimize_all_images'

    if not minimize:
        # clear the flag
        if nickname in min_images_for_accounts:
            min_images_for_accounts.remove(nickname)
        if os.path.isfile(filename):
            try:
                os.remove(filename)
            except OSError:
                print('EX: unable to delete ' + filename)
        return

    # set the flag
    if nickname not in min_images_for_accounts:
        min_images_for_accounts.append(nickname)
    if os.path.isfile(filename):
        return
    try:
        with open(filename, 'w+', encoding='utf-8') as fp_min:
            fp_min.write('\n')
    except OSError:
        print('EX: unable to write ' + filename)
2022-11-26 15:39:36 +00:00
def load_reverse_timeline(base_dir: str) -> []:
    """Loads flags for each user indicating whether they prefer to
    see reversed timelines

    Returns the nicknames, without duplicates, of the accounts whose
    directory contains a .reverse_timeline file.
    """
    dir_str = data_dir(base_dir)
    # the first item produced by os.walk describes the top level directory
    _, top_level_dirs, _ = next(os.walk(dir_str), (dir_str, [], []))
    reverse_sequence: list[str] = []
    for acct in top_level_dirs:
        if not is_account_dir(acct):
            continue
        handle_parts = acct.split('@')
        nickname = handle_parts[0]
        domain = handle_parts[1]
        reverse_filename = \
            acct_dir(base_dir, nickname, domain) + '/.reverse_timeline'
        if os.path.isfile(reverse_filename) and \
           nickname not in reverse_sequence:
            reverse_sequence.append(nickname)
    return reverse_sequence
def save_reverse_timeline(base_dir: str, reverse_sequence: []) -> []:
    """Saves flags for each user indicating whether they prefer to
    see reversed timelines

    For every account directory a .reverse_timeline file is created if
    the nickname is within reverse_sequence and removed if it is not.
    File errors are reported but not raised. Nothing is returned.
    """
    dir_str = data_dir(base_dir)
    # the first item produced by os.walk describes the top level directory
    _, top_level_dirs, _ = next(os.walk(dir_str), (dir_str, [], []))
    for acct in top_level_dirs:
        if not is_account_dir(acct):
            continue
        handle_parts = acct.split('@')
        nickname = handle_parts[0]
        domain = handle_parts[1]
        reverse_filename = \
            acct_dir(base_dir, nickname, domain) + '/.reverse_timeline'
        flag_exists = os.path.isfile(reverse_filename)
        if nickname in reverse_sequence:
            if flag_exists:
                continue
            try:
                with open(reverse_filename, 'w+',
                          encoding='utf-8') as fp_reverse:
                    fp_reverse.write('\n')
            except OSError:
                print('EX: failed to save reverse ' + reverse_filename)
        elif flag_exists:
            try:
                os.remove(reverse_filename)
            except OSError:
                print('EX: failed to delete reverse ' +
                      reverse_filename)
2022-12-24 15:33:26 +00:00
2024-04-20 11:07:45 +00:00
def get_quote_toot_url(post_json_object: str) -> str:
    """ Returns the url for a quote toot

    The 'object' of the post is searched, first for one of the adhoc
    quote fields and then within its list of tags, either for a tag
    whose 'rel' mentions _misskey_quote or for a Link tag having a json
    media type. The first value which resembles a url is returned with
    any html removed. An empty string is returned if nothing is found.
    """
    post_obj = post_json_object['object']

    # adhoc quote toot implementations
    object_quote_url_fields = ('quoteUri', 'quoteUrl', 'quoteReply',
                               'toot:quoteReply', '_misskey_quote')
    for fieldname in object_quote_url_fields:
        quote_url = post_obj.get(fieldname)
        if not quote_url:
            continue
        if isinstance(quote_url, str) and resembles_url(quote_url):
            return remove_html(quote_url)

    # More correct ActivityPub implementation - adding a Link tag
    tags = post_obj.get('tag')
    if not tags or not isinstance(tags, list):
        return ''
    for item in tags:
        if not isinstance(item, dict):
            continue

        # misskey style quote, indicated by the rel field
        rel = item.get('rel')
        if rel:
            if isinstance(rel, list):
                mk_quote = any(isinstance(rel_str, str) and
                               '_misskey_quote' in rel_str
                               for rel_str in rel)
            else:
                mk_quote = isinstance(rel, str) and '_misskey_quote' in rel
            if mk_quote and item.get('href'):
                if isinstance(item['href'], str) and \
                   resembles_url(item['href']):
                    return remove_html(item['href'])

        # Link tag with a json media type
        if not (item.get('type') and item.get('mediaType')):
            continue
        if not isinstance(item['type'], str) or item['type'] != 'Link':
            continue
        if not isinstance(item['mediaType'], str) or \
           'json' not in item['mediaType']:
            continue
        if item.get('href'):
            if isinstance(item['href'], str) and \
               resembles_url(item['href']):
                return remove_html(item['href'])
    return ''
2024-04-20 09:59:56 +00:00
def quote_toots_allowed(base_dir: str, nickname: str, domain: str,
                        sender_nickname: str, sender_domain: str) -> bool:
    """ Returns true if quote toots are allowed by the given account
    for the given sender

    The account needs to have a .allowQuotes file. If it does, and a
    sender nickname is given, then the sender handle must not appear
    within the quotesblocked.txt file of the account.
    """
    account_dir = acct_dir(base_dir, nickname, domain)
    if not os.path.isfile(account_dir + '/.allowQuotes'):
        return False
    if sender_nickname is None:
        return True

    # check blocks on individual sending accounts
    quotes_blocked_filename = account_dir + '/quotesblocked.txt'
    if not os.path.isfile(quotes_blocked_filename):
        return True
    sender_handle = sender_nickname + '@' + sender_domain
    # quote toots are not permitted from a sender listed within the file
    return not text_in_file(sender_handle, quotes_blocked_filename, False)
def license_link_from_name(license_name: str) -> str:
    """Returns the license link from its name

    A name which already contains '://' is assumed to be a link and is
    returned unchanged. Otherwise the upper cased name is searched for
    known license abbreviations or phrases, and the link for the first
    match is returned. Names which match nothing produce the
    Creative Commons CC0 public domain link.
    """
    if '://' in license_name:
        return license_name
    value_upper = license_name.upper()

    # Order matters, because the first match wins. More specific
    # names have to be listed before the names which they contain,
    # such as CC-BY-NC-ND before CC-BY-NC and CC-BY-ND before CC-BY
    known_licenses = (
        (('CC-BY-SA-NC', 'CC-BY-NC-SA', 'CC BY SA NC', 'CC BY NC SA'),
         'https://creativecommons.org/licenses/by-nc-sa/4.0'),
        (('CC-BY-SA', 'CC-SA-BY', 'CC BY SA', 'CC SA BY'),
         'https://creativecommons.org/licenses/by-sa/4.0'),
        (('CC-BY-NC-ND', 'CC-BY-ND-NC', 'CC BY NC ND', 'CC BY ND NC'),
         'https://creativecommons.org/licenses/by-nc-nd/4.0'),
        (('CC-BY-NC', 'CC BY NC'),
         'https://creativecommons.org/licenses/by-nc/4.0'),
        # no derivatives, without the non-commercial restriction
        (('CC-BY-ND', 'CC BY ND'),
         'https://creativecommons.org/licenses/by-nd/4.0'),
        (('CC-BY', 'CC BY'),
         'https://creativecommons.org/licenses/by/4.0'),
        (('GFDL', 'GNU FREE DOC'),
         'https://www.gnu.org/licenses/fdl-1.3.html'),
        (('OPL', 'OPEN PUBLICATION LIC'),
         'https://opencontent.org/openpub'),
        (('PDDL', 'OPEN DATA COMMONS PUBLIC'),
         'https://opendatacommons.org/licenses/pddl'),
        (('ODBL', 'OPEN DATA COMMONS OPEN'),
         'https://opendatacommons.org/licenses/odbl'),
        (('ODC', 'OPEN DATA COMMONS ATTR'),
         'https://opendatacommons.org/licenses/by'),
        (('OGL', 'OPEN GOVERNMENT LIC'),
         'https://www.nationalarchives.gov.uk/doc/open-government-licence'),
        (('PDL', 'PUBLIC DOCUMENTATION LIC'),
         'http://www.openoffice.org/licenses/PDL.html'),
        (('FREEBSD',),
         'https://www.freebsd.org/copyright/freebsd-doc-license'),
        (('WTF',),
         'http://www.wtfpl.net/txt/copying'),
        (('UNLICENSE',),
         'https://unlicense.org')
    )
    for name_fragments, link in known_licenses:
        if any(fragment in value_upper for fragment in name_fragments):
            return link
    return 'https://creativecommons.org/publicdomain/zero/1.0'
2023-01-02 09:55:41 +00:00
2023-01-02 11:41:48 +00:00
def _get_escaped_chars() -> {}:
    """Returns escaped characters

    The dictionary maps each special character to its escaped form,
    with the ampersand as the first entry.
    """
    escaped_pairs = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
        ("'", "&apos;")
    )
    return dict(escaped_pairs)
2023-01-02 11:41:48 +00:00
def escape_text(txt: str) -> str:
    """Escape text for inclusion in xml/rss

    Each of the characters returned by _get_escaped_chars is replaced
    by its escaped form, in the order in which they are listed there.
    """
    escaped_chars = _get_escaped_chars()
    for orig in escaped_chars:
        txt = txt.replace(orig, escaped_chars[orig])
    return txt
2023-01-02 10:24:35 +00:00
def unescaped_text(txt: str) -> str:
    """Unescape text which was escaped for inclusion in xml/rss

    This is the reverse of escape_text: each escaped form returned by
    _get_escaped_chars is replaced by its original character.
    """
    # The replacements are applied in the opposite order to escaping, so
    # that '&amp;' is handled last. Handling it first would turn
    # '&amp;lt;' into '&lt;' and then wrongly into '<'
    for orig, replacement in reversed(list(_get_escaped_chars().items())):
        txt = txt.replace(replacement, orig)
    return txt
2023-01-07 11:45:19 +00:00
2024-07-09 10:36:23 +00:00
def valid_content_warning(summary: str) -> str:
    """Returns a validated content warning

    Html is removed from the summary, any '#' characters are taken out,
    and the result is passed through remove_invalid_chars.
    """
    cw_str = remove_html(summary)
    # hashtags within content warnings apparently cause a lot of trouble
    # so remove them
    if '#' in cw_str:
        cw_str = cw_str.replace('#', '')
        # Removing a '#' which stood on its own leaves two adjacent
        # spaces, so collapse those into one. Replacing a single space
        # with a single space would do nothing.
        cw_str = cw_str.replace(' ' * 2, ' ')
    return remove_invalid_chars(cw_str)
2023-01-07 11:45:19 +00:00
def harmless_markup(post_json_object: {}) -> None:
    """render harmless any dangerous markup

    The content and summary fields of the post 'object', together with
    their language mapped contentMap and summaryMap equivalents, are
    altered in place:
    - summaries are tidied with valid_content_warning
    - known trash strings are removed
    - html is removed if dangerous_markup flags the text
    - 'pre' markup tags are removed
    Nothing is done if the 'object' of the post is not a dictionary.
    """
    if not isinstance(post_json_object['object'], dict):
        return
    post_obj = post_json_object['object']

    remove_trash = [' id="wordads-inline-marker"']

    for field_name in ('content', 'summary'):
        if post_obj.get(field_name):
            # Every step works on the result of the previous one.
            # Previously the final step was applied to the text as it
            # was before trash removal and html removal, which discarded
            # the effect of both.
            text = post_obj[field_name]

            # tidy up content warnings
            if field_name == 'summary':
                text = valid_content_warning(text)

            # take out the trash
            for trash in remove_trash:
                if trash in text:
                    text = text.replace(trash, '')

            # remove things which would cause display issues
            if dangerous_markup(text, False, ['pre']):
                text = remove_html(text)
            post_obj[field_name] = remove_markup_tag(text, 'pre')

        map_name = field_name + 'Map'
        if not post_obj.get(map_name):
            continue
        if not isinstance(post_obj[map_name], dict):
            print('WARN: harmless_markup unknown Map ' + map_name + ' ' +
                  str(post_obj[map_name]))
            continue

        # only the values of existing keys are assigned within this
        # loop, so the size of the dictionary does not change
        for lang, content in post_obj[map_name].items():
            if not isinstance(content, str):
                continue

            # tidy up language mapped content warnings
            if field_name == 'summary':
                post_obj[map_name][lang] = valid_content_warning(content)
            content = post_obj[map_name][lang]

            # take out the trash
            for trash in remove_trash:
                if trash in content:
                    post_obj[map_name][lang] = content.replace(trash, '')

            # remove things which would cause display issues
            if dangerous_markup(content, False, ['pre']):
                content = remove_html(content)
                post_obj[map_name][lang] = content
            content = post_obj[map_name][lang]
            post_obj[map_name][lang] = remove_markup_tag(content, 'pre')
2023-05-12 13:47:28 +00:00
2023-05-12 14:39:26 +00:00
def ap_proxy_type(json_object: {}) -> str:
    """Returns a string indicating the proxy for an activitypub post
    or None if not proxied
    See https://codeberg.org/fediverse/fep/src/branch/main/feps/fep-fffd.md

    The result is the first 'protocol' string found within the
    'proxyOf' list of the given object.
    """
    proxy_list = json_object.get('proxyOf')
    if not proxy_list or not isinstance(proxy_list, list):
        return None
    for proxy_dict in proxy_list:
        # list entries could be of any type, and only dictionaries
        # can carry a protocol
        if not isinstance(proxy_dict, dict):
            continue
        protocol = proxy_dict.get('protocol')
        if protocol and isinstance(protocol, str):
            return protocol
    return None
def language_right_to_left(language: str) -> bool:
    """is the given language written from right to left?

    True for the language codes ar, fa, he and yi
    """
    return language in ('ar', 'fa', 'he', 'yi')
def binary_is_image(filename: str, media_binary) -> bool:
    """Returns true if the given file binary data contains an image

    The extension of the filename, compared without regard to case,
    selects which signature is looked for within the data.
    Data shorter than 13 bytes and unrecognized extensions
    are never regarded as images.
    """
    if len(media_binary) < 13:
        return False
    filename_lower = filename.lower()
    # the dot is part of each extension, so that a name which merely
    # ends with the letters jpg is not treated as a jpeg
    if filename_lower.endswith(('.jpeg', '.jpg')):
        return media_binary[6:10] in (b'JFIF', b'Exif')
    if filename_lower.endswith('.ico'):
        return media_binary.startswith(b'\x00\x00\x01\x00')
    if filename_lower.endswith('.png'):
        return media_binary.startswith(b'\211PNG\r\n\032\n')
    if filename_lower.endswith('.webp'):
        return media_binary.startswith(b'RIFF') and \
            media_binary[8:12] == b'WEBP'
    if filename_lower.endswith('.gif'):
        return media_binary[:6] in (b'GIF87a', b'GIF89a')
    if filename_lower.endswith('.avif'):
        return media_binary[4:12] == b'ftypavif'
    if filename_lower.endswith('.heic'):
        return media_binary[4:12] == b'ftypmif1'
    if filename_lower.endswith('.jxl'):
        return media_binary.startswith(b'\xff\n')
    if filename_lower.endswith('.svg'):
        # svg is text, so search the string form of the data
        return '<svg' in str(media_binary)
    return False
def get_status_count(base_dir: str) -> int:
    """Get the total number of posts

    This is the number of files found directly within the outbox
    directory of each account.
    """
    accounts_dir = data_dir(base_dir)
    # the first item produced by os.walk describes the top level directory
    _, top_level_dirs, _ = \
        next(os.walk(accounts_dir), (accounts_dir, [], []))
    status_ctr = 0
    for acct in top_level_dirs:
        if not is_account_dir(acct):
            continue
        outbox_dir = os.path.join(accounts_dir, acct + '/outbox')
        # files within subdirectories of the outbox are not counted
        _, _, outbox_files = next(os.walk(outbox_dir), (outbox_dir, [], []))
        status_ctr += len(outbox_files)
    return status_ctr
2023-10-02 20:29:30 +00:00
def lines_in_file(filename: str) -> int:
    """Returns the number of lines in a file

    The count is one more than the number of newline characters, so
    that an empty file counts as 1 line and a trailing newline counts
    as a further line. Zero is returned if the file does not exist or
    cannot be read.
    """
    if not os.path.isfile(filename):
        return 0
    try:
        with open(filename, 'r', encoding='utf-8') as fp_lines:
            file_text = fp_lines.read()
    except OSError:
        print('EX: lines_in_file error reading ' + filename)
        return 0
    return file_text.count('\n') + 1
2023-10-12 14:34:49 +00:00
def corp_servers() -> ():
    """Returns a tuple of text fragments which identify the servers
    of large corporations
    """
    corporate_fragments = (
        'GitHub.com', 'github.com', 'cloudflare', 'microsoft.com',
        'google.com', 'google.co.'
    )
    return corporate_fragments
2023-10-29 22:00:04 +00:00
def get_media_url_from_video(post_json_object: {}) -> (str, str, str, str):
    """Within a Video post (eg peertube) return the media details

    Returns the media type, media url, torrent url and magnet link
    found within the 'url' list of the post. Any of them can be None.
    Only the video/mp4 and video/ogv media types are accepted for the
    media url, and the first such link found is the one kept.
    """
    video_types = ('video/mp4', 'video/ogv')
    media_type = None
    media_url = None
    media_torrent = None
    media_magnet = None

    url_list = post_json_object.get('url')
    if not url_list or not isinstance(url_list, list):
        return media_type, media_url, media_torrent, media_magnet

    for media_link in url_list:
        if not isinstance(media_link, dict):
            continue
        if not (media_link.get('mediaType') and media_link.get('href')):
            continue

        # links can be nested within the tags of a link
        media_tags = media_link.get('tag')
        if media_tags and isinstance(media_tags, list):
            for tag_link in media_tags:
                if not isinstance(tag_link, dict):
                    continue
                if not (tag_link.get('mediaType') and tag_link.get('href')):
                    continue
                if tag_link['mediaType'] in video_types:
                    media_type = tag_link['mediaType']
                    media_url = remove_html(tag_link['href'])
                    break
            if media_type and media_url:
                continue

        link_type = media_link['mediaType']
        link_href = media_link['href']
        if link_type == 'application/x-bittorrent':
            media_torrent = remove_html(link_href)
        if link_href.startswith('magnet:'):
            media_magnet = remove_html(link_href)
        if link_type not in video_types:
            continue
        if not media_url:
            media_type = link_type
            media_url = remove_html(link_href)
    return media_type, media_url, media_torrent, media_magnet
def get_reply_to(post_json_object: {}) -> str:
    """Returns the reply to link from a post

    The inReplyTo field is used if it has a value, and inReplyToBook
    otherwise. The value is returned if it is a string. If it is a
    dictionary having a string 'id' then that id is returned. Any other
    kind of value causes a warning and an empty string to be returned.
    An empty string is also returned if neither field has a value.
    """
    for field_name in ('inReplyTo', 'inReplyToBook'):
        reply_to = post_json_object.get(field_name)
        if not reply_to:
            continue
        if isinstance(reply_to, str):
            return reply_to
        if isinstance(reply_to, dict) and reply_to.get('id'):
            reply_id = reply_to['id']
            if isinstance(reply_id, str):
                return reply_id
        print('WARN: ' + field_name + ' is not a string ' + str(reply_to))
        return ''
    return ''
2024-01-07 22:19:29 +00:00
2024-01-27 17:04:21 +00:00
def resembles_url(text: str) -> bool:
    """Does the given text look like a url?

    It needs to contain both '://' and a dot, and must not contain
    any spaces or '<' characters.
    """
    has_required = '://' in text and '.' in text
    has_forbidden = ' ' in text or '<' in text
    return has_required and not has_forbidden
2024-01-27 17:21:08 +00:00
def local_only_is_local(message_json: {}, domain_full: str) -> bool:
    """Returns True if the given json post is verified as local only

    Posts whose localOnly flag is not True always pass. For the others,
    every 'to' address for which a domain can be obtained, and also the
    domain of the attributedTo actor if one can be obtained, has to
    match the given full domain.
    """
    post_obj = message_json['object']
    if post_obj['localOnly'] is not True:
        return True

    # check that the to addresses are local
    if isinstance(post_obj['to'], list):
        for to_actor in post_obj['to']:
            to_domain, to_port = get_domain_from_actor(to_actor)
            if not to_domain:
                continue
            if domain_full != get_full_domain(to_domain, to_port):
                print("REJECT: inbox " +
                      "local only post isn't local " +
                      str(message_json))
                return False

    # check that the sender is local
    local_actor = get_attributed_to(post_obj['attributedTo'])
    local_domain, local_port = get_domain_from_actor(local_actor)
    if local_domain:
        if domain_full != get_full_domain(local_domain, local_port):
            print("REJECT: " +
                  "inbox local only post isn't local " +
                  str(message_json))
            return False
    return True
2024-01-27 17:35:04 +00:00
def post_summary_contains_links(message_json: {}) -> bool:
    """check if the json post summary contains links

    Also returns True if the summary is too long. Objects other than
    Person, Application or Group are limited to 1024 characters and
    must not contain '://'. Person, Application and Group summaries
    are only limited to 4096 characters.
    Returns False if the object has no type or no summary.
    """
    post_obj = message_json['object']
    if not (post_obj.get('type') and post_obj.get('summary')):
        return False

    summary = post_obj['summary']
    if post_obj['type'] not in ('Person', 'Application', 'Group'):
        if len(summary) > 1024:
            actor_url = get_actor_from_post(message_json)
            print('INBOX: summary is too long ' +
                  actor_url + ' ' + summary)
            return True
        if '://' in summary:
            actor_url = get_actor_from_post(message_json)
            print('INBOX: summary should not contain links ' +
                  actor_url + ' ' + summary)
            return True
    elif len(summary) > 4096:
        actor_url = get_actor_from_post(message_json)
        print('INBOX: person summary is too long ' +
              actor_url + ' ' + summary)
        return True
    return False
2024-01-28 19:11:55 +00:00
def convert_domains(calling_domain: str, referer_domain: str,
                    msg_str: str, http_prefix: str,
                    domain: str,
                    onion_domain: str,
                    i2p_domain: str) -> str:
    """Convert domains to onion or i2p, depending upon who is asking

    For an onion or i2p request every occurrence of the instance url
    within the message is replaced by the http url of the onion or i2p
    domain. Otherwise the message is returned unchanged.
    """
    curr_http_prefix = http_prefix + '://'
    if _is_onion_request(calling_domain, referer_domain,
                         domain, onion_domain):
        return msg_str.replace(curr_http_prefix + domain,
                               'http://' + onion_domain)
    if _is_i2p_request(calling_domain, referer_domain,
                       domain, i2p_domain):
        return msg_str.replace(curr_http_prefix + domain,
                               'http://' + i2p_domain)
    return msg_str
2024-01-29 21:05:16 +00:00
def get_instance_url(calling_domain: str,
                     http_prefix: str,
                     domain_full: str,
                     onion_domain: str,
                     i2p_domain: str) -> str:
    """Returns the URL for this instance

    This is the http url of the onion or i2p domain if the calling
    domain is of that kind and such a domain is configured. Otherwise
    it is built from the http prefix and the full domain.
    """
    if calling_domain.endswith('.onion') and onion_domain:
        return 'http://' + onion_domain
    if calling_domain.endswith('.i2p') and i2p_domain:
        return 'http://' + i2p_domain
    return http_prefix + '://' + domain_full
2024-03-01 17:10:04 +00:00
def check_bad_path(path: str):
    """for http GET or POST check that the path looks valid

    Returns True, after printing a warning, if the lower cased path
    contains '..' in plain, url encoded or double url encoded form, or
    if it contains '/.'. The '/.' rule is not applied to paths which
    begin with /.well-known/ or /users/.well-known/
    Returns False otherwise.
    """
    path_lower = path.lower()

    # allow /.well-known/...
    well_known_prefixes = ('/.well-known/', '/users/.well-known/')
    if path_lower.startswith(well_known_prefixes):
        bad_strings = ('..', '%2e%2e', '%252e%252e')
    else:
        bad_strings = ('..', '/.', '%2e%2e', '%252e%252e')

    if not any(bad_str in path_lower for bad_str in bad_strings):
        return False
    print('WARN: bad path ' + path)
    return True
def set_premium_account(base_dir: str, nickname: str, domain: str,
                        flag_state: bool) -> bool:
    """ Set or clear the premium account flag

    The flag is the .premium file within the account directory, which
    is created if flag_state is True and removed otherwise.
    Returns True if the flag is in the requested state afterwards,
    and False if the file could not be created or removed.
    """
    premium_filename = acct_dir(base_dir, nickname, domain) + '/.premium'
    if os.path.isfile(premium_filename):
        if not flag_state:
            try:
                os.remove(premium_filename)
            except OSError:
                print('EX: unable to remove premium flag ' + premium_filename)
                return False
    else:
        if flag_state:
            try:
                with open(premium_filename, 'w+',
                          encoding='utf-8') as fp_premium:
                    fp_premium.write('\n')
            except OSError:
                print('EX: unable to set premium flag ' + premium_filename)
                return False
    return True
def get_post_attachments(post_json_object: {}) -> []:
    """ Returns the list of attachments for a post

    The attachment is read from the 'object' of the post if
    has_object_dict accepts the post, and from the post itself
    otherwise. A single attachment dictionary is returned as a list of
    one item. An empty list is returned if there is no attachment, or
    if it is neither a list nor a dictionary.
    """
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
    else:
        post_obj = post_json_object

    attachment = post_obj.get('attachment')
    if not attachment:
        return []
    if isinstance(attachment, list):
        return attachment
    if isinstance(attachment, dict):
        return [attachment]
    return []
2024-04-10 09:51:43 +00:00
def string_ends_with(text: str, possible_endings: []) -> bool:
    """ Does the given text end with at least one of the endings

    False if there are no possible endings.
    """
    return any(text.endswith(ending) for ending in possible_endings)
def string_contains(text: str, possible_substrings: []) -> bool:
    """ Does the given text contain at least one of the possible substrings

    False if there are no possible substrings.
    """
    return any(substring in text for substring in possible_substrings)
2024-04-24 19:35:04 +00:00
def remove_link_tracking(url: str) -> str:
    """ Removes any web link tracking, such as utm_medium, utm_campaign
    or utm_source

    Everything from the first '?utm_' onwards is removed.
    A url not containing '?utm_' is returned unchanged.
    """
    url_without_tracking, _, _ = url.partition('?utm_')
    return url_without_tracking
def get_image_file(base_dir: str, name: str, directory: str,
                   theme: str) -> (str, str):
    """returns the filenames for an image with the given name

    The given directory is searched first, trying each of the
    extensions from get_image_extensions, and if the image is found
    there then its file name and its full path are returned.
    Otherwise the directory of the theme, or of the default theme if
    no theme is given, is searched, in which case the returned file
    name has the theme name appended to the image name.
    Two empty strings are returned if no image is found.
    """
    banner_extensions = get_image_extensions()

    for ext in banner_extensions:
        candidate_file = name + '.' + ext
        candidate_filename = directory + '/' + candidate_file
        if os.path.isfile(candidate_filename):
            return candidate_file, candidate_filename

    # if not found then use the default image
    curr_theme = theme if theme else 'default'
    theme_directory = base_dir + '/theme/' + curr_theme
    for ext in banner_extensions:
        candidate_filename = theme_directory + '/' + name + '.' + ext
        if os.path.isfile(candidate_filename):
            themed_file = name + '_' + curr_theme + '.' + ext
            return themed_file, candidate_filename
    return '', ''
def get_watermark_file(base_dir: str,
                       nickname: str, domain: str) -> (str, str):
    """Gets the filename for watermarking when an image is attached to a post

    This is the result of get_image_file for the image named
    watermark_image within the account directory, with no theme given.
    """
    account_dir = acct_dir(base_dir, nickname, domain)
    return get_image_file(base_dir, 'watermark_image', account_dir, '')
2024-08-08 17:23:33 +00:00
def replace_strings(text: str, replacements: {}) -> str:
    """Does a series of string replacements

    Each key of the dictionary is replaced by its value, in the order
    of the dictionary, so that later replacements see the result of
    the earlier ones.
    """
    for orig_str in replacements:
        text = text.replace(orig_str, replacements[orig_str])
    return text
def account_is_indexable(actor_json: {}) -> bool:
    """Returns true if the given actor is indexable

    A boolean 'indexable' field is returned as it is. A list or string
    is indexable if its text contains #Public. A missing field, or a
    field of any other type, is not indexable.
    """
    if 'indexable' not in actor_json:
        return False
    indexable = actor_json['indexable']
    if isinstance(indexable, bool):
        return indexable
    if isinstance(indexable, (list, str)):
        return '#Public' in str(indexable)
    return False
2024-10-12 12:09:23 +00:00
def load_searchable_by_default(base_dir: str) -> {}:
    """loads the searchable_by states for each account

    Returns a dictionary, keyed by nickname, containing the stripped
    content of the .searchableByDefault file of each account having
    one. Files which cannot be read are reported and left out.
    """
    dir_str = data_dir(base_dir)
    # the first item produced by os.walk describes the top level directory
    _, top_level_dirs, _ = next(os.walk(dir_str), (dir_str, [], []))
    result = {}
    for account in top_level_dirs:
        if not is_account_dir(account):
            continue
        nickname = account.split('@')[0]
        filename = os.path.join(dir_str, account) + '/.searchableByDefault'
        if not os.path.isfile(filename):
            continue
        try:
            with open(filename, 'r', encoding='utf-8') as fp_search:
                result[nickname] = fp_search.read().strip()
        except OSError:
            print('EX: unable to load searchableByDefault ' + filename)
    return result
2024-10-12 12:31:49 +00:00
def set_searchable_by(base_dir: str, nickname: str, domain: str,
                      searchable_by: str) -> None:
    """Stores the searchable_by state chosen from the dropdown on the
    new post screen within the .searchableByDefault file of the account.
    Nothing happens if no state is given, and the file is left alone
    when text_in_file reports that it already contains the state
    """
    if not searchable_by:
        return
    state_filename = \
        acct_dir(base_dir, nickname, domain) + '/.searchableByDefault'

    # is the same state already stored?
    unchanged = os.path.isfile(state_filename) and \
        text_in_file(searchable_by, state_filename, True)
    if unchanged:
        return

    # store the new state
    try:
        with open(state_filename, 'w+', encoding='utf-8') as fp_search:
            fp_search.write(searchable_by)
    except OSError:
        print('EX: unable to write searchableByDropdown ' + state_filename)
2024-11-04 21:05:12 +00:00
def browser_supports_download_filename(ua_str: str) -> bool:
    """Returns true if the browser indicated by the user agent string
    supports specifying a default download filename.
    The match is case sensitive against lower case tokens.
    https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a#download
    https://www.w3schools.com/howto/howto_html_download_link.asp
    """
    supporting_browsers = ('mozilla', 'firefox')
    return any(browser in ua_str for browser in supporting_browsers)
2024-12-17 13:50:48 +00:00
def detect_mitm(self) -> bool:
    """Detect if a request contains a MiTM.
    self needs to provide a headers mapping (presumably the http request
    handler - confirm against callers). Returns true, after printing
    the offending header, if a known intermediary name appears within
    certain headers or if an intermediary specific header is present
    """
    mitm_domains = (
        'cloudflare', 'radware', 'imperva', 'akamai', 'azure',
        'fastly', 'google'
    )
    # headers whose values get searched for the above names
    check_headers = (
        'Server', 'Report-To', 'Report-to', 'report-to',
        'Expect-CT', 'Expect-Ct', 'expect-ct'
    )
    # gather the searchable headers which exist and are non-empty
    present = []
    for header_name in check_headers:
        header_value = self.headers.get(header_name)
        if header_value:
            present.append((header_name, str(header_value)))
    # domains are the outer loop, so that the first match reported
    # follows the order of mitm_domains
    for interloper in mitm_domains:
        for header_name, header_text in present:
            if interloper in header_text.lower():
                print('MITM: ' + header_name + ' = ' + header_text)
                return True

    # The presence of these headers on their own indicates a MiTM
    mitm_headers = (
        'CF-Connecting-IP', 'CF-RAY', 'CF-IPCountry', 'CF-Visitor',
        'CDN-Loop', 'CF-Worker', 'CF-Cache-Status'
    )
    for header_name in mitm_headers:
        # try the header as given and then its lower case form
        for key in (header_name, header_name.lower()):
            header_value = self.headers.get(key)
            if header_value:
                print('MITM: ' + header_name + ' = ' + header_value)
                return True
    return False
2024-12-17 15:23:07 +00:00
def load_mitm_servers(base_dir: str) -> []:
    """Loads a list of servers implementing insecure transport security.
    Reads mitm_servers.txt from the data directory, which holds one
    domain per line, and returns the domains as a list of strings.
    An empty list is returned if the file doesn't exist, is empty or
    can't be read
    """
    mitm_servers_filename = data_dir(base_dir) + '/mitm_servers.txt'
    if not os.path.isfile(mitm_servers_filename):
        return []

    mitm_servers_str = ''
    try:
        with open(mitm_servers_filename, 'r',
                  encoding='utf-8') as fp_mitm:
            mitm_servers_str = fp_mitm.read()
    except OSError:
        print('EX: error while reading mitm_servers.txt')

    # Saved entries are each terminated by a newline, so a plain split
    # always produces a trailing empty string. Blank lines are dropped
    # and surrounding whitespace (eg. a stray carriage return) removed,
    # so that only real domains are returned
    mitm_servers: list[str] = []
    for line in mitm_servers_str.split('\n'):
        domain = line.strip()
        if domain:
            mitm_servers.append(domain)
    return mitm_servers
def save_mitm_servers(base_dir: str, mitm_servers: []) -> None:
    """Saves a list of servers implementing insecure transport security.
    Each non-empty domain is written on its own line within
    mitm_servers.txt in the data directory, replacing any previous
    contents
    """
    file_text = ''.join(domain + '\n' for domain in mitm_servers if domain)
    mitm_servers_filename = data_dir(base_dir) + '/mitm_servers.txt'
    try:
        with open(mitm_servers_filename, 'w+',
                  encoding='utf-8') as fp_mitm:
            fp_mitm.write(file_text)
    except OSError:
        print('EX: error while saving mitm_servers.txt')
2024-12-18 19:20:27 +00:00
def text_mode_removals(text: str, translate: {}) -> str:
    """Removes some elements of a post when displaying in a text mode
    browser.
    The translated 'SHOW MORE' string is deleted and the translated
    'mitm' string is swapped for an eye symbol
    """
    without_show_more = text.replace(translate['SHOW MORE'], '')
    return without_show_more.replace(translate['mitm'], '👁 ')