2020-04-04 13:44:49 +00:00
|
|
|
|
__filename__ = "utils.py"
|
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
|
__license__ = "AGPL3+"
|
2024-12-22 23:37:30 +00:00
|
|
|
|
__version__ = "1.6.0"
|
2020-04-04 13:44:49 +00:00
|
|
|
|
__maintainer__ = "Bob Mottram"
|
2021-09-10 16:14:50 +00:00
|
|
|
|
__email__ = "bob@libreserver.org"
|
2020-04-04 13:44:49 +00:00
|
|
|
|
__status__ = "Production"
|
2021-06-26 11:16:41 +00:00
|
|
|
|
__module_group__ = "Core"
|
2024-05-12 18:32:50 +00:00
|
|
|
|
__accounts_data_path__ = None
|
|
|
|
|
__accounts_data_path_tests__ = False
|
2019-07-02 09:25:29 +00:00
|
|
|
|
|
|
|
|
|
import os
|
2021-03-03 20:34:55 +00:00
|
|
|
|
import re
|
2019-10-11 18:03:58 +00:00
|
|
|
|
import time
|
2019-09-29 18:48:34 +00:00
|
|
|
|
import shutil
|
2019-07-02 09:25:29 +00:00
|
|
|
|
import datetime
|
2019-11-23 10:20:30 +00:00
|
|
|
|
import json
|
2021-03-18 17:27:46 +00:00
|
|
|
|
import locale
|
2020-06-06 18:16:16 +00:00
|
|
|
|
from pprint import pprint
|
2023-11-01 20:38:04 +00:00
|
|
|
|
import idna
|
|
|
|
|
from dateutil.tz import tz
|
2021-02-04 18:18:31 +00:00
|
|
|
|
from cryptography.hazmat.backends import default_backend
|
|
|
|
|
from cryptography.hazmat.primitives import hashes
|
2021-12-27 16:18:52 +00:00
|
|
|
|
from followingCalendar import add_person_to_calendar
|
2021-02-04 18:18:31 +00:00
|
|
|
|
|
2022-01-13 15:10:41 +00:00
|
|
|
|
# Set of single characters: underscore, ASCII digits, ASCII letters and
# the additional (mostly accented Latin) characters listed below.
# NOTE(review): presumably the characters permitted within a hashtag,
# going by the name - confirm against the callers.
VALID_HASHTAG_CHARS = \
    set('_0123456789' +
        'abcdefghijklmnopqrstuvwxyz' +
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
        '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
        'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
        'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
        'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
        'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
        'ŴŵÝýŸÿŶŷŹźŽžŻż')
|
|
|
|
|
|
2021-02-11 10:33:56 +00:00
|
|
|
|
# posts containing these strings will always get screened out,
|
|
|
|
|
# both incoming and outgoing.
|
|
|
|
|
# Could include dubious clacks or admin dogwhistles
|
2021-12-26 10:11:18 +00:00
|
|
|
|
INVALID_CHARACTERS = (
|
2023-01-01 22:28:13 +00:00
|
|
|
|
'卐', '卍', '࿕', '࿖', '࿗', '࿘', 'ϟϟ', '🏳️🌈🚫', '⚡⚡', ''
|
2021-02-11 10:33:56 +00:00
|
|
|
|
)
|
|
|
|
|
|
2022-12-26 10:49:41 +00:00
|
|
|
|
INVALID_ACTOR_URL_CHARACTERS = (
|
|
|
|
|
'
', '', '<', '>', '%', '{', '}', '|', '\\', '^', '`',
|
2022-12-26 15:41:21 +00:00
|
|
|
|
'?', '#', '[', ']', '!', '$', '&', "'", '(', ')', '*',
|
|
|
|
|
'+', ',', ';', '='
|
2022-12-26 10:49:41 +00:00
|
|
|
|
)
|
|
|
|
|
|
2021-02-04 18:18:31 +00:00
|
|
|
|
|
2024-09-13 13:58:14 +00:00
|
|
|
|
def is_account_dir(dir_name: str) -> bool:
    """Returns true if the given directory name looks like an account
    directory within /accounts, rather than one of the inbox@, news@
    or Actor@ directories
    """
    if '@' not in dir_name:
        return False
    excluded_names = ('inbox@', 'news@', 'Actor@')
    return not any(excluded in dir_name for excluded in excluded_names)
|
|
|
|
|
|
|
|
|
|
|
2023-11-29 11:37:44 +00:00
|
|
|
|
def remove_zero_length_strings(text: str) -> str:
    """Removes zero width space characters (U+200B) from text

    The character is written as an explicit escape because a literal
    zero width space is invisible in source code, and an empty search
    string would make the replacement a no-op.
    """
    return text.replace('\u200b', '')
|
|
|
|
|
|
|
|
|
|
|
2023-11-20 22:27:58 +00:00
|
|
|
|
def _utc_mktime(utc_tuple):
|
|
|
|
|
"""Returns number of seconds elapsed since epoch
|
|
|
|
|
Note that no timezone are taken into consideration.
|
|
|
|
|
utc tuple must be: (year, month, day, hour, minute, second)
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
if len(utc_tuple) == 6:
|
|
|
|
|
utc_tuple += (0, 0, 0)
|
|
|
|
|
return time.mktime(utc_tuple) - time.mktime((1970, 1, 1, 0, 0, 0, 0, 0, 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _datetime_to_timestamp(dtime):
    """Returns the whole number of seconds since epoch for the
    given datetime object, using its time tuple
    """
    time_fields = dtime.timetuple()
    seconds_since_epoch = _utc_mktime(time_fields)
    return int(seconds_since_epoch)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def date_utcnow():
    """Returns the current time as an offset-aware UTC datetime
    """
    utc_zone = datetime.timezone.utc
    return datetime.datetime.now(tz=utc_zone)
|
|
|
|
|
|
|
|
|
|
|
2023-11-21 11:15:44 +00:00
|
|
|
|
def date_from_numbers(year: int, month: int, day: int,
                      hour: int, mins: int):
    """Returns an offset-aware UTC datetime built from the given
    numbers, with the seconds set to zero
    """
    utc_zone = datetime.timezone.utc
    return datetime.datetime(year=year, month=month, day=day,
                             hour=hour, minute=mins, second=0,
                             tzinfo=utc_zone)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def date_from_string_format(date_str: str, formats: []):
    """Returns an offset-aware datetime from a string date

    Each format is tried in turn and the first which parses is used.
    If no formats are given then a default set is tried.
    Returns None if the string matches none of the formats.
    A parsed date without timezone information is assumed to be UTC.
    """
    if not formats:
        formats = ("%a, %d %b %Y %H:%M:%S %Z",
                   "%a, %d %b %Y %H:%M:%S %z",
                   "%Y-%m-%dT%H:%M:%S%z")
    dtime = None
    for date_format in formats:
        try:
            dtime = \
                datetime.datetime.strptime(date_str, date_format)
        except (ValueError, TypeError):
            # ValueError: the string does not match this format
            # TypeError: the date is not a string
            # Anything else, such as KeyboardInterrupt, should
            # not be swallowed here
            continue
        break
    if dtime is None:
        return None
    if dtime.tzinfo is None:
        dtime = dtime.replace(tzinfo=datetime.timezone.utc)
    return dtime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def date_epoch():
    """Returns midnight on the 1st of January 1970 as an
    offset-aware UTC datetime
    """
    return datetime.datetime(1970, 1, 1, 0, 0, 0,
                             tzinfo=datetime.timezone.utc)
|
2023-11-20 22:27:58 +00:00
|
|
|
|
|
|
|
|
|
|
2023-12-09 14:18:24 +00:00
|
|
|
|
def get_url_from_post(url_field) -> str:
    """Returns a url from the url field of a post object

    A string is returned unchanged. For a list, the href of the first
    dict entry having mediaType text/html and an href containing ://
    is returned. Otherwise an empty string is returned.
    """
    if isinstance(url_field, str):
        return url_field
    if not isinstance(url_field, list):
        return ''
    for link in url_field:
        if not isinstance(link, dict):
            continue
        href = link.get('href')
        media_type = link.get('mediaType')
        if not isinstance(href, str) or not isinstance(media_type, str):
            continue
        if media_type == 'text/html' and '://' in href:
            return href
    return ''
|
|
|
|
|
|
|
|
|
|
|
2023-09-26 20:25:53 +00:00
|
|
|
|
def get_attributed_to(field) -> str:
    """Returns the actor from an attributedTo style field

    A string is returned unchanged. For a list, the id of the first
    dict entry of type Person whose id resembles a url is returned,
    otherwise the first entry of the list if that is a string.
    Returns None if no actor could be found.
    """
    if isinstance(field, str):
        return field
    if isinstance(field, list):
        for attrib in field:
            if not isinstance(attrib, dict):
                continue
            if not (attrib.get('type') and attrib.get('id')):
                continue
            if not (isinstance(attrib['type'], str) and
                    isinstance(attrib['id'], str)):
                continue
            if attrib['type'] == 'Person' and \
               resembles_url(attrib['id']):
                return attrib['id']
        # the list may be empty, so check before indexing it
        if field and isinstance(field[0], str):
            return field[0]
    return None
|
|
|
|
|
|
|
|
|
|
|
2024-02-02 12:04:09 +00:00
|
|
|
|
def uninvert_text(text: str) -> str:
    """Uninverts inverted (upside down) text

    If more than half of the characters are known inverted forms then
    the upright equivalent of the text is returned, otherwise the
    text is returned unchanged.
    Text shorter than four characters is never altered.
    """
    if len(text) < 4:
        return text

    # upright character -> its inverted form
    flip_table = {
        '\u0021': '\u00A1',
        '\u0022': '\u201E',
        '\u0026': '\u214B',
        '\u002E': '\u02D9',
        '\u0033': '\u0190',
        '\u0034': '\u152D',
        '\u0037': '\u2C62',
        '\u003B': '\u061B',
        '\u003F': '\u00BF',
        '\u0041': '\u2200',
        # beyond the basic plane, so the eight digit escape is needed.
        # '\u10412' would be U+1041 followed by the digit 2, which can
        # never equal a single character
        '\u0042': '\U00010412',
        '\u0043': '\u2183',
        '\u0044': '\u25D6',
        '\u0045': '\u018E',
        '\u0046': '\u2132',
        '\u0047': '\u2141',
        '\u004A': '\u017F',
        '\u004B': '\u22CA',
        '\u004C': '\u2142',
        '\u004D': '\u0057',
        '\u004E': '\u1D0E',
        '\u0050': '\u0500',
        '\u0051': '\u038C',
        '\u0052': '\u1D1A',
        '\u0054': '\u22A5',
        '\u0055': '\u2229',
        '\u0056': '\u1D27',
        '\u0059': '\u2144',
        '\u005F': '\u203E',
        '\u0061': '\u0250',
        '\u0062': '\u0071',
        '\u0063': '\u0254',
        '\u0064': '\u0070',
        '\u0065': '\u01DD',
        '\u0066': '\u025F',
        '\u0067': '\u0183',
        '\u0068': '\u0265',
        '\u0069': '\u0131',
        '\u006A': '\u027E',
        '\u006B': '\u029E',
        '\u006C': '\u0283',
        '\u006D': '\u026F',
        '\u006E': '\u0075',
        '\u0072': '\u0279',
        '\u0074': '\u0287',
        '\u0076': '\u028C',
        '\u0077': '\u028D',
        '\u0079': '\u028E',
        '\u203F': '\u2040',
        '\u2234': '\u2235'
    }
    # inverted form -> upright character. The inverted forms are all
    # distinct, so this gives the same result as scanning the table
    upright_for = {ch_inv: ch1 for ch1, ch_inv in flip_table.items()}

    matches = 0
    uninverted = []
    for ch_test in text:
        ch_upright = upright_for.get(ch_test)
        if ch_upright is None:
            uninverted.append(ch_test)
        else:
            matches += 1
            uninverted.append(ch_upright)

    if matches <= len(text)/2:
        # not enough inverted characters to be confident
        return text

    # inverted text reads backwards
    uninverted.reverse()
    # brackets and the digits 6 and 9 are mirrored when inverted
    extra_replace = str.maketrans('[]()<>96', '][)(><69')
    return ''.join(uninverted).translate(extra_replace)
|
|
|
|
|
|
|
|
|
|
|
2022-07-09 10:54:05 +00:00
|
|
|
|
def _standardize_text_range(text: str,
|
|
|
|
|
range_start: int, range_end: int,
|
|
|
|
|
offset: str) -> str:
|
|
|
|
|
"""Convert any fancy characters within the given range into ordinary ones
|
|
|
|
|
"""
|
|
|
|
|
offset = ord(offset)
|
|
|
|
|
ctr = 0
|
|
|
|
|
text = list(text)
|
|
|
|
|
while ctr < len(text):
|
|
|
|
|
val = ord(text[ctr])
|
|
|
|
|
if val in range(range_start, range_end):
|
|
|
|
|
text[ctr] = chr(val - range_start + offset)
|
|
|
|
|
ctr += 1
|
|
|
|
|
return "".join(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def standardize_text(text: str) -> str:
    """Converts fancy unicode text to ordinary letters

    Fullwidth, mathematical and enclosed alphabets are mapped to
    a-z or A-Z, then inverted (upside down) text is uninverted.
    """
    if not text:
        return text

    # code point of the first letter of each 26 letter alphabet
    # which maps to lower case a-z
    lower_case_starts = (
        65345,   # fullwidth
        119834,  # mathematical bold
        119886,  # mathematical italic
        119938,  # mathematical bold italic
        119990,  # mathematical script
        120042,  # mathematical bold script
        120094,  # mathematical fraktur
        120146,  # mathematical double-struck
        120198,  # mathematical bold fraktur
        120250,  # mathematical sans-serif
        120302,  # mathematical sans-serif bold
        120354,  # mathematical sans-serif italic
        120406,  # mathematical sans-serif bold italic
        120458   # mathematical monospace
    )
    # code point of the first letter of each 26 letter alphabet
    # which maps to upper case A-Z
    upper_case_starts = (
        65313,   # fullwidth
        119808,  # mathematical bold
        119860,  # mathematical italic
        119912,  # mathematical bold italic
        119964,  # mathematical script
        120016,  # mathematical bold script
        120068,  # mathematical fraktur
        120120,  # mathematical double-struck
        120172,  # mathematical bold fraktur
        120224,  # mathematical sans-serif
        120276,  # mathematical sans-serif bold
        120328,  # mathematical sans-serif italic
        120380,  # mathematical sans-serif bold italic
        120432,  # mathematical monospace
        127344,  # negative squared
        127312,  # negative circled
        127280,  # squared
        127248   # parenthesized
    )
    char_ranges = \
        [[start, 'a'] for start in lower_case_starts] + \
        [[start, 'A'] for start in upper_case_starts]

    for char_range in char_ranges:
        range_start = char_range[0]
        range_end = range_start + 26
        offset = char_range[1]
        text = _standardize_text_range(text, range_start, range_end, offset)

    return uninvert_text(text)
|
2022-07-09 10:54:05 +00:00
|
|
|
|
|
|
|
|
|
|
2024-09-13 15:11:02 +00:00
|
|
|
|
def remove_eol(line: str) -> str:
    """Removes line ending characters, along with any other
    whitespace at the end of the line
    """
    trimmed = line.rstrip()
    return trimmed
|
2022-06-21 11:58:50 +00:00
|
|
|
|
|
|
|
|
|
|
2022-06-10 13:01:39 +00:00
|
|
|
|
def text_in_file(text: str, filename: str,
                 case_sensitive: bool = True) -> bool:
    """Returns true if the given text appears within the given file.
    Returns false if the file is empty or could not be opened.
    """
    search_text = text if case_sensitive else text.lower()

    try:
        with open(filename, 'r', encoding='utf-8') as fp_file:
            file_text = fp_file.read()
    except OSError:
        print('EX: unable to find text in missing file ' + filename)
        return False

    if not file_text:
        return False
    if not case_sensitive:
        file_text = file_text.lower()
    return search_text in file_text
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 10:19:59 +00:00
|
|
|
|
def local_actor_url(http_prefix: str, nickname: str, domain_full: str) -> str:
    """Returns the /users/ url of the actor with the given nickname
    on the given domain
    """
    url_parts = (http_prefix, '://', domain_full, '/users/', nickname)
    return ''.join(url_parts)
|
2021-08-14 11:13:39 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 10:29:52 +00:00
|
|
|
|
def get_actor_languages_list(actor_json: {}) -> []:
    """Returns a list containing languages used by the given actor

    The languages are taken from the first attachment which is a
    property value whose name begins with "languages". Its value may
    be a list, or a string separated by commas, semicolons, slashes,
    plus signs or spaces. The result is sorted.
    Returns an empty list if no languages are found.
    """
    if not actor_json.get('attachment'):
        return []
    for property_value in actor_json['attachment']:
        # attachments can arrive from other instances, so don't
        # assume that they are well formed
        if not isinstance(property_value, dict):
            continue
        name_value = None
        if property_value.get('name'):
            name_value = property_value['name']
        elif property_value.get('schema:name'):
            name_value = property_value['schema:name']
        if not isinstance(name_value, str):
            continue
        if not name_value.lower().startswith('languages'):
            continue
        if not property_value.get('type'):
            continue
        prop_value_name, _ = \
            get_attachment_property_value(property_value)
        if not prop_value_name:
            continue
        if not isinstance(property_value['type'], str):
            continue
        if not property_value['type'].endswith('PropertyValue'):
            continue
        if isinstance(property_value[prop_value_name], list):
            # note that this sorts the list within actor_json itself
            lang_list = property_value[prop_value_name]
            lang_list.sort()
            return lang_list
        if isinstance(property_value[prop_value_name], str):
            lang_str = property_value[prop_value_name]
            # split on the first of these separators which is present
            for separator in (',', ';', '/', '+', ' '):
                if separator in lang_str:
                    break
            else:
                return [lang_str]
            lang_list: list[str] = []
            for lang in lang_str.split(separator):
                lang = lang.strip()
                # a trailing or doubled separator gives an empty
                # entry, which is not a language
                if lang and lang not in lang_list:
                    lang_list.append(lang)
            lang_list.sort()
            return lang_list
    return []
|
|
|
|
|
|
|
|
|
|
|
2022-05-01 13:23:32 +00:00
|
|
|
|
def has_object_dict(post_json_object: {}) -> bool:
    """Returns true if the object field of the given post
    is a non-empty dict
    """
    post_object = post_json_object.get('object')
    return bool(post_object) and isinstance(post_object, dict)
|
|
|
|
|
|
|
|
|
|
|
2023-05-18 11:15:18 +00:00
|
|
|
|
def remove_markup_tag(html: str, tag: str) -> str:
    """Remove the given tag from the given html markup

    Both the opening tags, including any attributes, and the closing
    tags are removed. The text between them is kept.
    A tag which has no closing > is left as it is, rather than
    causing an IndexError.
    Note that any tag beginning with the given name is removed,
    so that "pre" also matches "<pretty>".
    """
    if '<' + tag not in html and \
       '</' + tag not in html:
        return html

    tag_name = re.escape(tag)
    # opening tags first and then closing tags
    html = re.sub('<' + tag_name + '[^>]*>', '', html)
    return re.sub('</' + tag_name + '[^>]*>', '', html)
|
|
|
|
|
|
|
|
|
|
|
2024-09-24 19:40:30 +00:00
|
|
|
|
def remove_header_tags(html: str) -> str:
    """Removes any header tags (h1 to h6) from the given html text,
    keeping the text within them
    """
    # html defines six levels of heading
    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    for tag_str in header_tags:
        html = remove_markup_tag(html, tag_str)
    return html
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 10:50:49 +00:00
|
|
|
|
def get_content_from_post(post_json_object: {}, system_language: str,
                          languages_understood: [],
                          content_type: str) -> str:
    """Returns the content from the post in the given language
    including searching for a matching entry within contentMap

    content_type is the name of the field to be read, for example
    "content" or "summary". The matching map field is that name
    followed by "Map".
    If there is a map then the system language entry is used or,
    when the map has no such entry, the first entry for one of the
    understood languages. Without a map the field itself is used.
    Returns an empty string if no content is found.
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']
    map_dict = content_type + 'Map'
    has_contentmap_dict = False
    if this_post_json.get(map_dict):
        if isinstance(this_post_json[map_dict], dict):
            has_contentmap_dict = True
    if not this_post_json.get(content_type) and \
       not has_contentmap_dict:
        return ''
    content = ''
    replacements = {
        # decode the ampersand entity. Mapping '&' onto itself
        # would do nothing
        '&amp;': '&',
        '<u>': '',
        '</u>': ''
    }
    if has_contentmap_dict:
        if this_post_json[map_dict].get(system_language):
            sys_lang = this_post_json[map_dict][system_language]
            if isinstance(sys_lang, str):
                content = sys_lang
                content = remove_markup_tag(content, 'pre')
                content = replace_strings(content, replacements)
                return standardize_text(content)
        else:
            # is there a contentMap/summaryMap entry for one of
            # the understood languages?
            for lang in languages_understood:
                if not this_post_json[map_dict].get(lang):
                    continue
                map_lang = this_post_json[map_dict][lang]
                if not isinstance(map_lang, str):
                    continue
                content = map_lang
                content = remove_markup_tag(content, 'pre')
                content = replace_strings(content, replacements)
                return standardize_text(content)
    else:
        # the early return above means that the field exists here
        if isinstance(this_post_json[content_type], str):
            content = this_post_json[content_type]
            content = replace_strings(content, replacements)
            content = remove_markup_tag(content, 'pre')
    return standardize_text(content)
|
2021-07-18 11:48:29 +00:00
|
|
|
|
|
|
|
|
|
|
2022-12-16 21:43:34 +00:00
|
|
|
|
def get_language_from_post(post_json_object: {}, system_language: str,
                           languages_understood: [],
                           content_type: str) -> str:
    """Returns the language of the content within the post,
    which is the system language unless the content map only has
    an entry for one of the other understood languages
    """
    post_obj = post_json_object
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
    if not post_obj.get(content_type):
        return system_language
    content_map = post_obj.get(content_type + 'Map')
    if not content_map or not isinstance(content_map, dict):
        return system_language
    if content_map.get(system_language):
        return system_language
    # is there a contentMap/summaryMap entry for one of
    # the understood languages?
    for understood in languages_understood:
        if content_map.get(understood):
            return understood
    return system_language
|
|
|
|
|
|
|
|
|
|
|
2022-06-02 18:07:07 +00:00
|
|
|
|
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns the names of all media attached to the post, each
    followed by its url if there is one, as a single text.
    This is used for filtering
    """
    post_attachments = get_post_attachments(post_json_object)
    if not post_attachments:
        return ''
    pieces = []
    for media in post_attachments:
        if not isinstance(media, dict):
            print('WARN: attachment is not a dict ' + str(media))
            continue
        if not media.get('name'):
            # attachments without a description are skipped
            continue
        pieces.append(media['name'] + ' ')
        if media.get('url'):
            pieces.append(get_url_from_post(media['url']) + ' ')
    return ''.join(pieces).strip()
|
|
|
|
|
|
|
|
|
|
|
2024-04-23 19:46:30 +00:00
|
|
|
|
def _valid_summary(possible_summary: str) -> bool:
|
|
|
|
|
"""Returns true if the given summary field is valid
|
|
|
|
|
"""
|
|
|
|
|
if not isinstance(possible_summary, str):
|
|
|
|
|
return False
|
|
|
|
|
if len(possible_summary) < 2:
|
|
|
|
|
return False
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
2022-01-28 10:07:35 +00:00
|
|
|
|
def get_summary_from_post(post_json_object: {}, system_language: str,
                          languages_understood: []) -> str:
    """Returns the summary from the post in the given language
    including searching for a matching entry within summaryMap.
    A summary which is not valid is returned as an empty string.
    """
    summary = \
        get_content_from_post(post_json_object, system_language,
                              languages_understood, 'summary')
    if not summary:
        # Also try the "name" field if summary is not available.
        # See https://codeberg.org/
        # fediverse/fep/src/branch/main/fep/b2b8/fep-b2b8.md
        post_obj = post_json_object
        if has_object_dict(post_json_object):
            post_obj = post_json_object['object']
        obj_type = post_obj.get('type')
        if obj_type and isinstance(obj_type, str) and \
           obj_type == 'Article':
            summary = \
                get_content_from_post(post_json_object,
                                      system_language,
                                      languages_understood, 'name')
    if not summary:
        return summary
    summary = summary.strip()
    return summary if _valid_summary(summary) else ''
|
2022-01-28 10:07:35 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 11:29:40 +00:00
|
|
|
|
def get_base_content_from_post(post_json_object: {},
                               system_language: str) -> str:
    """Returns the contentMap entry for the given language if there
    is one, otherwise the content field of the post, or an empty
    string if that does not exist
    """
    post_obj = post_json_object
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
    content_map = post_obj.get('contentMap')
    if isinstance(content_map, dict):
        localized = content_map.get(system_language)
        if localized:
            return localized
    return post_obj.get('content', '')
|
2021-07-19 19:40:04 +00:00
|
|
|
|
|
|
|
|
|
|
2024-05-12 18:32:50 +00:00
|
|
|
|
def data_dir_testing(base_dir: str) -> None:
    """Switches the accounts data path into testing mode, in which
    it is always the accounts directory within the base directory.
    During unit tests __accounts_data_path__ should not be retained
    """
    global __accounts_data_path__, __accounts_data_path_tests__
    testing_path = base_dir + '/accounts'
    __accounts_data_path_tests__ = True
    __accounts_data_path__ = testing_path
    print('Data directory is in testing mode')
|
|
|
|
|
|
|
|
|
|
|
2024-05-12 19:38:38 +00:00
|
|
|
|
def set_accounts_data_dir(base_dir: str, accounts_data_path: str) -> None:
    """Sets the directory used to store instance accounts data

    The path is saved to data_path.txt within the base directory.
    Nothing is written if no path is given, or if the file already
    contains the given path.
    """
    if not accounts_data_path:
        return

    accounts_data_path_filename = base_dir + '/data_path.txt'
    if os.path.isfile(accounts_data_path_filename):
        # read the existing path
        path = None
        try:
            with open(accounts_data_path_filename, 'r',
                      encoding='utf-8') as fp_accounts:
                path = fp_accounts.read()
        except OSError:
            print('EX: unable to read ' + accounts_data_path_filename)
        # path is still None if the file could not be read, in which
        # case fall through and try to write it
        if path is not None and path.strip() == accounts_data_path:
            # path is already set, so avoid writing it again
            return

    try:
        with open(accounts_data_path_filename, 'w+',
                  encoding='utf-8') as fp_accounts:
            fp_accounts.write(accounts_data_path)
    except OSError:
        print('EX: unable to write ' + accounts_data_path_filename)
|
|
|
|
|
|
|
|
|
|
|
2024-05-12 18:32:50 +00:00
|
|
|
|
def data_dir(base_dir: str) -> str:
    """Returns the directory where account data is stored

    This is the accounts directory within the base directory, unless
    data_path.txt within the base directory gives an alternative.
    The result is kept in __accounts_data_path__ so that the file is
    only read while no path has yet been set.
    In testing mode the default directory is always returned.
    """
    global __accounts_data_path__
    global __accounts_data_path_tests__
    if __accounts_data_path_tests__:
        __accounts_data_path__ = base_dir + '/accounts'
        return __accounts_data_path__

    if not __accounts_data_path__:
        # the default path for accounts data
        __accounts_data_path__ = base_dir + '/accounts'

        # is an alternative path set?
        accounts_data_path_filename = base_dir + '/data_path.txt'
        if os.path.isfile(accounts_data_path_filename):
            path = None
            try:
                with open(accounts_data_path_filename, 'r',
                          encoding='utf-8') as fp_accounts:
                    path = fp_accounts.read()
            except OSError:
                print('EX: unable to read ' + accounts_data_path_filename)
            if path:
                path = path.strip()
            # a file containing only whitespace does not set a path,
            # so the default is kept rather than an empty string
            if path:
                __accounts_data_path__ = path
                print('Accounts data path set to ' + __accounts_data_path__)

    return __accounts_data_path__
|
2024-05-12 12:35:26 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:02:29 +00:00
|
|
|
|
def acct_dir(base_dir: str, nickname: str, domain: str) -> str:
    """Returns the directory for the account with the given nickname
    and domain, within the accounts data directory
    """
    accounts_root = data_dir(base_dir)
    return accounts_root + '/' + nickname + '@' + domain
|
2021-07-13 21:59:53 +00:00
|
|
|
|
|
|
|
|
|
|
2022-12-18 15:29:54 +00:00
|
|
|
|
def acct_handle_dir(base_dir: str, handle: str) -> str:
    """Returns the directory for the account with the given handle,
    within the accounts data directory
    """
    accounts_root = data_dir(base_dir)
    return accounts_root + '/' + handle
|
2022-12-18 15:29:54 +00:00
|
|
|
|
|
|
|
|
|
|
2024-09-13 15:11:02 +00:00
|
|
|
|
def refresh_newswire(base_dir: str) -> None:
    """Causes the newswire to be updated after a change to user accounts,
    by creating the .refresh_newswire file within the accounts data
    directory if it does not already exist
    """
    flag_filename = data_dir(base_dir) + '/.refresh_newswire'
    if not os.path.isfile(flag_filename):
        try:
            with open(flag_filename, 'w+',
                      encoding='utf-8') as fp_flag:
                fp_flag.write('\n')
        except OSError:
            print('EX: refresh_newswire unable to write ' +
                  flag_filename)
|
2021-02-13 10:01:04 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:13:46 +00:00
|
|
|
|
def get_sha_256(msg: str):
    """Returns a SHA256 hash of the given string

    The message may be given as bytes or as a string, which is
    encoded as utf-8 before hashing.
    The hash is returned as bytes.
    """
    if isinstance(msg, str):
        # the hash can only be updated with bytes
        msg = msg.encode('utf-8')
    digest = hashes.Hash(hashes.SHA256(), backend=default_backend())
    digest.update(msg)
    return digest.finalize()
|
2021-09-08 10:05:45 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:13:46 +00:00
|
|
|
|
def get_sha_512(msg: str):
    """Returns a SHA512 hash of the given string

    The message may be given as bytes or as a string, which is
    encoded as utf-8 before hashing.
    The hash is returned as bytes.
    """
    if isinstance(msg, str):
        # the hash can only be updated with bytes
        msg = msg.encode('utf-8')
    digest = hashes.Hash(hashes.SHA512(), backend=default_backend())
    digest.update(msg)
    return digest.finalize()
|
2019-07-02 09:25:29 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2022-01-30 17:21:28 +00:00
|
|
|
|
def local_network_host(host: str) -> bool:
    """Returns true if the given host begins with localhost
    or with one of the address prefixes 192. 127. or 10.
    """
    # NOTE(review): this is a plain prefix match on the text of the
    # host. 172.16.x.x style addresses are not included - confirm
    # whether that is intended
    local_prefixes = ('localhost', '192.', '127.', '10.')
    return host.startswith(local_prefixes)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:21:31 +00:00
|
|
|
|
def decoded_host(host: str) -> str:
    """Convert hostname to internationalized domain
    https://en.wikipedia.org/wiki/Internationalized_domain_name
    The host is returned unchanged if it contains a colon
    (eg. mydomain:8000), if it is a local network host, or if it
    is an onion or i2p address
    """
    if ':' in host:
        return host
    if local_network_host(host):
        return host
    if host.endswith('.onion') or host.endswith('.i2p'):
        return host
    return idna.decode(host)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:16:36 +00:00
|
|
|
|
def get_locked_account(actor_json: {}) -> bool:
    """Returns whether the given account requires follower approval
    This is only the case when manuallyApprovesFollowers exists within
    the actor and is exactly True, rather than merely being truthy
    """
    approves_manually = actor_json.get('manuallyApprovesFollowers')
    return approves_manually is True
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:31:47 +00:00
|
|
|
|
def has_users_path(path_str: str) -> bool:
    """Whether there is a /users/ path (or equivalent) in the given string
    The equivalents are the strings returned by get_user_paths
    A url of the form protocol://domain/nickname, where the nickname
    contains no slash or dot, is also accepted
    """
    if not path_str:
        return False

    if any(users_str in path_str for users_str in get_user_paths()):
        return True

    if '://' not in path_str:
        return False

    # the text after the protocol separator, up to the next slash
    domain = path_str.split('://')[1].split('/')[0]
    domain_prefix = '://' + domain + '/'
    if domain_prefix not in path_str:
        return False

    # whatever follows the domain needs to look like a bare nickname
    nickname = path_str.split(domain_prefix)[1]
    return not ('/' in nickname or '.' in nickname)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:37:53 +00:00
|
|
|
|
def valid_post_date(published: str, max_age_days: int, debug: bool) -> bool:
    """Returns true if the published date is recent and is not in the future
    Dates are compared as whole days elapsed since date_epoch
    A published date which cannot be parsed is not valid
    """
    epoch = date_epoch()
    today = (date_utcnow() - epoch).days

    post_time = \
        date_from_string_format(published, ["%Y-%m-%dT%H:%M:%S%z"])
    if not post_time:
        if debug:
            print('EX: valid_post_date invalid published date ' +
                  str(published))
        return False

    post_day = (post_time - epoch).days

    if post_day > today:
        if debug:
            print("Inbox post has a published date in the future!")
        return False

    if today - post_day >= max_age_days:
        if debug:
            print("Inbox post is not recent enough")
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 12:45:03 +00:00
|
|
|
|
def get_full_domain(domain: str, port: int) -> str:
    """Returns the full domain name, including port number
    The port is not appended if it is not given, if the domain
    already contains a colon, or if it is 80 or 443
    """
    if not port or ':' in domain or port in (80, 443):
        return domain
    return domain + ':' + str(port)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 14:20:09 +00:00
|
|
|
|
def get_video_extensions() -> []:
    """Returns the possible video file extensions, as a tuple
    Extensions do not include the leading dot
    """
    video_extensions = ('mp4', 'webm', 'ogv')
    return video_extensions
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 14:24:03 +00:00
|
|
|
|
def get_audio_extensions() -> []:
    """Returns the possible audio file extensions, as a tuple
    Extensions do not include the leading dot
    """
    audio_extensions = ('mp3', 'ogg', 'flac', 'opus', 'spx', 'wav')
    return audio_extensions
|
2021-08-03 09:09:04 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 14:26:16 +00:00
|
|
|
|
def get_image_extensions() -> []:
    """Returns the possible image file extensions, as a tuple
    Extensions do not include the leading dot
    """
    image_extensions = (
        'jpg', 'jpeg', 'gif', 'webp', 'avif', 'heic',
        'svg', 'ico', 'jxl', 'png'
    )
    return image_extensions
|
2020-11-21 11:21:05 +00:00
|
|
|
|
|
|
|
|
|
|
2024-02-05 20:05:00 +00:00
|
|
|
|
def image_mime_types_dict() -> {}:
    """Returns a dict of image mime types
    Each key is a file extension, without the dot, and each value
    is the subtype which follows image/ within the mime type
    """
    extension_subtypes = (
        ('png', 'png'),
        ('jpg', 'jpeg'),
        ('jpeg', 'jpeg'),
        ('jxl', 'jxl'),
        ('gif', 'gif'),
        ('avif', 'avif'),
        ('heic', 'heic'),
        ('svg', 'svg+xml'),
        ('webp', 'webp'),
        ('ico', 'x-icon')
    )
    return dict(extension_subtypes)
|
2024-02-05 20:05:00 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_image_mime_type(image_filename: str) -> str:
    """Returns the mime type for the given image filename
    The type is chosen from the filename extension, using the
    extensions within image_mime_types_dict
    If no extension matches then image/png is returned
    """
    matching_types = (
        'image/' + mime_ext
        for ext, mime_ext in image_mime_types_dict().items()
        if image_filename.endswith('.' + ext)
    )
    return next(matching_types, 'image/png')
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 16:02:54 +00:00
|
|
|
|
def get_image_extension_from_mime_type(content_type: str) -> str:
    """Returns the image extension from a mime type, such as image/jpeg
    The match is on the ending of the given content type
    If the ending is not recognised then png is returned
    """
    # pairs of mime type ending and the corresponding file extension
    subtype_extensions = (
        ('png', 'png'),
        ('jpeg', 'jpg'),
        ('jxl', 'jxl'),
        ('gif', 'gif'),
        ('svg+xml', 'svg'),
        ('webp', 'webp'),
        ('avif', 'avif'),
        ('heic', 'heic'),
        ('x-icon', 'ico')
    )
    for mime_ext, ext in subtype_extensions:
        if content_type.endswith(mime_ext):
            return ext
    return 'png'
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 14:39:49 +00:00
|
|
|
|
def get_media_extensions() -> []:
    """Returns the possible media file extensions
    These are the image extensions, followed by the video
    extensions, followed by the audio extensions
    """
    media_extensions = get_image_extensions()
    media_extensions += get_video_extensions()
    media_extensions += get_audio_extensions()
    return media_extensions
|
2020-11-21 11:54:29 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 15:44:28 +00:00
|
|
|
|
def get_image_formats() -> str:
    """Returns a string of permissible image formats
    used when selecting an image for a new post
    Each extension is prefixed by a dot, and the extensions are
    separated by a comma and a space
    """
    dotted_extensions = ['.' + ext for ext in get_image_extensions()]
    return ', '.join(dotted_extensions)
|
2020-11-21 11:21:05 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 15:56:15 +00:00
|
|
|
|
def get_media_formats() -> str:
    """Returns a string of permissible media formats
    used when selecting an attachment for a new post
    Each extension is prefixed by a dot, and the extensions are
    separated by a comma and a space
    """
    dotted_extensions = ['.' + ext for ext in get_media_extensions()]
    return ', '.join(dotted_extensions)
|
2020-11-21 11:54:29 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 15:43:22 +00:00
|
|
|
|
def remove_html(content: str) -> str:
    """Removes html markup from the given content.
    Used to ensure that profile descriptions don't contain dubious content
    Quote, paragraph end and line break tags are first converted into
    plain text equivalents, then everything between < and > is dropped
    Double spaces are collapsed into single ones, and a space is
    inserted after any full stop which is directly followed by a
    capital letter
    """
    if '<' not in content:
        return content

    replacements = {
        '<a href': ' <a href',
        '<q>': '"',
        '</q>': '"',
        '</p>': '\n\n',
        '<br>': '\n'
    }
    # NOTE(review): replace_strings is assumed to substitute each key
    # with its value - confirm against its definition
    content = replace_strings(content, replacements)

    # keep only the characters which are outside of tags
    removing = False
    kept_chars: list[str] = []
    for char in content:
        if char == '<':
            removing = True
        elif char == '>':
            removing = False
        elif not removing:
            kept_chars.append(char)

    # the space added before each link may have created a double space
    plain_text = ''.join(kept_chars).replace('  ', ' ')

    # insert spaces after full stops
    last_index = len(plain_text) - 1
    spaced_chars: list[str] = []
    for i, char in enumerate(plain_text):
        spaced_chars.append(char)
        if char == '.' and i < last_index:
            if 'A' <= plain_text[i + 1] <= 'Z':
                spaced_chars.append(' ')

    return ''.join(spaced_chars).replace('  ', ' ').strip()
|
|
|
|
|
|
|
|
|
|
|
2023-02-19 11:36:35 +00:00
|
|
|
|
def remove_style_within_html(content: str) -> str:
    """Removes style="something" within html post content.
    Used to ensure that inline styles are not retained
    If a style attribute has no closing quote then the remainder of
    that section is removed, rather than raising an exception
    """
    if '<' not in content:
        return content
    if ' style="' not in content:
        return content

    first_section, *styled_sections = content.split(' style="')
    result = first_section
    for section_text in styled_sections:
        # everything up to and including the closing quote is the style.
        # With no closing quote the partition tail is an empty string
        result += section_text.partition('"')[2]
    return result
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 15:52:08 +00:00
|
|
|
|
def first_paragraph_from_string(content: str) -> str:
    """Get the first paragraph from a blog post
    to be used as a summary in the newswire feed
    If the content has no paragraph tags then all of it is used
    The returned text has its html removed
    """
    if '<p>' not in content or '</p>' not in content:
        return remove_html(content)
    # the text after the first paragraph start tag, up to the next end tag
    paragraph = content.split('<p>')[1].split('</p>')[0]
    return remove_html(paragraph)
|
2020-11-08 10:52:07 +00:00
|
|
|
|
|
|
|
|
|
|
2023-08-31 22:29:10 +00:00
|
|
|
|
def get_memorials(base_dir: str) -> str:
    """Returns the nicknames for memorial accounts
    This is the content of the memorial file within the data directory
    An empty string is returned if the file doesn't exist
    or cannot be read
    """
    memorial_file = data_dir(base_dir) + '/memorial'
    memorial_str = ''
    if os.path.isfile(memorial_file):
        try:
            with open(memorial_file, 'r', encoding='utf-8') as fp_memorial:
                memorial_str = fp_memorial.read()
        except OSError:
            print('EX: unable to read ' + memorial_file)
    return memorial_str
|
|
|
|
|
|
|
|
|
|
|
2023-08-31 22:38:09 +00:00
|
|
|
|
def set_memorials(base_dir: str, domain: str, memorial_str) -> None:
    """Sets the nicknames for memorial accounts
    memorial_str contains one nickname per line. Only the nicknames
    which have an account directory are saved to the memorial file
    """
    # check that the accounts exist
    existing_nicks = []
    for memorial_item in memorial_str.split('\n'):
        memorial_nick = memorial_item.strip()
        if os.path.isdir(acct_dir(base_dir, memorial_nick, domain)):
            existing_nicks.append(memorial_nick + '\n')

    # save the accounts
    memorial_file = data_dir(base_dir) + '/memorial'
    try:
        with open(memorial_file, 'w+', encoding='utf-8') as fp_memorial:
            fp_memorial.write(''.join(existing_nicks))
    except OSError:
        print('EX: unable to write ' + memorial_file)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 14:37:28 +00:00
|
|
|
|
def _create_config(base_dir: str) -> None:
    """Creates a configuration file
    An empty config.json is saved within the base directory,
    unless that file already exists
    """
    config_filename = base_dir + '/config.json'
    if not os.path.isfile(config_filename):
        save_json({}, config_filename)
|
2020-10-06 08:58:44 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 20:38:02 +00:00
|
|
|
|
def set_config_param(base_dir: str, variable_name: str,
                     variable_value) -> None:
    """Sets a configuration value
    The configuration file is created if needed, and the value is
    stored under the camel case version of the variable name
    """
    _create_config(base_dir)
    config_filename = base_dir + '/config.json'
    if os.path.isfile(config_filename):
        config_json = load_json(config_filename)
    else:
        config_json = {}
    config_json[_convert_to_camel_case(variable_name)] = variable_value
    save_json(config_json, config_filename)
|
2020-10-06 08:58:44 +00:00
|
|
|
|
|
|
|
|
|
|
2024-09-13 15:11:02 +00:00
|
|
|
|
def get_config_param(base_dir: str, variable_name: str) -> str:
    """Gets a configuration value
    The value is looked up using the camel case version of the
    variable name. None is returned if the configuration could not
    be loaded, is empty, or doesn't contain the variable
    """
    _create_config(base_dir)
    config_json = load_json(base_dir + '/config.json')
    if not config_json:
        return None
    variable_name = _convert_to_camel_case(variable_name)
    if variable_name not in config_json:
        return None
    return config_json[variable_name]
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 13:58:17 +00:00
|
|
|
|
def get_followers_list(base_dir: str,
                       nickname: str, domain: str,
                       follow_file='following.txt') -> []:
    """Returns a list of followers for the given account
    These are the lines of the follow file within the account
    directory, with surrounding whitespace removed
    An empty list is returned if the file doesn't exist
    or cannot be read
    """
    filename = acct_dir(base_dir, nickname, domain) + '/' + follow_file

    if not os.path.isfile(filename):
        return []

    lines: list[str] = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_foll:
            lines = fp_foll.readlines()
    except OSError:
        print('EX: get_followers_list unable to read ' + filename)

    return [line.strip() for line in lines]
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 11:31:04 +00:00
|
|
|
|
def get_followers_of_person(base_dir: str,
                            nickname: str, domain: str,
                            follow_file='following.txt') -> []:
    """Returns a list containing the followers of the given person
    Used by the shared inbox to know who to send incoming mail to
    Each account directory at the top level of the data directory is
    examined, and is included if its follow file contains a line
    equal to the handle of the given person
    """
    followers: list[str] = []
    handle = nickname + '@' + remove_domain_port(domain)
    if not os.path.isdir(acct_handle_dir(base_dir, handle)):
        return followers

    # accounts which are never considered to be followers
    skipped_prefixes = ('inbox@', 'Actor@', 'news@')

    for subdir, dirs, _ in os.walk(data_dir(base_dir)):
        for account in dirs:
            if account == handle or account.startswith(skipped_prefixes):
                continue
            filename = os.path.join(subdir, account) + '/' + follow_file
            if not os.path.isfile(filename):
                continue
            try:
                with open(filename, 'r', encoding='utf-8') as fp_following:
                    # reading stops at the first matching line
                    is_follower = \
                        any(remove_eol(following_handle) == handle
                            for following_handle in fp_following)
                    if is_follower and account not in followers:
                        followers.append(account)
            except OSError as exc:
                print('EX: get_followers_of_person unable to read ' +
                      filename + ' ' + str(exc))
        # only the top level of the data directory is examined
        break
    return followers
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 11:31:04 +00:00
|
|
|
|
def remove_id_ending(id_str: str) -> str:
    """Removes endings such as /activity and /undo
    At most one path ending is removed, and then at most one
    fragment marker, together with any text which follows it
    """
    path_endings = ('/activity', '/undo', '/event',
                    '/replies', '/delete', '/update')
    for ending in path_endings:
        if id_str.endswith(ending):
            id_str = id_str[:-len(ending)]
            break

    # each marker is paired with whether it only matches at the end
    fragment_markers = (
        ('#Create', True),
        ('#delete', True),
        ('#update', False),
        ('#moved', False),
        ('#primary', False),
        ('#reciprocal', False)
    )
    for marker, only_at_end in fragment_markers:
        if only_at_end:
            matched = id_str.endswith(marker)
        else:
            matched = marker in id_str
        if matched:
            # keep the text before the first occurrence of the marker
            id_str = id_str.split(marker)[0]
            break
    return id_str
|
2020-08-23 11:13:35 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 17:16:57 +00:00
|
|
|
|
def remove_hash_from_post_id(post_id: str) -> str:
    """Removes any hash from a post id
    The text before the first # character is returned, or the
    whole post id if it doesn't contain one
    """
    before_hash = post_id.partition('#')[0]
    return before_hash
|
2021-12-06 12:42:05 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 17:20:01 +00:00
|
|
|
|
def get_protocol_prefixes() -> []:
    """Returns the valid protocol prefixes, as a tuple
    """
    protocol_prefixes = (
        'https://', 'http://', 'ftp://',
        'dat://', 'i2p://', 'gnunet://',
        'ipfs://', 'ipns://',
        'hyper://', 'gemini://', 'gopher://'
    )
    return protocol_prefixes
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 17:32:34 +00:00
|
|
|
|
def get_link_prefixes() -> []:
    """Returns the valid web link prefixes, as a tuple
    """
    link_prefixes = (
        'https://', 'http://', 'ftp://',
        'dat://', 'i2p://', 'gnunet://', 'payto://',
        'hyper://', 'gemini://', 'gopher://', 'briar:'
    )
    return link_prefixes
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 20:43:15 +00:00
|
|
|
|
def remove_avatar_from_cache(base_dir: str, actor_str: str) -> None:
    """Removes any existing avatar entries from the cache
    This avoids duplicate entries with differing extensions
    A cached avatar file is looked for with each of the
    possible image extensions
    """
    avatars_prefix = base_dir + '/cache/avatars/' + actor_str + '.'
    for extension in get_image_extensions():
        avatar_filename = avatars_prefix + extension
        if os.path.isfile(avatar_filename):
            try:
                os.remove(avatar_filename)
            except OSError:
                print('EX: remove_avatar_from_cache ' +
                      'unable to delete cached avatar ' +
                      str(avatar_filename))
|
2020-02-04 19:34:52 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-26 15:13:34 +00:00
|
|
|
|
def save_json(json_object: {}, filename: str) -> bool:
    """Saves json to a file
    json_object needs to be a dict or a list, otherwise nothing is saved
    Up to five attempts are made to write the file, with a one second
    pause after each failure. If the filename is too long then no
    further attempts are made
    Returns True if the file was saved
    """
    import errno

    if not isinstance(json_object, (dict, list)):
        print('EX: save_json object is not json ' + str(json_object))
        return False

    # convert to a string before the file is opened, so that an object
    # which can't be serialized doesn't truncate an existing file
    json_str = json.dumps(json_object)

    for tries in range(1, 6):
        try:
            with open(filename, 'w+', encoding='utf-8') as fp_json:
                fp_json.write(json_str)
            return True
        except OSError as exc:
            print('EX: save_json ' + str(tries) + ' ' + str(filename) +
                  ' ' + str(exc))
            if exc.errno == errno.ENAMETOOLONG:
                # filename too long, so trying again can't succeed
                break
            time.sleep(1)
    return False
|
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2024-06-20 10:47:58 +00:00
|
|
|
|
def load_json(filename: str) -> {}:
    """Loads a json formatted file
    A single attempt is made to read and parse the file
    Any /Actor@ within the filename is replaced by /inbox@
    Returns the parsed json, or None if the file could not be read,
    was empty, or did not contain valid json
    """
    if '/Actor@' in filename:
        filename = filename.replace('/Actor@', '/inbox@')

    # load from file
    try:
        with open(filename, 'r', encoding='utf-8') as fp_json:
            data = fp_json.read()
    except (OSError, UnicodeDecodeError) as exc:
        # a file which isn't valid utf-8 is handled like an unreadable one
        print('EX: load_json exception ' + str(filename) + ' ' + str(exc))
        return None

    # check that something was loaded
    if not data:
        print('EX: load_json no data ' + str(filename))
        return None

    # convert to json
    try:
        return json.loads(data)
    except (ValueError, RecursionError) as exc:
        # invalid json raises JSONDecodeError, which is a ValueError.
        # Very deeply nested json can exceed the recursion limit
        print('EX: load_json exception ' + str(filename) + ' ' + str(exc))
    return None
|
2019-10-22 11:55:06 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-26 15:13:34 +00:00
|
|
|
|
def load_json_onionify(filename: str, domain: str, onion_domain: str,
                       delay_sec: int = 2) -> {}:
    """Makes a few attempts to load a json formatted file
    This also converts the domain name to the onion domain
    and replaces https: with http:, within the text of the file
    before it is parsed
    Any /Actor@ within the filename is replaced by /inbox@
    Up to five attempts are made, pausing for delay_sec seconds after
    each failure to read or parse. An empty file is not retried
    Returns the parsed json, or None if it could not be loaded
    """
    if '/Actor@' in filename:
        filename = filename.replace('/Actor@', '/inbox@')

    for _ in range(5):
        try:
            with open(filename, 'r', encoding='utf-8') as fp_json:
                data = fp_json.read()
            if not data:
                # there is nothing to convert, so stop here rather than
                # repeatedly reading the same empty file
                break
            data = data.replace(domain, onion_domain)
            data = data.replace('https:', 'http:')
            return json.loads(data)
        except (OSError, ValueError, RecursionError):
            # ValueError covers both invalid utf-8 and invalid json
            print('EX: load_json_onionify exception ' + str(filename))
            if delay_sec > 0:
                time.sleep(delay_sec)
    return None
|
2020-03-02 14:35:44 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-27 17:46:27 +00:00
|
|
|
|
def get_status_number(published_str: str = None) -> (str, str):
    """Returns the status number and published date
    If no published date is given then the current time is used
    The status number is the time in milliseconds since date_epoch,
    shifted left by 16 bits, plus a sequence number
    Both of the returned values are strings
    """
    if published_str:
        curr_time = \
            date_from_string_format(published_str, ['%Y-%m-%dT%H:%M:%S%z'])
    else:
        curr_time = date_utcnow()
    days_since_epoch = (curr_time - date_epoch()).days

    # the number of whole seconds since epoch
    seconds_since_epoch = \
        (days_since_epoch * 24 * 60 * 60) + \
        (curr_time.hour * 60 * 60) + \
        (curr_time.minute * 60) + \
        curr_time.second
    # status is based on the number of milliseconds since epoch
    milliseconds_since_epoch = \
        seconds_since_epoch * 1000 + curr_time.microsecond // 1000

    # See https://github.com/tootsuite/mastodon/blob/
    # 995f8b389a66ab76ec92d9a240de376f1fc13a38/lib/mastodon/snowflake.rb
    # use the leftover microseconds as the sequence number
    sequence_id = curr_time.microsecond % 1000
    # shift by 16bits "sequence data"
    status_number = str((milliseconds_since_epoch << 16) + sequence_id)
    published = curr_time.strftime("%Y-%m-%dT%H:%M:%SZ")
    return status_number, published
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2019-07-02 09:25:29 +00:00
|
|
|
|
|
2021-12-27 17:46:27 +00:00
|
|
|
|
def evil_incarnate() -> []:
    """Hardcoded blocked domains, returned as a tuple
    """
    blocked_domains = (
        'fedilist.com', 'gab.com', 'gabfed.com', 'spinster.xyz'
    )
    return blocked_domains
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2020-03-28 10:33:04 +00:00
|
|
|
|
|
2021-12-27 17:53:41 +00:00
|
|
|
|
def contains_invalid_chars(json_str: str) -> bool:
    """Does the given json string contain invalid characters?
    Returns true if any entry of INVALID_CHARACTERS
    occurs within the string
    """
    return any(is_invalid in json_str
               for is_invalid in INVALID_CHARACTERS)
|
|
|
|
|
|
|
|
|
|
|
2022-12-26 10:49:41 +00:00
|
|
|
|
def contains_invalid_actor_url_chars(url: str) -> bool:
    """Does the given actor url contain invalid characters?
    Returns true if any entry of INVALID_ACTOR_URL_CHARACTERS occurs
    within the url, or if contains_invalid_chars is true for it
    """
    if any(is_invalid in url
           for is_invalid in INVALID_ACTOR_URL_CHARACTERS):
        return True

    return contains_invalid_chars(url)
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 19:33:45 +00:00
|
|
|
|
def remove_invalid_chars(text: str) -> str:
    """Removes any invalid characters from a string
    Every occurrence of each entry of INVALID_CHARACTERS is deleted
    """
    for is_invalid in INVALID_CHARACTERS:
        # replacing has no effect when the entry is not present
        text = text.replace(is_invalid, '')
    return text
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 19:26:54 +00:00
|
|
|
|
def create_person_dir(nickname: str, domain: str, base_dir: str,
                      dir_name: str) -> str:
    """Create a directory for a person
    The account directory for nickname@domain is created if it doesn't
    exist, and then the named directory within it
    Returns the path of the named directory
    """
    handle = nickname + '@' + domain
    account_path = acct_handle_dir(base_dir, handle)
    if not os.path.isdir(account_path):
        os.mkdir(account_path)
    person_path = acct_handle_dir(base_dir, handle) + '/' + dir_name
    if not os.path.isdir(person_path):
        os.mkdir(person_path)
    return person_path
|
2019-07-04 10:02:56 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-27 17:57:27 +00:00
|
|
|
|
def create_outbox_dir(nickname: str, domain: str, base_dir: str) -> str:
    """Create an outbox for a person
    Returns the path of the outbox directory
    """
    outbox_dir = create_person_dir(nickname, domain, base_dir, 'outbox')
    return outbox_dir
|
2019-07-04 10:02:56 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-27 18:00:51 +00:00
|
|
|
|
def create_inbox_queue_dir(nickname: str, domain: str, base_dir: str) -> str:
    """Create an inbox queue for a person
    Returns the path of the queue directory
    """
    queue_dir = create_person_dir(nickname, domain, base_dir, 'queue')
    return queue_dir
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2019-07-02 10:39:55 +00:00
|
|
|
|
|
2021-12-27 19:26:54 +00:00
|
|
|
|
def domain_permitted(domain: str, federation_list: []) -> bool:
    """Is the given domain permitted according to the federation list?
    Every domain is permitted if the federation list is empty
    Otherwise the domain, with any port removed, needs to be
    within the list
    """
    if len(federation_list) < 1:
        return True
    return remove_domain_port(domain) in federation_list
|
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-27 20:43:15 +00:00
|
|
|
|
def get_local_network_addresses() -> []:
    """Returns patterns for local network address detection, as a tuple
    """
    local_patterns = ('localhost', '127.0.', '192.168', '10.0.')
    return local_patterns
|
|
|
|
|
|
|
|
|
|
|
2022-03-22 18:22:09 +00:00
|
|
|
|
def _is_dangerous_string_tag(content: str, allow_local_network_access: bool,
                             separators: [], invalid_strings: []) -> bool:
    """Returns true if any tag within the given content is dangerous
    Each separator is a [start, end] pair delimiting a tag. The text
    of every tag is checked against the local network address patterns
    (unless local network access is allowed) and against the given
    invalid strings:
      - an invalid string ending with '-' must begin the tag text
      - if the tag text contains a space then the invalid string
        followed by a space must occur within it
      - otherwise the invalid string may occur anywhere in the tag text
    """
    for sep in separators:
        opener = sep[0]
        closer = sep[1]
        if opener not in content or closer not in content:
            continue
        fragments = content.split(opener)
        local_patterns = ()
        if not allow_local_network_access:
            local_patterns = get_local_network_addresses()
        for fragment in fragments:
            if closer not in fragment:
                # text which is not within a tag
                continue
            tag_text = fragment.split(closer)[0].strip()
            if any(pattern in tag_text for pattern in local_patterns):
                return True
            has_space = ' ' in tag_text
            for banned in invalid_strings:
                if banned.endswith('-'):
                    # prefix style match, such as amp-
                    if tag_text.startswith(banned):
                        return True
                elif has_space:
                    # tag with attributes
                    if banned + ' ' in tag_text:
                        return True
                elif banned in tag_text:
                    return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_dangerous_string_simple(content: str, allow_local_network_access: bool,
                                separators: [], invalid_strings: []) -> bool:
    """Returns true if any tag within the given content is dangerous
    Each separator is a [start, end] pair delimiting a tag. A tag is
    dangerous if its text contains any of the invalid strings or,
    when local network access is not allowed, any of the local
    network address patterns
    """
    for sep in separators:
        opener = sep[0]
        closer = sep[1]
        if opener not in content or closer not in content:
            continue
        fragments = content.split(opener)
        local_patterns = ()
        if not allow_local_network_access:
            local_patterns = get_local_network_addresses()
        for fragment in fragments:
            if closer not in fragment:
                # text which is not within a tag
                continue
            tag_text = fragment.split(closer)[0].strip()
            if any(pattern in tag_text for pattern in local_patterns):
                return True
            if any(banned in tag_text for banned in invalid_strings):
                return True
    return False
|
|
|
|
|
|
|
|
|
|
|
2023-01-19 15:17:20 +00:00
|
|
|
|
def html_tag_has_closing(tag_name: str, content: str) -> bool:
    """Returns true if every opening of the given tag has a closing tag
    The content is compared in lower case. If the tag does not appear
    then true is returned. For code and pre tags false is also returned
    if any line within the tag is 60 or more characters long
    """
    lowered = content.lower()
    open_marker = '<' + tag_name
    if open_marker not in lowered:
        return True
    close_marker = '</' + tag_name + '>'
    limit_line_length = tag_name in ('code', 'pre')
    # the text before the first opening tag is not examined
    for remainder in lowered.split(open_marker)[1:]:
        if close_marker not in remainder:
            # no ending tag exists
            return False
        if not limit_line_length:
            continue
        # check that lines are not too long
        inner = remainder.split(close_marker)[0].replace('<br>', '\n')
        if any(len(text_line) >= 60 for text_line in inner.split('\n')):
            print('<code> or <pre> line too long')
            return False
    return True
|
|
|
|
|
|
|
|
|
|
|
2023-05-18 11:15:18 +00:00
|
|
|
|
def dangerous_markup(content: str, allow_local_network_access: bool,
                     allow_tags: []) -> bool:
    """Returns true if the given content contains dangerous html markup
    Content is dangerous if it mentions .svg, if any tag contains
    one of the suspicious attribute strings, if a code or pre tag fails
    the html_tag_has_closing check, or if any tag matches one of the
    disallowed tag names. Tag names listed within allow_tags are
    not treated as disallowed
    """
    if '.svg' in content.lower():
        return True
    # NOTE(review): both separator pairs are identical as written.
    # Presumably the second pair was meant to be an escaped form of
    # the angle brackets - confirm against the upstream file
    separators = [['<', '>'], ['<', '>']]
    suspicious_attributes = [
        'ampproject', 'googleapis', '_exec(', ' id=', ' name='
    ]
    if _is_dangerous_string_simple(content, allow_local_network_access,
                                   separators, suspicious_attributes):
        return True
    if not html_tag_has_closing('code', content):
        return True
    if not html_tag_has_closing('pre', content):
        return True
    disallowed_tags = [
        'script', 'noscript', 'canvas', 'style', 'abbr', 'input',
        'frame', 'iframe', 'html', 'body', 'hr', 'allow-popups',
        'allow-scripts', 'amp-', '?php', 'pre'
    ]
    # remove any tags which the caller explicitly allows
    permitted = list(allow_tags)
    invalid_strings = [
        tag for tag in disallowed_tags if tag not in permitted
    ]
    return _is_dangerous_string_tag(content, allow_local_network_access,
                                    separators, invalid_strings)
|
2021-09-13 17:51:33 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 21:44:48 +00:00
|
|
|
|
def dangerous_svg(content: str, allow_local_network_access: bool) -> bool:
    """Returns true if the given svg file content contains dangerous scripts
    The only disallowed tag string is 'script'. Local network
    addresses within tags are also rejected by _is_dangerous_string_tag
    unless allow_local_network_access is set
    """
    # NOTE(review): both separator pairs are identical as written.
    # Presumably the second pair was meant to be an escaped form of
    # the angle brackets - confirm against the upstream file
    tag_delimiters = [['<', '>'], ['<', '>']]
    script_markers = ['script']
    is_dangerous = \
        _is_dangerous_string_tag(content, allow_local_network_access,
                                 tag_delimiters, script_markers)
    return is_dangerous
|
2021-09-13 17:51:33 +00:00
|
|
|
|
|
|
|
|
|
|
2024-05-26 12:24:08 +00:00
|
|
|
|
def _get_statuses_list() -> []:
    """Returns the url path fragments which indicate that
    a url refers to a post
    """
    status_paths = (
        '/statuses/', '/objects/', '/honk/', '/p/', '/h/',
        '/api/posts/', '/note/', '/notes/', '/comment/', '/post/',
        '/item/', '/videos/', '/button/', '/x/', '/o/',
        '/posts/', '/items/', '/object/', '/r/', '/content/',
        '/federation/', '/elsewhere/', '/article/', '/activity/',
        '/blog/', '/app.bsky.feed.post/'
    )
    return status_paths
|
2023-08-14 19:01:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def contains_statuses(url: str) -> bool:
    """Whether the given url contains /statuses/ or an equivalent
    post path. A url containing the current or the previous year
    as a path component is also accepted
    """
    if any(status_str in url for status_str in _get_statuses_list()):
        return True

    # wordpress-style blog post
    this_year = datetime.date.today().year
    for year in (this_year, this_year - 1):
        if '/' + str(year) + '/' in url:
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
2023-08-03 17:31:47 +00:00
|
|
|
|
def get_actor_from_post_id(post_id: str) -> str:
    """Returns an actor url from a post id containing /statuses/ or equivalent
    eg. https://somedomain/users/nick/statuses/123 becomes
    https://somedomain/users/nick
    If no conversion applies then the post id is returned unchanged
    """
    pixelfed_style_statuses = ['/p/']
    for status_str in _get_statuses_list():
        if status_str not in post_id:
            continue
        if status_str in pixelfed_style_statuses:
            # pixelfed style post id, where the nickname follows
            # the status path
            sections = post_id.split(status_str)
            nick = sections[1].split('/')[0]
            return sections[0] + '/users/' + nick
        if has_users_path(post_id):
            # keep everything prior to the status path
            return post_id.split(status_str)[0]
    return post_id
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 21:59:07 +00:00
|
|
|
|
def get_display_name(base_dir: str, actor: str, person_cache: {}) -> str:
    """Returns the display name for the given actor
    The name is taken from the person cache or, if the cache entry has
    no actor, from the cached actor file. None is returned if the actor
    is not within the person cache. A name containing dangerous markup
    is replaced by *ADVERSARY*
    """
    actor = get_actor_from_post_id(actor)
    cache_entry = person_cache.get(actor)
    if not cache_entry:
        return None
    display_name = None
    cached_actor = cache_entry.get('actor')
    if cached_actor:
        if cached_actor.get('name'):
            display_name = cached_actor['name']
    else:
        # Try to obtain from the cached actors
        actor_filename = \
            base_dir + '/cache/actors/' + actor.replace('/', '#') + '.json'
        if os.path.isfile(actor_filename):
            actor_json = load_json(actor_filename)
            if actor_json and actor_json.get('name'):
                display_name = actor_json['name']
    if display_name and dangerous_markup(display_name, False, []):
        display_name = "*ADVERSARY*"
    # NOTE: this is also called when no name was found
    return standardize_text(display_name)
|
2019-08-22 12:41:16 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2022-05-02 19:22:00 +00:00
|
|
|
|
def display_name_is_emoji(display_name: str) -> bool:
    """Returns true if the given display name consists only of
    emoji shortcodes, which begin and end with a colon
    If the name contains spaces then every space separated
    word must begin and end with a colon
    """
    if ' ' in display_name:
        return all(word.startswith(':') and word.endswith(':')
                   for word in display_name.split(' '))
    if len(display_name) < 2:
        return False
    return display_name.startswith(':') and display_name.endswith(':')
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 22:12:29 +00:00
|
|
|
|
def _gender_from_string(translate: {}, text: str) -> str:
    """Given some text, does it contain a gender description?
    Returns 'He/Him', 'She/Her' or None if nothing was detected.
    The translate dict must contain the keys 'He/Him', 'boy',
    'She/Her' and 'girl', whose translations are searched for first,
    followed by some English fallback words.
    Matching is by substring, so this is only a rough heuristic
    """
    if not text:
        return None
    text_orig = text
    text = text.lower()
    if translate['He/Him'].lower() in text or \
       translate['boy'].lower() in text:
        return 'He/Him'
    if translate['She/Her'].lower() in text or \
       translate['girl'].lower() in text:
        return 'She/Her'
    # 'female' contains 'male', so occurrences of 'female' are
    # disregarded when looking for 'male'. Otherwise a bio which
    # only says 'female' would be classified as He/Him
    if 'him' in text or 'male' in text.replace('female', ''):
        return 'He/Him'
    if 'her' in text or 'she' in text or \
       'fem' in text or 'woman' in text:
        return 'She/Her'
    # 'man' is checked after 'woman', which contains it.
    # 'He' is case sensitive and so uses the original text
    if 'man' in text or 'He' in text_orig:
        return 'He/Him'
    return None
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 22:12:29 +00:00
|
|
|
|
def get_gender_from_bio(base_dir: str, actor: str, person_cache: {},
                        translate: {}) -> str:
    """Tries to ascertain gender from bio description
    This is for use by text-to-speech for pitch setting
    The actor attachments (profile fields) are examined first,
    followed by the actor summary. 'They/Them' is returned if
    nothing can be determined
    """
    default_gender = 'They/Them'
    actor = get_actor_from_post_id(actor)
    cache_entry = person_cache.get(actor)
    if not cache_entry:
        return default_gender

    pronoun_str = translate['pronoun'].lower() if translate else 'pronoun'

    actor_json = cache_entry.get('actor')
    if not actor_json:
        actor_json = None
        # Try to obtain from the cached actors
        actor_filename = \
            base_dir + '/cache/actors/' + actor.replace('/', '#') + '.json'
        if os.path.isfile(actor_filename):
            actor_json = load_json(actor_filename)
    if not actor_json:
        return default_gender

    bio_found = None
    # is gender defined as a profile tag?
    tags_list = actor_json.get('attachment')
    if tags_list and isinstance(tags_list, list):
        # look for a gender or pronoun field name
        for tag in tags_list:
            if not isinstance(tag, dict):
                continue
            # schema:name takes precedence over name
            name_value = tag.get('schema:name') or tag.get('name')
            if not name_value:
                continue
            prop_value_name, _ = get_attachment_property_value(tag)
            if not prop_value_name:
                continue
            field_name = name_value.lower()
            if field_name == translate['gender'].lower() or \
               field_name.startswith(pronoun_str):
                bio_found = tag[prop_value_name]
                break
        # the field name could be anything,
        # just look at the value
        if not bio_found:
            for tag in tags_list:
                if not isinstance(tag, dict):
                    continue
                if not (tag.get('name') or tag.get('schema:name')):
                    continue
                prop_value_name, _ = get_attachment_property_value(tag)
                if not prop_value_name:
                    continue
                gender = \
                    _gender_from_string(translate, tag[prop_value_name])
                if gender:
                    return gender

    # if not then use the bio
    if not bio_found and actor_json.get('summary'):
        bio_found = actor_json['summary']
    if not bio_found:
        return default_gender
    return _gender_from_string(translate, bio_found) or default_gender
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 22:19:18 +00:00
|
|
|
|
def get_nickname_from_actor(actor: str) -> str:
    """Returns the nickname from an actor url or handle
    None is returned if no nickname could be extracted
    """
    if actor.startswith('@'):
        actor = actor[1:]

    # handle brid.gy urls
    actor = actor.replace('at://did:', 'did:')

    # the first matching user path decides the result
    for users_path in get_user_paths():
        if users_path in actor:
            nickname = actor.split(users_path)[1].replace('@', '')
            return nickname.split('/')[0]

    if '/@/' not in actor:
        if '/@' in actor:
            # https://domain/@nick
            return actor.split('/@')[1].split('/')[0]
        if '@' in actor:
            # nick@domain
            return actor.split('@')[0]

    if '://' not in actor:
        return None
    # url of the form https://domain/nick
    domain = actor.split('://')[1].split('/')[0]
    domain_prefix = '://' + domain + '/'
    if domain_prefix not in actor:
        return None
    nickname = actor.split(domain_prefix)[1]
    if '/' in nickname or '.' in nickname:
        return None
    return nickname
|
2019-07-06 15:17:21 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-26 12:24:40 +00:00
|
|
|
|
def get_user_paths() -> []:
    """Returns the url paths which may precede a nickname
    e.g. /users/nickname, /channel/nickname
    The order of the paths is significant to callers which
    use the first match
    """
    user_paths = (
        '/users/', '/profile/', '/accounts/', '/channel/', '/u/',
        '/c/', '/m/', '/a/', '/video-channels/', '/nieuws/author/',
        '/author/', '/federation/user/', '/activitypub/', '/actors/',
        '/snac/', '/@/', '/~/', '/fediverse/blog/', '/user/', '/@',
        '/api/collections/', '/feed/', '/actor/', '/ap/'
    )
    return user_paths
|
2021-07-04 22:58:01 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:53:07 +00:00
|
|
|
|
def get_group_paths() -> []:
    """Returns the url paths which may precede a group name
    e.g. https://lemmy/c/groupname
    """
    group_paths = [
        '/c/',
        '/video-channels/',
        '/m/'
    ]
    return group_paths
|
2021-07-30 13:00:23 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 19:05:25 +00:00
|
|
|
|
def get_domain_from_actor(actor: str) -> (str, int):
    """Returns the domain name from an actor url
    The result is a (domain, port) tuple. The port is None unless
    the extracted domain contains a ':' character, in which case
    it is obtained using get_port_from_domain and is removed
    from the returned domain
    """
    if actor.startswith('@'):
        actor = actor[1:]
    port = None
    prefixes = get_protocol_prefixes()
    # Every branch below assigns the domain, so no separate pass
    # over the possible user paths is needed before this point
    if '/@' in actor and '/@/' not in actor:
        # https://domain/@nick
        domain = actor.split('/@')[0]
        for prefix in prefixes:
            domain = domain.replace(prefix, '')
    elif '@' in actor and '/@/' not in actor:
        # nick@domain
        domain = actor.split('@')[1].strip()
    else:
        # url with a user path, or a bare domain
        domain = actor
        for prefix in prefixes:
            domain = domain.replace(prefix, '')
        if '/' in actor:
            domain = domain.split('/')[0]
    if ':' in domain:
        port = get_port_from_domain(domain)
        domain = remove_domain_port(domain)
    return domain, port
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 19:26:54 +00:00
|
|
|
|
def _set_default_pet_name(base_dir: str, nickname: str, domain: str,
                          follow_nickname: str, follow_domain: str) -> None:
    """Sets a default petname
    This helps especially when using onion or i2p address

    An entry of the form 'nick nick@domain' is added to the
    petnames.txt file within the account directory, unless a line
    beginning with the followed nickname and a space already exists.
    File errors are reported with print and are otherwise ignored
    """
    domain = remove_domain_port(domain)
    user_path = acct_dir(base_dir, nickname, domain)
    petnames_filename = user_path + '/petnames.txt'

    petname_lookup_entry = follow_nickname + ' ' + \
        follow_nickname + '@' + follow_domain + '\n'
    if not os.path.isfile(petnames_filename):
        # if there is no existing petnames lookup file
        try:
            with open(petnames_filename, 'w+',
                      encoding='utf-8') as fp_petnames:
                fp_petnames.write(petname_lookup_entry)
        except OSError:
            print('EX: _set_default_pet_name unable to write ' +
                  petnames_filename)
        return

    try:
        with open(petnames_filename, 'r', encoding='utf-8') as fp_petnames:
            petnames_str = fp_petnames.read()
            if petnames_str:
                petnames_list = petnames_str.split('\n')
                for pet in petnames_list:
                    if pet.startswith(follow_nickname + ' '):
                        # petname already exists
                        return
    except OSError:
        print('EX: _set_default_pet_name unable to read 1 ' +
              petnames_filename)
    # petname doesn't already exist
    try:
        with open(petnames_filename, 'a+', encoding='utf-8') as fp_petnames:
            fp_petnames.write(petname_lookup_entry)
    except OSError:
        # this is an append, so report it as a failed write
        print('EX: _set_default_pet_name unable to append ' +
              petnames_filename)
|
2020-11-23 15:07:55 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 17:08:19 +00:00
|
|
|
|
def follow_person(base_dir: str, nickname: str, domain: str,
                  follow_nickname: str, follow_domain: str,
                  federation_list: [], debug: bool,
                  group_account: bool,
                  follow_file: str) -> bool:
    """Adds a person to the follow list
    Returns False if the domain to be followed is not permitted by
    the federation list or if the account directory for
    nickname@domain does not exist, otherwise True.

    The followed handle is removed from unfollowed.txt if it is
    present there, and is then prepended to follow_file within the
    account directory. If follow_file ends with following.txt then
    the followed account is also added to the calendar and is given
    a default petname. Group accounts are stored with a '!' prefix
    """
    follow_domain_str_lower1 = follow_domain.lower()
    follow_domain_str_lower = remove_eol(follow_domain_str_lower1)
    if not domain_permitted(follow_domain_str_lower,
                            federation_list):
        if debug:
            print('DEBUG: follow of domain ' +
                  follow_domain + ' not permitted')
        return False
    if debug:
        print('DEBUG: follow of domain ' + follow_domain)

    # the account handle never includes a port number
    if ':' in domain:
        domain_only = remove_domain_port(domain)
        handle = nickname + '@' + domain_only
    else:
        handle = nickname + '@' + domain

    handle_dir = acct_handle_dir(base_dir, handle)
    if not os.path.isdir(handle_dir):
        print('WARN: account for ' + handle + ' does not exist')
        return False

    # handle without any port, used to search the unfollowed file
    if ':' in follow_domain:
        follow_domain_only = remove_domain_port(follow_domain)
        handle_to_follow = follow_nickname + '@' + follow_domain_only
    else:
        handle_to_follow = follow_nickname + '@' + follow_domain

    if group_account:
        handle_to_follow = '!' + handle_to_follow

    # was this person previously unfollowed?
    unfollowed_filename = acct_handle_dir(base_dir, handle) + '/unfollowed.txt'
    if os.path.isfile(unfollowed_filename):
        if text_in_file(handle_to_follow, unfollowed_filename):
            # remove them from the unfollowed file
            remaining_lines = []
            read_succeeded = False
            try:
                with open(unfollowed_filename, 'r',
                          encoding='utf-8') as fp_unfoll:
                    for line in fp_unfoll.readlines():
                        if handle_to_follow not in line:
                            remaining_lines.append(line)
                read_succeeded = True
            except OSError:
                print('EX: follow_person unable to read ' +
                      unfollowed_filename)
            # Only rewrite the file if it was read successfully,
            # otherwise its existing entries would be erased
            if read_succeeded:
                try:
                    with open(unfollowed_filename, 'w+',
                              encoding='utf-8') as fp_unfoll:
                        fp_unfoll.write(''.join(remaining_lines))
                except OSError:
                    print('EX: follow_person unable to write ' +
                          unfollowed_filename)

    dir_str = data_dir(base_dir)
    if not os.path.isdir(dir_str):
        os.mkdir(dir_str)
    # the handle stored within the follow file uses the follow domain
    # exactly as it was given, without removing any port
    handle_to_follow = follow_nickname + '@' + follow_domain
    if group_account:
        handle_to_follow = '!' + handle_to_follow
    filename = acct_handle_dir(base_dir, handle) + '/' + follow_file
    if os.path.isfile(filename):
        if text_in_file(handle_to_follow, filename):
            if debug:
                print('DEBUG: follow already exists')
            return True
        # prepend to follow file
        try:
            with open(filename, 'r+', encoding='utf-8') as fp_foll:
                content = fp_foll.read()
                if handle_to_follow + '\n' not in content:
                    # rewrite from the start with the new handle first
                    fp_foll.seek(0, 0)
                    fp_foll.write(handle_to_follow + '\n' + content)
                    print('DEBUG: follow added')
        except OSError as ex:
            print('WARN: Failed to write entry to follow file ' +
                  filename + ' ' + str(ex))
    else:
        # first follow
        if debug:
            print('DEBUG: ' + handle +
                  ' creating new following file to follow ' +
                  handle_to_follow +
                  ', filename is ' + filename)
        try:
            with open(filename, 'w+', encoding='utf-8') as fp_foll:
                fp_foll.write(handle_to_follow + '\n')
        except OSError:
            print('EX: follow_person unable to write ' + filename)

    if follow_file.endswith('following.txt'):
        # Default to adding new follows to the calendar.
        # Possibly this could be made optional
        # if following a person add them to the list of
        # calendar follows
        print('DEBUG: adding ' +
              follow_nickname + '@' + follow_domain + ' to calendar of ' +
              nickname + '@' + domain)
        add_person_to_calendar(base_dir, nickname, domain,
                               follow_nickname, follow_domain)
        # add a default petname
        _set_default_pet_name(base_dir, nickname, domain,
                              follow_nickname, follow_domain)
    return True
|
2019-07-11 12:29:31 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-27 22:32:59 +00:00
|
|
|
|
def votes_on_newswire_item(status: []) -> int:
    """Returns the number of votes on a newswire item
    This is the number of status entries which contain 'vote:'
    """
    return sum(1 for entry in status if 'vote:' in entry)
|
2020-10-08 19:47:23 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 22:38:48 +00:00
|
|
|
|
def locate_news_votes(base_dir: str, domain: str,
                      post_url: str) -> str:
    """Returns the votes filename for a news post
    within the news user account
    None is returned if the votes file does not exist
    """
    cleaned_url = remove_eol(post_url.strip())

    # convert the url into the form used for post filenames
    votes_name = remove_id_ending(cleaned_url.strip()).replace('/', '#')
    if votes_name.endswith('.json'):
        votes_name += '.votes'
    else:
        votes_name += '.json.votes'

    # look within the outbox of the news account
    votes_filename = \
        data_dir(base_dir) + '/news@' + domain + '/outbox/' + votes_name
    if not os.path.isfile(votes_filename):
        return None
    return votes_filename
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 22:46:10 +00:00
|
|
|
|
def locate_news_arrival(base_dir: str, domain: str,
                        post_url: str) -> str:
    """Looks up when a news post arrived.
    The arrival file within the outbox of the news account is read and
    its contents are parsed as a date. None is returned if there is no
    arrival file, or if it is empty or can't be read.
    NOTE(review): the return annotation is str, but what gets returned
    is whatever date_from_string_format produces - confirm and correct
    """
    post_url = remove_eol(post_url.strip())

    # the url is flattened so that it can be used as a filename
    arrival_name = remove_id_ending(post_url.strip()).replace('/', '#')

    # the arrival file is named after the json file of the post
    if not arrival_name.endswith('.json'):
        arrival_name += '.json'
    arrival_name += '.arrived'

    arrival_filename = \
        data_dir(base_dir) + '/news@' + domain + '/outbox/' + arrival_name
    if not os.path.isfile(arrival_filename):
        return None

    try:
        with open(arrival_filename, 'r', encoding='utf-8') as fp_arrival:
            arrival = fp_arrival.read()
            if arrival:
                return date_from_string_format(arrival,
                                               ["%Y-%m-%dT%H:%M:%S%z"])
    except OSError:
        print('EX: locate_news_arrival unable to read ' + arrival_filename)

    return None
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 10:17:58 +00:00
|
|
|
|
def clear_from_post_caches(base_dir: str, recent_posts_cache: {},
                           post_id: str) -> None:
    """Clears cached html for the given post, so that edits
    to news will appear.
    The cached html file is removed from the postcache directory of
    each account directory, other than those beginning with inbox@ or
    Actor@. The post is then removed from the index, json and html
    sections of recent_posts_cache, which is altered in place
    """
    filename = '/postcache/' + post_id + '.html'
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for acct in dirs:
            if '@' not in acct:
                continue
            if acct.startswith(('inbox@', 'Actor@')):
                continue
            post_filename = os.path.join(dir_str, acct) + filename
            if not os.path.isfile(post_filename):
                continue
            try:
                os.remove(post_filename)
            except OSError:
                print('EX: clear_from_post_caches file not removed ' +
                      str(post_filename))
        # only the top level of the data directory is examined
        break

    # Removal from the recent posts cache doesn't depend upon any
    # account, so it happens once, whether or not accounts were found
    if recent_posts_cache.get('index'):
        if post_id in recent_posts_cache['index']:
            recent_posts_cache['index'].remove(post_id)
    for section in ('json', 'html'):
        if recent_posts_cache.get(section):
            if recent_posts_cache[section].get(post_id):
                del recent_posts_cache[section][post_id]
|
2020-10-18 16:19:28 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 20:36:08 +00:00
|
|
|
|
def locate_post(base_dir: str, nickname: str, domain: str,
                post_url: str, replies: bool = False) -> str:
    """Finds the file in which the given post is stored.
    If replies is True then the replies file for the post is looked
    for, rather than its json file. The inbox, outbox and tlblogs
    directories of the account are searched first, then the outbox of
    the news account and finally the announce cache.
    Returns the filename, or None if the post was not found
    """
    extension = 'replies' if replies else 'json'

    # the url is flattened so that it can be used as a filename
    post_url = \
        remove_id_ending(post_url.strip()).replace('/', '#') + \
        '.' + extension

    # timelines belonging to the account
    account_dir = acct_dir(base_dir, nickname, domain) + '/'
    for box_name in ('inbox', 'outbox', 'tlblogs'):
        box_filename = account_dir + box_name + '/' + post_url
        if os.path.isfile(box_filename):
            return box_filename

    # posts made by the news account
    news_filename = \
        data_dir(base_dir) + '/news@' + domain + '/outbox/' + post_url
    if os.path.isfile(news_filename):
        return news_filename

    # announces cached for this nickname
    announce_filename = \
        base_dir + '/cache/announce/' + nickname + '/' + post_url
    if os.path.isfile(announce_filename):
        return announce_filename

    return None
|
2019-07-14 16:37:01 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-28 10:25:50 +00:00
|
|
|
|
def _get_published_date(post_json_object: {}) -> str:
    """Extracts the published date string from a post.
    The top level of the post is checked first and then, if nothing
    was found there, the object within it.
    Returns None if there is no published date or it isn't a string
    """
    published = post_json_object.get('published')
    if not published and has_object_dict(post_json_object):
        published = post_json_object['object'].get('published')
    if published and isinstance(published, str):
        return published
    return None
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 10:25:50 +00:00
|
|
|
|
def get_reply_interval_hours(base_dir: str, nickname: str, domain: str,
                             default_reply_interval_hrs: int) -> int:
    """Returns the reply interval for the given account.
    The reply interval is the number of hours after a post being made
    during which replies are allowed.
    The value is read from the .reply_interval_hours file within the
    account directory. If that file is missing, can't be read, or does
    not contain a whole number then default_reply_interval_hrs is
    returned
    """
    reply_interval_filename = \
        acct_dir(base_dir, nickname, domain) + '/.reply_interval_hours'
    if not os.path.isfile(reply_interval_filename):
        return default_reply_interval_hrs

    hours_str = ''
    try:
        with open(reply_interval_filename, 'r',
                  encoding='utf-8') as fp_interval:
            hours_str = fp_interval.read()
    except OSError:
        print('EX: get_reply_interval_hours unable to read ' +
              reply_interval_filename)

    # tolerate surrounding whitespace, such as a newline added when
    # the file was edited by hand
    hours_str = hours_str.strip()
    if hours_str.isdigit():
        try:
            return int(hours_str)
        except ValueError:
            # isdigit accepts some characters, such as superscript
            # digits, which int is unable to convert
            print('EX: get_reply_interval_hours invalid value within ' +
                  reply_interval_filename)
    return default_reply_interval_hrs
|
2021-09-08 18:37:04 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-28 12:15:46 +00:00
|
|
|
|
def set_reply_interval_hours(base_dir: str, nickname: str, domain: str,
                             reply_interval_hours: int) -> bool:
    """Stores the reply interval for the given account.
    The reply interval is the number of hours, after a post was made,
    during which replies to it are allowed. It is written to the
    .reply_interval_hours file within the account directory.
    Returns True if the value was saved, and False otherwise
    """
    interval_filename = \
        acct_dir(base_dir, nickname, domain) + '/.reply_interval_hours'
    try:
        with open(interval_filename, 'w+',
                  encoding='utf-8') as fp_interval:
            fp_interval.write(str(reply_interval_hours))
    except OSError:
        print('EX: set_reply_interval_hours unable to save reply interval ' +
              str(interval_filename) + ' ' +
              str(reply_interval_hours))
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 12:15:46 +00:00
|
|
|
|
def can_reply_to(base_dir: str, nickname: str, domain: str,
                 post_url: str, reply_interval_hours: int,
                 curr_date_str: str = None,
                 post_json_object: {} = None) -> bool:
    """Decides whether replying to the given local post is permitted.
    This mitigates spam, by preventing replies from being added to old
    posts where they would probably go unnoticed.
    Replies are always permitted if post_url doesn't contain /statuses/
    or if the post isn't stored locally. Otherwise the number of whole
    hours between the published date of the post and the current date
    must be at least zero and less than reply_interval_hours.
    curr_date_str may be given instead of using the current time, and
    post_json_object may be given to avoid loading the post from file
    """
    if '/statuses/' not in post_url:
        return True

    if not post_json_object:
        post_filename = locate_post(base_dir, nickname, domain, post_url)
        if not post_filename:
            # the post is not stored locally
            return True
        post_json_object = load_json(post_filename)
        if not post_json_object:
            return False

    published = _get_published_date(post_json_object)
    if not published:
        return False

    pub_date = date_from_string_format(published, ['%Y-%m-%dT%H:%M:%S%z'])
    if not pub_date:
        print('EX: can_reply_to unrecognized published date ' + str(published))
        return False

    if curr_date_str:
        curr_date = \
            date_from_string_format(curr_date_str, ['%Y-%m-%dT%H:%M:%S%z'])
    else:
        curr_date = date_utcnow()
    if not curr_date:
        print('EX: can_reply_to unrecognized current date ' +
              str(curr_date_str))
        return False

    elapsed_hours = int((curr_date - pub_date).total_seconds() / 3600)
    return 0 <= elapsed_hours < reply_interval_hours
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 13:49:44 +00:00
|
|
|
|
def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
                       post_json: {}) -> None:
    """Removes media files for an attachment
    Only the first attachment of the post is considered. The media file
    which its url refers to is deleted, along with the accompanying
    .vtt transcript and .etag files, and the attachment list of
    post_json is then set to an empty list.
    Nothing is deleted if the url would select a file which is outside
    of base_dir
    """
    post_attachments = get_post_attachments(post_json)
    if not post_attachments:
        return
    if not post_attachments[0].get('url'):
        return
    attachment_url = get_url_from_post(post_attachments[0]['url'])
    if not attachment_url:
        return
    attachment_url = remove_html(attachment_url)
    media_filename = base_dir + '/' + \
        attachment_url.replace(http_prefix + '://' + domain + '/', '')

    # The url is taken from the post, so don't allow any '..' within
    # it to select files which are outside of the base directory
    base_path = os.path.abspath(base_dir)
    try:
        common_path = \
            os.path.commonpath([base_path, os.path.abspath(media_filename)])
    except ValueError:
        # the paths have nothing in common, eg. different drives
        common_path = None

    if common_path != base_path:
        print('EX: _remove_attachment media file is outside of ' +
              'the base directory ' + str(media_filename))
    else:
        removals = (
            ('media file', media_filename),
            ('media transcript', media_filename + '.vtt'),
            ('etag file', media_filename + '.etag')
        )
        for description, filename in removals:
            if not os.path.isfile(filename):
                continue
            try:
                os.remove(filename)
            except OSError:
                print('EX: _remove_attachment unable to delete ' +
                      description + ' ' + str(filename))
    post_json['attachment'] = []
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2019-07-14 16:57:06 +00:00
|
|
|
|
|
2024-10-23 12:20:07 +00:00
|
|
|
|
def remove_post_from_index(post_url: str, debug: bool,
                           index_file: str) -> None:
    """Deletes the entry for a post from a box index file.
    The index is rewritten without any line which, ignoring its line
    ending, is equal to the id of the post. Nothing is written if the
    index doesn't exist or doesn't contain the id
    """
    if not os.path.isfile(index_file):
        return
    post_id = remove_id_ending(post_url)
    if not text_in_file(post_id, index_file):
        return

    index_lines = []
    try:
        with open(index_file, 'r', encoding='utf-8') as fp_read:
            index_lines = fp_read.readlines()
    except OSError as exc:
        print('EX: remove_post_from_index unable to read ' +
              index_file + ' ' + str(exc))

    if not index_lines:
        return

    try:
        with open(index_file, 'w+', encoding='utf-8') as fp_write:
            for index_line in index_lines:
                if index_line.strip("\n").strip("\r") == post_id:
                    # this is the entry being removed
                    if debug:
                        print('DEBUG: removed ' + post_id +
                              ' from index ' + index_file)
                else:
                    fp_write.write(index_line)
    except OSError as exc:
        print('EX: ' +
              'remove_post_from_index unable to write ' +
              index_file + ' ' + str(exc))
|
2024-07-18 12:55:47 +00:00
|
|
|
|
|
2024-10-23 12:20:07 +00:00
|
|
|
|
|
|
|
|
|
def remove_moderation_post_from_index(base_dir: str, post_url: str,
                                      debug: bool) -> None:
    """Deletes the entry for a post from the moderation index, which
    is the moderation.txt file within the data directory
    """
    remove_post_from_index(post_url, debug,
                           data_dir(base_dir) + '/moderation.txt')
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2019-08-12 18:02:29 +00:00
|
|
|
|
|
2021-12-26 19:36:40 +00:00
|
|
|
|
def _is_reply_to_blog_post(base_dir: str, nickname: str, domain: str,
                           post_json_object: str) -> bool:
    """Returns True if the given post replies to a blog post.
    This is the case if the id of the post being replied to appears
    within the tlblogs.index file of the account.
    NOTE(review): post_json_object is annotated as str, but it is
    indexed like a dict here - confirm and correct the annotation
    """
    if not has_object_dict(post_json_object):
        return False
    reply_id = get_reply_to(post_json_object['object'])
    if not reply_id or not isinstance(reply_id, str):
        return False
    blogs_index_filename = \
        acct_dir(base_dir, nickname, domain) + '/tlblogs.index'
    if not os.path.isfile(blogs_index_filename):
        return False
    # slashes within the id are replaced before searching the index
    post_id = remove_id_ending(reply_id).replace('/', '#')
    return bool(text_in_file(post_id, blogs_index_filename))
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def _delete_post_remove_replies(base_dir: str, nickname: str, domain: str,
                                http_prefix: str, post_filename: str,
                                recent_posts_cache: {}, debug: bool,
                                manual: bool) -> None:
    """Deletes the replies to a post which is itself being deleted.
    Each line of the replies file for the post is the id of a reply.
    Every reply which can be located is deleted via delete_post, after
    which the replies file is removed
    """
    replies_filename = post_filename.replace('.json', '.replies')
    if not os.path.isfile(replies_filename):
        return
    if debug:
        print('DEBUG: removing replies to ' + post_filename)

    try:
        with open(replies_filename, 'r', encoding='utf-8') as fp_replies:
            for reply_id in fp_replies:
                reply_file = locate_post(base_dir, nickname, domain, reply_id)
                if reply_file and os.path.isfile(reply_file):
                    delete_post(base_dir, http_prefix,
                                nickname, domain, reply_file, debug,
                                recent_posts_cache, manual)
    except OSError:
        print('EX: _delete_post_remove_replies unable to read ' +
              replies_filename)

    # the list of replies is no longer needed
    try:
        os.remove(replies_filename)
    except OSError:
        print('EX: _delete_post_remove_replies ' +
              'unable to delete replies file ' + str(replies_filename))
|
2021-07-05 09:24:29 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-28 13:12:10 +00:00
|
|
|
|
def _is_bookmarked(base_dir: str, nickname: str, domain: str,
                   post_filename: str) -> bool:
    """Returns True if the given post has been bookmarked.
    This is the case if the final part of post_filename, followed by a
    newline, appears within the bookmarks.index file of the account
    """
    bookmarks_index_filename = \
        acct_dir(base_dir, nickname, domain) + '/bookmarks.index'
    if not os.path.isfile(bookmarks_index_filename):
        return False
    bookmark_index = post_filename.rsplit('/', 1)[-1] + '\n'
    return bool(text_in_file(bookmark_index, bookmarks_index_filename))
|
|
|
|
|
|
|
|
|
|
|
2021-12-27 11:05:24 +00:00
|
|
|
|
def remove_post_from_cache(post_json_object: {},
                           recent_posts_cache: {}) -> None:
    """Removes the given post from the recent posts cache, if it is
    within it. The index, json and html sections of recent_posts_cache
    are altered in place
    """
    if not recent_posts_cache or \
       not post_json_object.get('id') or \
       not recent_posts_cache.get('index'):
        return

    post_id = post_json_object['id']
    if '#' in post_id:
        # anything from the first '#' onwards is not part of the id
        post_id = post_id.split('#', 1)[0]
    post_id = remove_id_ending(post_id).replace('/', '#')

    if post_id not in recent_posts_cache['index']:
        return
    recent_posts_cache['index'].remove(post_id)

    for section in ('json', 'html'):
        if recent_posts_cache.get(section):
            if recent_posts_cache[section].get(post_id):
                del recent_posts_cache[section][post_id]
|
2021-07-05 09:45:55 +00:00
|
|
|
|
|
|
|
|
|
|
2022-04-09 15:11:22 +00:00
|
|
|
|
def delete_cached_html(base_dir: str, nickname: str, domain: str,
                       post_json_object: {}) -> None:
    """Deletes the files which were cached for the given post.
    These are the cached html file, the ssml file alongside it and the
    ssml file within the outbox
    """
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname, domain, post_json_object)
    if not cached_post_filename:
        return

    ssml_filename = cached_post_filename.replace('.html', '.ssml')
    outbox_ssml_filename = ssml_filename.replace('/postcache/', '/outbox/')

    # each file to be removed, with the message shown if that fails
    removals = (
        (cached_post_filename,
         'unable to delete cached post file '),
        (ssml_filename,
         'unable to delete cached ssml post file '),
        (outbox_ssml_filename,
         'unable to delete cached outbox ssml post file ')
    )
    for filename, failure_message in removals:
        if not os.path.isfile(filename):
            continue
        try:
            os.remove(filename)
        except OSError:
            print('EX: delete_cached_html ' +
                  failure_message +
                  str(filename))
|
|
|
|
|
|
2021-07-05 09:45:55 +00:00
|
|
|
|
|
2022-08-22 10:58:55 +00:00
|
|
|
|
def _remove_post_id_from_tag_index(tag_index_filename: str,
                                   post_id: str) -> None:
    """Remove post_id from the tag index file
    Every line of the file which contains post_id is dropped. If only
    whitespace would then remain the file is deleted, and otherwise it
    is rewritten with the remaining lines.
    Nothing happens if the file is empty or can't be read
    """
    lines = None
    try:
        with open(tag_index_filename, 'r', encoding='utf-8') as fp_index:
            lines = fp_index.readlines()
    except OSError:
        print('EX: _remove_post_id_from_tag_index unable to read ' +
              tag_index_filename)
    if not lines:
        return

    # keep everything except for the lines referring to the post
    newlines = ''.join(file_line for file_line in lines
                       if post_id not in file_line)

    if not newlines.strip():
        # if there are no lines then remove the hashtag file
        try:
            os.remove(tag_index_filename)
        except OSError:
            print('EX: _remove_post_id_from_tag_index ' +
                  'unable to delete tag index ' + str(tag_index_filename))
    else:
        # write the new hashtag index without the given post in it
        try:
            with open(tag_index_filename, 'w+',
                      encoding='utf-8') as fp_index:
                fp_index.write(newlines)
        except OSError:
            print('EX: _remove_post_id_from_tag_index unable to write ' +
                  tag_index_filename)
|
2022-08-22 10:58:55 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def _delete_hashtags_on_post(base_dir: str, post_json_object: {}) -> None:
    """Removes a deleted post from the hashtag indexes.
    This only applies if the content of the post contains a '#'. For
    each Hashtag within the tags of the post, the post id is removed
    from the matching files within the tagmaps and tags directories
    """
    if not has_object_dict(post_json_object):
        return
    post_obj = post_json_object['object']

    content = post_obj.get('content')
    if not content or '#' not in content:
        return
    if not post_obj.get('id') or not post_obj.get('tag'):
        return

    # get the id of the post
    post_id = remove_id_ending(post_obj['id'])
    for tag in post_obj['tag']:
        if tag.get('type') != 'Hashtag' or not tag.get('name'):
            continue
        # the first character of the tag name is not part of the filename
        tag_name = tag['name'][1:]
        for index_dir in ('/tagmaps/', '/tags/'):
            index_filename = base_dir + index_dir + tag_name + '.txt'
            if os.path.isfile(index_filename):
                _remove_post_id_from_tag_index(index_filename, post_id)
|
2021-07-05 09:45:55 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
|
def _delete_conversation_post(base_dir: str, nickname: str, domain: str,
                              post_json_object: {}) -> None:
    """Removes a post from the conversation which it belongs to.
    The id of the post is removed from the conversation file within
    the conversation directory of the account. If no posts then remain
    the conversation file is deleted, along with its .muted file.
    NOTE(review): the return annotation is None, but False is returned
    whenever the post could not be removed from a conversation
    """
    if not has_object_dict(post_json_object):
        return False
    post_obj = post_json_object['object']

    # Due to lack of AP specification maintenance, a conversation can also be
    # referred to as a thread or (confusingly) "context"
    conversation_id = None
    for field_name in ('conversation', 'context', 'thread'):
        if post_obj.get(field_name):
            conversation_id = post_obj[field_name]
            break
    if not conversation_id:
        return False
    if not post_obj.get('id'):
        return False

    conversation_dir = \
        acct_dir(base_dir, nickname, domain) + '/conversation'
    if not isinstance(conversation_id, str):
        return False
    post_id = post_obj['id']
    conversation_filename = \
        conversation_dir + '/' + conversation_id.replace('/', '#')
    if not os.path.isfile(conversation_filename):
        return False

    conversation_str = ''
    try:
        with open(conversation_filename, 'r', encoding='utf-8') as fp_conv:
            conversation_str = fp_conv.read()
    except OSError:
        print('EX: _delete_conversation_post unable to read ' +
              conversation_filename)

    post_line = post_id + '\n'
    if post_line not in conversation_str:
        return False
    conversation_str = conversation_str.replace(post_line, '')

    if conversation_str:
        # other posts remain within the conversation
        try:
            with open(conversation_filename, 'w+',
                      encoding='utf-8') as fp_conv:
                fp_conv.write(conversation_str)
        except OSError:
            print('EX: _delete_conversation_post unable to write ' +
                  conversation_filename)
    else:
        # that was the last post within the conversation
        muted_filename = conversation_filename + '.muted'
        if os.path.isfile(muted_filename):
            try:
                os.remove(muted_filename)
            except OSError:
                print('EX: _delete_conversation_post ' +
                      'unable to remove conversation ' +
                      str(conversation_filename) + '.muted')
        try:
            os.remove(conversation_filename)
        except OSError:
            print('EX: _delete_conversation_post ' +
                  'unable to remove conversation ' +
                  str(conversation_filename))
|
2021-08-12 10:22:04 +00:00
|
|
|
|
|
|
|
|
|
|
2022-05-01 13:23:32 +00:00
|
|
|
|
def is_dm(post_json_object: {}) -> bool:
    """Returns true if the given post is a DM
    A DM is a Create activity whose object has one of the accepted
    types, has no moderationStatus, and has no public or followers
    address within its to or cc fields. The same address checks are
    applied whether a field is a list or a single string
    """
    if post_json_object.get('type') != 'Create':
        return False
    if not has_object_dict(post_json_object):
        return False
    post_obj = post_json_object['object']
    if post_obj.get('type') not in ('ChatMessage', 'Note', 'Event',
                                    'Page', 'Patch',
                                    'EncryptedMessage',
                                    'Article'):
        return False
    if post_obj.get('moderationStatus'):
        return False

    for field_name in ('to', 'cc'):
        recipients = post_obj.get(field_name)
        if not recipients:
            continue
        if isinstance(recipients, str):
            # a lone recipient is subject to the same checks as a list
            recipients = [recipients]
        elif not isinstance(recipients, list):
            continue
        for to_address in recipients:
            if not isinstance(to_address, str):
                # only address strings can be checked
                continue
            if to_address.endswith('#Public') or \
               to_address in ('as:Public', 'Public'):
                return False
            if to_address.endswith('followers'):
                return False
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_remote_dm(domain_full: str, post_json_object: {}) -> bool:
    """Returns True if the given post is a DM which is attributed to
    an actor whose address doesn't contain '://' followed by
    domain_full
    """
    if not is_dm(post_json_object):
        return False

    if has_object_dict(post_json_object):
        message_json = post_json_object['object']
    else:
        message_json = post_json_object

    if not message_json.get('attributedTo'):
        return False
    attrib = get_attributed_to(message_json['attributedTo'])
    if not attrib:
        return False
    return '://' + domain_full not in attrib
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def delete_post(base_dir: str, http_prefix: str,
                nickname: str, domain: str, post_filename: str,
                debug: bool, recent_posts_cache: {},
                manual: bool) -> None:
    """Deletes a post file together with its replies, attachments,
    sidecar files and index/cache entries.
    Unless manual is set, DMs from other instances are kept.
    Bookmarked posts and replies to blog posts are never deleted
    """
    post_json_object = load_json(post_filename)
    if not post_json_object:
        # the post could not be loaded, so only its replies
        # and the file itself can be removed
        _delete_post_remove_replies(base_dir, nickname, domain,
                                    http_prefix, post_filename,
                                    recent_posts_cache, debug, manual)
        try:
            os.remove(post_filename)
        except OSError:
            if debug:
                print('EX: delete_post unable to delete post ' +
                      str(post_filename))
        return

    # don't allow DMs to be deleted if they came from a different instance
    # otherwise this breaks expectations about how DMs should operate
    # i.e. DMs should only be removed if they are manually deleted
    if not manual and _is_remote_dm(domain, post_json_object):
        return

    # bookmarked posts are kept
    if _is_bookmarked(base_dir, nickname, domain, post_filename):
        return

    # replies to blog posts are kept
    if _is_reply_to_blog_post(base_dir, nickname, domain,
                              post_json_object):
        return

    # in-memory cache of recent posts
    remove_post_from_cache(post_json_object, recent_posts_cache)

    # conversation index
    _delete_conversation_post(base_dir, nickname, domain, post_json_object)

    # attachments
    _remove_attachment(base_dir, http_prefix, domain, post_json_object)

    # sidecar files, named either <post>.json.<ext> or <post>.<ext>
    extensions = (
        'votes', 'arrived', 'muted', 'tts', 'reject', 'mitm', 'edits', 'seen'
    )
    for ext in extensions:
        ext_filename = post_filename + '.' + ext
        if not os.path.isfile(ext_filename):
            if not post_filename.endswith('.json'):
                continue
            ext_filename = post_filename.replace('.json', '') + '.' + ext
            if not os.path.isfile(ext_filename):
                continue
        try:
            os.remove(ext_filename)
        except OSError:
            print('EX: delete_post unable to remove ext ' +
                  str(ext_filename))

    # cached html version of the post
    delete_cached_html(base_dir, nickname, domain, post_json_object)

    has_object = bool(post_json_object.get('object'))

    # moderation index file
    if has_object and has_object_dict(post_json_object):
        if post_json_object['object'].get('moderationStatus') and \
           post_json_object.get('id'):
            post_id = remove_id_ending(post_json_object['id'])
            remove_moderation_post_from_index(base_dir, post_id, debug)

    # hashtags index entries
    if has_object:
        _delete_hashtags_on_post(base_dir, post_json_object)

    # replies to this post
    _delete_post_remove_replies(base_dir, nickname, domain,
                                http_prefix, post_filename,
                                recent_posts_cache, debug, manual)
    # finally, the post itself
    try:
        os.remove(post_filename)
    except OSError:
        if debug:
            print('EX: delete_post unable to delete post ' +
                  str(post_filename))
|
2019-07-27 22:48:34 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2022-01-13 15:10:41 +00:00
|
|
|
|
def _is_valid_language(text: str) -> bool:
    """Returns true if the given text contains a valid
    natural language string.
    Every character, other than digits and underscores, must fall
    within a single one of the code point ranges listed below.
    An empty text is considered valid
    """
    # Each entry is [first code point, last code point + 1],
    # i.e. the upper bound is exclusive.
    # The Hangul, Cyrillic, Phonetic Extensions and Combining Half Marks
    # entries previously gave the last code point itself as the upper
    # bound, which excluded the final character of each of those ranges
    # (for example U+D7A3, the last Hangul syllable)
    natural_languages = {
        "Latin": [65, 866],
        "Greek": [880, 1280],
        "isArmenian": [1328, 1424],
        "isHebrew": [1424, 1536],
        "Arabic": [1536, 1792],
        "Syriac": [1792, 1872],
        "Thaan": [1920, 1984],
        "Devanagari": [2304, 2432],
        "Bengali": [2432, 2560],
        "Gurmukhi": [2560, 2688],
        "Gujarati": [2688, 2816],
        "Oriya": [2816, 2944],
        "Tamil": [2944, 3072],
        "Telugu": [3072, 3200],
        "Kannada": [3200, 3328],
        "Malayalam": [3328, 3456],
        "Sinhala": [3456, 3584],
        "Thai": [3584, 3712],
        "Lao": [3712, 3840],
        "Tibetan": [3840, 4096],
        "Myanmar": [4096, 4256],
        "Georgian": [4256, 4352],
        "HangulJamo": [4352, 4608],
        "Cherokee": [5024, 5120],
        "UCAS": [5120, 5760],
        "Ogham": [5760, 5792],
        "Runic": [5792, 5888],
        "Khmer": [6016, 6144],
        "Hangul Syllables": [44032, 55204],
        "Hangul Jamo": [4352, 4608],
        "Hangul Compatibility Jamo": [12592, 12688],
        "Hangul Jamo Extended-A": [43360, 43392],
        "Hangul Jamo Extended-B": [55216, 55296],
        "Mongolian": [6144, 6320],
        "Cyrillic": [1024, 1280],
        "Cyrillic Supplement": [1280, 1328],
        "Cyrillic Extended A": [11744, 11776],
        "Cyrillic Extended B": [42560, 42656],
        "Cyrillic Extended C": [7296, 7312],
        "Phonetic Extensions": [7467, 7545],
        "Combining Half Marks": [65070, 65072]
    }
    for first, end in natural_languages.values():
        # digits and underscores are permitted within any language
        if all(char.isdigit() or char == '_' or first <= ord(char) < end
               for char in text):
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def _get_reserved_words() -> tuple:
    """Returns a tuple of reserved words which should not be
    used for nicknames in order to avoid confusion.
    Each word appears only once
    """
    return ('inbox', 'dm', 'outbox', 'following',
            'public', 'followers', 'category',
            'channel', 'calendar', 'video-channels',
            'videos', 'tlreplies', 'tlmedia', 'tlblogs',
            'tlfeatures',
            'moderation', 'moderationaction',
            'activity', 'undo', 'pinned',
            'actor', 'Actor', 'instance.actor',
            'reply', 'replies', 'question', 'like',
            'likes', 'user', 'users', 'statuses',
            'tags', 'author', 'accounts', 'headers', 'snac',
            'channels', 'profile', 'u', 'c',
            'updates', 'repeat', 'announce',
            'shares', 'fonts', 'icons', 'avatars',
            'welcome', 'helpimages',
            'bookmark', 'bookmarks', 'tlbookmarks',
            'ignores', 'linksmobile', 'newswiremobile',
            'minimal', 'search', 'eventdelete',
            'searchemoji', 'catalog', 'conversationId', 'thread',
            'mention', 'http', 'https', 'ipfs', 'ipns',
            'ontologies', 'data', 'postedit', 'moved',
            'inactive', 'activitypub', 'actors',
            'note', 'notes', 'offers', 'wanted', 'honk',
            'button', 'post', 'item', 'comment',
            'content', 'federation', 'elsewhere',
            'article')
|
2021-07-29 12:18:12 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def get_nickname_validation_pattern() -> str:
    """Returns a html text input validation pattern for nickname,
    made from one negative lookahead per reserved word
    """
    lookaheads = ['(?!.*\\b' + word + '\\b)'
                  for word in _get_reserved_words()]
    # the start anchor is only present when there is at least one word
    anchor = '^' if lookaheads else ''
    # NOTE(review): the trailing {1,30} quantifier follows the '$' anchor
    # rather than the '.'; Python's re module rejects this form
    # ("nothing to repeat") - confirm that browsers accept it
    return anchor + ''.join(lookaheads) + '.*${1,30}'
|
2021-07-29 12:18:12 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:55:45 +00:00
|
|
|
|
def _is_reserved_name(nickname: str) -> bool:
    """Returns true if the given nickname is one of the reserved words
    """
    return nickname in _get_reserved_words()
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 14:41:10 +00:00
|
|
|
|
def valid_nickname(domain: str, nickname: str) -> bool:
    """Returns true if the nickname is between 1 and 30 characters,
    is written in a single recognised language, contains no forbidden
    characters, differs from the domain and is not a reserved word
    """
    if not 0 < len(nickname) <= 30:
        return False
    if not _is_valid_language(nickname):
        return False
    forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!')
    if any(char in nickname for char in forbidden_chars):
        return False
    # this should only apply for the shared inbox
    if nickname == domain:
        return False
    return not _is_reserved_name(nickname)
|
2019-08-08 11:24:26 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-28 14:41:10 +00:00
|
|
|
|
def no_of_accounts(base_dir: str) -> int:
    """Returns the number of accounts on the system,
    counted as the account directories found directly within
    the data directory
    """
    account_ctr = 0
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        account_ctr = sum(1 for account in dirs if is_account_dir(account))
        # only the top level of the data directory is examined
        break
    return account_ctr
|
2019-08-10 11:31:42 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-28 14:41:10 +00:00
|
|
|
|
def no_of_active_accounts_monthly(base_dir: str, months: int) -> int:
    """Returns the number of accounts on the system which were
    last used within the given number of months (of 30 days each).
    The last used time is read as an integer number of seconds from
    the .lastUsed file within each account directory
    """
    account_ctr = 0
    curr_time = int(time.time())
    month_seconds = int(60*60*24*30*months)
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for account in dirs:
            if not is_account_dir(account):
                continue
            last_used_filename = \
                dir_str + '/' + account + '/.lastUsed'
            if not os.path.isfile(last_used_filename):
                continue
            try:
                with open(last_used_filename, 'r',
                          encoding='utf-8') as fp_last_used:
                    last_used = fp_last_used.read()
            except OSError:
                print('EX: no_of_active_accounts_monthly unable to read ' +
                      last_used_filename)
                continue
            # anything other than a plain number is ignored
            if not last_used.isdigit():
                continue
            if curr_time - int(last_used) < month_seconds:
                account_ctr += 1
        # only the top level of the data directory is examined
        break
    return account_ctr
|
2019-11-13 15:15:08 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2024-02-19 18:48:55 +00:00
|
|
|
|
def copytree(src: str, dst: str, symlinks: str, ignore: bool):
    """Copy the contents of the src directory into the dst directory.
    dst is created if it does not already exist.
    Subdirectories are copied with shutil.copytree, to which symlinks
    and ignore are passed through unchanged, and other entries are
    copied with shutil.copy2
    """
    # copy2 fails if the destination directory is missing
    os.makedirs(dst, exist_ok=True)
    for item in os.listdir(src):
        s_dir = os.path.join(src, item)
        d_dir = os.path.join(dst, item)
        if os.path.isdir(s_dir):
            shutil.copytree(s_dir, d_dir, symlinks, ignore)
        else:
            shutil.copy2(s_dir, d_dir)
|
2019-10-19 17:50:05 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-26 23:53:16 +00:00
|
|
|
|
def get_cached_post_directory(base_dir: str,
                              nickname: str, domain: str) -> str:
    """Returns the postcache directory within the account directory,
    where the html post cache exists
    """
    return acct_dir(base_dir, nickname, domain) + '/postcache'
|
2019-10-19 17:50:05 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-26 23:41:34 +00:00
|
|
|
|
def get_cached_post_filename(base_dir: str, nickname: str, domain: str,
                             post_json_object: {}) -> str:
    """Returns the html cache filename for the given post,
    or None if the cache directory does not exist or
    does not contain an @ character
    """
    cache_dir = get_cached_post_directory(base_dir, nickname, domain)
    if not os.path.isdir(cache_dir) or '@' not in cache_dir:
        # invalid html cache directory
        return None
    # the post id becomes the filename, with slashes replaced
    flattened_id = \
        remove_id_ending(post_json_object['id']).replace('/', '#')
    return cache_dir + '/' + flattened_id + '.html'
|
2019-11-24 13:46:28 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2021-12-28 14:24:14 +00:00
|
|
|
|
def update_recent_posts_cache(recent_posts_cache: {}, max_recent_posts: int,
                              post_json_object: {}, html_str: str) -> None:
    """Store recent posts in memory so that they can be quickly recalled.
    The cache holds an 'index' list of post ids, oldest first, and
    'json' and 'html' dicts keyed by post id. When the number of html
    entries exceeds max_recent_posts the oldest posts are evicted.
    Posts without an id, or which are already indexed, are ignored
    """
    if not post_json_object.get('id'):
        return
    post_id = post_json_object['id']
    if '#' in post_id:
        post_id = post_id.split('#', 1)[0]
    post_id = remove_id_ending(post_id).replace('/', '#')

    if not recent_posts_cache.get('index'):
        # first post to be cached
        recent_posts_cache['index'] = [post_id]
        recent_posts_cache['json'] = {}
        recent_posts_cache['html'] = {}
        recent_posts_cache['json'][post_id] = json.dumps(post_json_object)
        recent_posts_cache['html'][post_id] = html_str
        return

    index = recent_posts_cache['index']
    if post_id in index:
        return
    index.append(post_id)
    post_json_object['muted'] = False
    recent_posts_cache['json'][post_id] = json.dumps(post_json_object)
    recent_posts_cache['html'][post_id] = html_str

    # Evict the oldest posts. Entries are removed whatever their value,
    # since an empty html string would otherwise never be evicted and
    # the loop would run until the index was exhausted
    while index and len(recent_posts_cache['html']) > max_recent_posts:
        oldest_id = index.pop(0)
        recent_posts_cache['json'].pop(oldest_id, None)
        recent_posts_cache['html'].pop(oldest_id, None)
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2020-02-21 10:19:02 +00:00
|
|
|
|
|
2021-12-28 14:01:37 +00:00
|
|
|
|
def file_last_modified(filename: str) -> str:
    """Returns the UTC date and time when a file was last modified,
    in the format YYYY-MM-DDTHH:MM:SSZ
    """
    modified_secs = os.path.getmtime(filename)
    utc = datetime.timezone.utc
    modified = datetime.datetime.fromtimestamp(modified_secs, utc)
    return modified.strftime("%Y-%m-%dT%H:%M:%SZ")
|
2020-02-22 16:00:27 +00:00
|
|
|
|
|
2020-04-04 13:44:49 +00:00
|
|
|
|
|
2022-07-12 19:03:30 +00:00
|
|
|
|
def get_css(base_dir: str, css_filename: str) -> str:
    """Returns the contents of the given css file,
    or None if it does not exist or cannot be read
    """
    if os.path.isfile(css_filename):
        try:
            with open(css_filename, 'r', encoding='utf-8') as fp_css:
                return fp_css.read()
        except OSError:
            print('EX: get_css unable to read ' + css_filename)
    return None
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 13:07:02 +00:00
|
|
|
|
def _search_virtual_box_posts(base_dir: str, nickname: str, domain: str,
                              search_str: str, max_results: int,
                              box_name: str) -> []:
    """Searches through a virtual box, which is typically an index on the inbox.
    Returns the filenames of indexed posts containing all of the search
    words, which are separated by + characters within search_str
    """
    index_filename = \
        acct_dir(base_dir, nickname, domain) + '/' + box_name + '.index'
    # bookmarked posts are looked up within the inbox directory
    posts_box = 'inbox' if box_name == 'bookmarks' else box_name
    path = acct_dir(base_dir, nickname, domain) + '/' + posts_box
    if not os.path.isdir(path):
        return []

    search_str = search_str.lower().strip()
    if '+' in search_str:
        search_words = [word.strip() for word in search_str.split('+')]
        print('SEARCH: ' + str(search_words))
    else:
        search_words = [search_str]

    res: list[str] = []
    try:
        with open(index_filename, 'r', encoding='utf-8') as fp_index:
            for index_line in fp_index:
                # stop at the first line which is not a post filename
                if '.json' not in index_line:
                    break
                post_filename = path + '/' + index_line.strip()
                if not os.path.isfile(post_filename):
                    continue
                with open(post_filename, 'r', encoding='utf-8') as fp_post:
                    data = fp_post.read().lower()

                # every search word must be present
                if not all(keyword in data for keyword in search_words):
                    continue

                res.append(post_filename)
                if len(res) >= max_results:
                    return res
    except OSError as exc:
        print('EX: _search_virtual_box_posts unable to read ' +
              index_filename + ' ' + str(exc))
    return res
|
|
|
|
|
|
|
|
|
|
|
2024-10-12 17:09:26 +00:00
|
|
|
|
def _get_mutuals_of_person(base_dir: str,
                           nickname: str, domain: str) -> []:
    """Returns the mutuals of a person
    i.e. the entries of their following list which also
    appear within their followers list
    """
    followers = \
        get_followers_list(base_dir, nickname, domain, 'followers.txt')
    following = \
        get_followers_list(base_dir, nickname, domain, 'following.txt')
    return [handle for handle in following if handle in followers]
|
|
|
|
|
|
|
|
|
|
|
2024-10-12 20:37:44 +00:00
|
|
|
|
def _actor_in_searchable_by(searchable_by: str, following_list: []) -> bool:
    """Returns true if the actor within searchable_by exists within the
    given list, either as the actor url or as a nickname@domain handle
    """
    # actor url is the text before /followers and after any quote
    data_actor = searchable_by.split('/followers')[0]
    if '"' in data_actor:
        data_actor = data_actor.split('"')[-1]

    if data_actor in following_list:
        return True

    # otherwise try matching on the handle
    data_nickname = get_nickname_from_actor(data_actor)
    data_domain, data_port = get_domain_from_actor(data_actor)
    if not data_nickname or not data_domain:
        return False
    data_domain_full = get_full_domain(data_domain, data_port)
    data_handle = data_nickname + '@' + data_domain_full
    return data_handle in following_list
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 13:07:02 +00:00
|
|
|
|
def search_box_posts(base_dir: str, nickname: str, domain: str,
                     search_str: str, max_results: int,
                     box_name='outbox') -> []:
    """Search your posts and return a list of the filenames
    containing matching strings.
    Search words are separated by + characters and must all be present.
    For the inbox, posts are only returned if their searchableBy field
    permits it
    """
    path = acct_dir(base_dir, nickname, domain) + '/' + box_name
    if not os.path.isdir(path):
        # is this a virtual box, such as direct messages?
        if os.path.isfile(path + '.index'):
            return _search_virtual_box_posts(base_dir, nickname, domain,
                                             search_str, max_results,
                                             box_name)
        return []

    search_str = search_str.lower().strip()
    if '+' in search_str:
        search_words = [word.strip() for word in search_str.split('+')]
        print('SEARCH: ' + str(search_words))
    else:
        search_words = [search_str]

    following_list: list[str] = []
    mutuals_list: list[str] = []
    check_searchable_by = box_name == 'inbox'
    if check_searchable_by:
        # https://codeberg.org/fediverse/fep/
        # src/branch/main/fep/268d/fep-268d.md
        # all of the handles followed
        following_list = get_followers_list(base_dir, nickname, domain,
                                            'following.txt')
        # all of the mutuals
        mutuals_list = _get_mutuals_of_person(base_dir, nickname, domain)

    res: list[str] = []
    for root, _, fnames in os.walk(path):
        for fname in fnames:
            file_path = os.path.join(root, fname)
            try:
                with open(file_path, 'r', encoding='utf-8') as fp_post:
                    data = fp_post.read()
                data_lower = data.lower()

                # every search word must be present
                if not all(keyword in data_lower
                           for keyword in search_words):
                    continue

                # if this is not an outbox/bookmarks search then is the
                # post marked as being searchable?
                if check_searchable_by:
                    if '"searchableBy":' not in data:
                        continue
                    searchable_by = \
                        data.split('"searchableBy":')[1].strip()
                    if searchable_by.startswith('['):
                        searchable_by = searchable_by.split(']')[0]
                    # take the first quoted value
                    if '"' in searchable_by:
                        quote_char = '"'
                    elif "'" in searchable_by:
                        quote_char = "'"
                    else:
                        continue
                    searchable_by = searchable_by.split(quote_char)[1]
                    if '#Public' not in searchable_by:
                        if '/followers' in searchable_by and \
                           following_list:
                            permitted = following_list
                        elif '/mutuals' in searchable_by and mutuals_list:
                            permitted = mutuals_list
                        else:
                            continue
                        if not _actor_in_searchable_by(searchable_by,
                                                       permitted):
                            continue

                res.append(file_path)
                if len(res) >= max_results:
                    return res
            except OSError as exc:
                print('EX: search_box_posts unable to read ' +
                      file_path + ' ' + str(exc))
        # only the top level of the box directory is searched
        break
    return res
|
2020-05-04 18:24:30 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
|
def get_file_case_insensitive(path: str) -> str:
    """Returns the given path if it is a file, otherwise the lower case
    version of the path if that is a file, otherwise None
    """
    candidates = [path]
    lower_path = path.lower()
    if lower_path != path:
        candidates.append(lower_path)
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    return None
|
2020-06-06 18:16:16 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 23:23:07 +00:00
|
|
|
|
def undo_likes_collection_entry(recent_posts_cache: {},
                                base_dir: str, post_filename: str,
                                actor: str, domain: str, debug: bool,
                                post_json_object: {}) -> None:
    """Removes the like made by the given actor from the likes collection
    of a post, then saves the post. The post is loaded from post_filename
    if post_json_object is not supplied
    """
    if not post_json_object:
        post_json_object = load_json(post_filename)
    if not post_json_object:
        return

    # remove any cached version of this post so that the
    # like icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname,
                                 domain, post_json_object)
    if cached_post_filename and os.path.isfile(cached_post_filename):
        try:
            os.remove(cached_post_filename)
        except OSError:
            print('EX: undo_likes_collection_entry ' +
                  'unable to delete cached post ' +
                  str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    # only Create activities have their likes updated
    if post_json_object.get('type') != 'Create':
        return
    obj = post_json_object
    if has_object_dict(post_json_object):
        obj = post_json_object['object']

    likes = obj.get('likes')
    if not likes or not isinstance(likes, dict) or not likes.get('items'):
        return
    total_items = likes.get('totalItems') or 0

    # remove the first like made by this actor
    item_found = False
    for like_item in likes['items']:
        if not like_item.get('actor') or like_item['actor'] != actor:
            continue
        if debug:
            print('DEBUG: like was removed for ' + actor)
        likes['items'].remove(like_item)
        item_found = True
        break
    if not item_found:
        return

    if total_items == 1:
        # that was the only like
        if debug:
            print('DEBUG: likes was removed from post')
        del obj['likes']
    else:
        likes['totalItems'] = len(likes['items'])

    save_json(post_json_object, post_filename)
|
2020-06-06 18:16:16 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 23:02:50 +00:00
|
|
|
|
def undo_reaction_collection_entry(recent_posts_cache: {},
                                   base_dir: str, post_filename: str,
                                   actor: str, domain: str, debug: bool,
                                   post_json_object: {},
                                   emoji_content: str) -> None:
    """Undoes an emoji reaction for a particular actor

    The first entry within the "reactions" collection of the post whose
    actor and content match the given actor and emoji_content is removed,
    and the post is then saved to post_filename.
    If post_json_object is empty then the post is loaded from
    post_filename. The cached post file, if one exists, is deleted and
    remove_post_from_cache is called before the collection is examined.
    Nothing is saved if the post is not a Create, if it has no reactions
    collection with items, or if no matching reaction is found.
    """
    if not post_json_object:
        post_json_object = load_json(post_filename)
    if not post_json_object:
        return
    # remove any cached version of this post so that the
    # reaction icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname,
                                 domain, post_json_object)
    if cached_post_filename:
        if os.path.isfile(cached_post_filename):
            try:
                os.remove(cached_post_filename)
            except OSError:
                print('EX: undo_reaction_collection_entry ' +
                      'unable to delete cached post ' +
                      str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not post_json_object.get('type'):
        return
    if post_json_object['type'] != 'Create':
        return
    # the collection may be on the wrapped object or on the post itself
    obj = post_json_object
    if has_object_dict(post_json_object):
        obj = post_json_object['object']
    if not obj.get('reactions'):
        return
    if not isinstance(obj['reactions'], dict):
        return
    if not obj['reactions'].get('items'):
        return
    # totalItems as stored before the removal
    total_items = 0
    if obj['reactions'].get('totalItems'):
        total_items = obj['reactions']['totalItems']
    item_found = False
    for like_item in obj['reactions']['items']:
        if not like_item.get('actor'):
            continue
        if like_item['actor'] != actor:
            continue
        # an item without a content field cannot match an emoji,
        # and must not cause a KeyError
        if 'content' not in like_item:
            continue
        if like_item['content'] != emoji_content:
            continue
        if debug:
            print('DEBUG: emoji reaction was removed for ' + actor)
        # removing during iteration is safe because of the break below
        obj['reactions']['items'].remove(like_item)
        item_found = True
        break
    if not item_found:
        return
    if total_items == 1:
        # that was the only recorded reaction, so drop the collection
        if debug:
            print('DEBUG: emoji reaction was removed from post')
        del obj['reactions']
    else:
        itlen = len(obj['reactions']['items'])
        obj['reactions']['totalItems'] = itlen

    save_json(post_json_object, post_filename)
|
2021-11-10 12:16:03 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-27 10:55:48 +00:00
|
|
|
|
def undo_announce_collection_entry(recent_posts_cache: {},
                                   base_dir: str, post_filename: str,
                                   actor: str, domain: str,
                                   debug: bool) -> None:
    """Undoes an announce for a particular actor by removing it from
    the "shares" collection within a post. Note that the "shares"
    collection has no relation to shared items in shares.py. It's
    shares of posts, not shares of physical objects.

    The post is loaded from post_filename and, if a matching announce
    was removed, saved back to the same file. The cached post file, if
    one exists, is deleted and remove_post_from_cache is called before
    the collection is examined.
    Nothing is saved if the post is not a Create with an object dict, if
    its shares is missing, is not a dict or has no items, or if the
    actor has no entry within the items.
    """
    post_json_object = load_json(post_filename)
    if not post_json_object:
        return
    # remove any cached version of this announce so that the announce
    # icon is changed
    nickname = get_nickname_from_actor(actor)
    if not nickname:
        return
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname, domain,
                                 post_json_object)
    if cached_post_filename:
        if os.path.isfile(cached_post_filename):
            try:
                os.remove(cached_post_filename)
            except OSError:
                if debug:
                    print('EX: undo_announce_collection_entry ' +
                          'unable to delete cached post ' +
                          str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not post_json_object.get('type'):
        return
    if post_json_object['type'] != 'Create':
        return
    if not has_object_dict(post_json_object):
        if debug:
            pprint(post_json_object)
            print('DEBUG: post has no object')
        return
    if not post_json_object['object'].get('shares'):
        return
    # shares could be something other than an embedded collection
    # (eg. a string), in which case there is nothing to remove.
    # This matches the check made for likes and reactions.
    if not isinstance(post_json_object['object']['shares'], dict):
        return
    shares = post_json_object['object']['shares']
    if not shares.get('items'):
        return
    # totalItems as stored before the removal
    total_items = 0
    if shares.get('totalItems'):
        total_items = shares['totalItems']
    item_found = False
    for announce_item in shares['items']:
        if not announce_item.get('actor'):
            continue
        if announce_item['actor'] != actor:
            continue
        if debug:
            print('DEBUG: Announce was removed for ' + actor)
        # removing during iteration is safe because of the break below
        shares['items'].remove(announce_item)
        item_found = True
        break
    if not item_found:
        return
    if total_items == 1:
        # that was the only recorded announce, so drop the collection
        if debug:
            print('DEBUG: shares (announcements) ' +
                  'was removed from post')
        del post_json_object['object']['shares']
    else:
        shares['totalItems'] = len(shares['items'])

    save_json(post_json_object, post_filename)
|
2020-06-06 18:34:39 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 23:41:34 +00:00
|
|
|
|
def update_announce_collection(recent_posts_cache: {},
                               base_dir: str, post_filename: str,
                               actor: str, nickname: str, domain: str,
                               debug: bool) -> None:
    """Adds an announce by the given actor to the "shares" collection
    of the post stored in post_filename, creating the collection if
    the post does not yet have one.
    "shares" here means shares (announces) of posts. It is not the
    same thing as the shared items handled within shares.py, which
    are shares of physical objects.
    If the actor already has an entry then the post is not saved.
    """
    post_json_object = load_json(post_filename)
    if not post_json_object:
        return

    # the cached version of the post is discarded so that the
    # announce icon gets updated
    cached_post_filename = \
        get_cached_post_filename(base_dir, nickname, domain,
                                 post_json_object)
    if cached_post_filename and os.path.isfile(cached_post_filename):
        print('update_announce_collection: removing ' +
              cached_post_filename)
        try:
            os.remove(cached_post_filename)
        except OSError:
            if debug:
                print('EX: update_announce_collection ' +
                      'unable to delete cached post ' +
                      str(cached_post_filename))
    remove_post_from_cache(post_json_object, recent_posts_cache)

    if not has_object_dict(post_json_object):
        if debug:
            pprint(post_json_object)
            print('DEBUG: post ' + post_filename + ' has no object')
        return

    post_url = remove_id_ending(post_json_object['id']) + '/shares'
    post_obj = post_json_object['object']
    if not post_obj.get('shares'):
        # first announce of this post: create the collection
        if debug:
            print('DEBUG: Adding initial shares (announcements) to ' +
                  post_url)
        post_obj['shares'] = {
            "@context": [
                'https://www.w3.org/ns/activitystreams',
                'https://w3id.org/security/v1'
            ],
            'id': post_url,
            'type': 'Collection',
            "totalItems": 1,
            'items': [{
                'type': 'Announce',
                'actor': actor
            }]
        }
    elif post_obj['shares'].get('items'):
        existing_items = post_obj['shares']['items']
        # nothing to do if this actor has already announced the post
        if any(entry.get('actor') and entry['actor'] == actor
               for entry in existing_items):
            return
        existing_items.append({
            'type': 'Announce',
            'actor': actor
        })
        post_obj['shares']['totalItems'] = len(existing_items)
    elif debug:
        print('DEBUG: shares (announcements) section of post ' +
              'has no items list')

    if debug:
        print('DEBUG: saving post with shares (announcements) added')
        pprint(post_json_object)
    save_json(post_json_object, post_filename)
|
2020-06-22 16:55:19 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 20:52:11 +00:00
|
|
|
|
def week_day_of_month_start(month_number: int, year: int) -> int:
    """Returns the weekday number for day 1 of the given month
    and year, where 1=sun and 7=sat
    """
    month_start = date_from_numbers(year, month_number, 1, 0, 0)
    # %w gives 0=sun .. 6=sat, so shift into the 1..7 range
    zero_based_weekday = int(month_start.strftime("%w"))
    return zero_based_weekday + 1
|
2020-11-13 13:34:14 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 20:48:15 +00:00
|
|
|
|
def media_file_mime_type(filename: str) -> str:
    """Given a media filename return its mime type

    The type is chosen from the text after the last '.' within the
    filename, compared without regard to upper or lower case.
    Returns 'image/png' if the filename has no '.' or if the
    extension is not recognised.
    """
    if '.' not in filename:
        return 'image/png'
    # keys are always file extensions and values are always mime types
    extensions = {
        'json': 'application/json',
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jxl': 'image/jxl',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'svg': 'image/svg+xml',
        'webp': 'image/webp',
        'avif': 'image/avif',
        'heic': 'image/heic',
        'ico': 'image/x-icon',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg',
        'wav': 'audio/vnd.wave',
        'opus': 'audio/opus',
        'spx': 'audio/speex',
        'flac': 'audio/flac',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv'
    }
    # lower case so that eg. PHOTO.JPG is recognised
    file_ext = filename.split('.')[-1].lower()
    return extensions.get(file_ext, 'image/png')
|
2021-03-03 20:16:53 +00:00
|
|
|
|
|
|
|
|
|
|
2024-02-16 11:29:02 +00:00
|
|
|
|
def time_days_ago(datestr: str) -> int:
    """Returns how many days before the current time the given
    date string is, or 0 if the date string could not be parsed
    """
    parsed_date = \
        date_from_string_format(datestr, ["%Y-%m-%dT%H:%M:%S%z"])
    if parsed_date:
        elapsed = date_utcnow() - parsed_date
        return elapsed.days
    return 0
|
2024-02-16 11:29:02 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 20:39:35 +00:00
|
|
|
|
def camel_case_split(text: str) -> str:
    """ Splits CamelCase into "Camel Case"

    A split is made between a lower case letter and a following upper
    case letter, and before the last letter of a run of upper case
    letters which is followed by a lower case letter, so that
    "HTTPServer" becomes "HTTP Server".
    Leading and trailing whitespace is removed from the result.
    """
    # re.finditer always returns an iterator object, which is truthy
    # even when there are no matches, so there is nothing to test here
    matches = re.finditer(r'.+?(?:(?<=[a-z])(?=[A-Z])|'
                          r'(?<=[A-Z])(?=[A-Z][a-z])|$)', text)
    return ' '.join(word.group(0) for word in matches).strip()
|
2021-03-05 19:00:37 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-29 10:39:46 +00:00
|
|
|
|
def convert_to_snake_case(text: str) -> str:
    """Returns a snake case version of the given camel case text
    eg. camelCaseText becomes camel_case_text
    """
    spaced_words = camel_case_split(text)
    lowered = spaced_words.lower()
    return lowered.replace(' ', '_')
|
|
|
|
|
|
|
|
|
|
|
2021-12-28 15:13:51 +00:00
|
|
|
|
def _convert_to_camel_case(text: str) -> str:
    """Converts a snake case string to camel case
    The first word is lower cased and each following word is title
    cased. Text containing no underscore is returned unchanged.
    """
    if '_' not in text:
        return text
    first_word, *later_words = text.split('_')
    return first_word.lower() + \
        ''.join(later.title() for later in later_words)
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 20:20:36 +00:00
|
|
|
|
def reject_post_id(base_dir: str, nickname: str, domain: str,
                   post_id: str, recent_posts_cache: {},
                   debug: bool) -> None:
    """Marks the post having the given id as rejected,
    for example an announce which is too old.
    Any entry for the post within the recent posts cache is dropped,
    a .reject file is written beside the post file and the post is
    removed from the inbox index of the account.
    """
    post_filename = locate_post(base_dir, nickname, domain, post_id)
    if not post_filename:
        return

    def _index_entry_name() -> str:
        # filename of the post without any directories or extension.
        # This should also correspond to any index entry in
        # the posts cache
        leaf_name = post_filename.split('/')[-1]
        return remove_eol(leaf_name).replace('.json', '').strip()

    post_url = None
    if recent_posts_cache.get('index'):
        post_url = _index_entry_name()
        if post_url in recent_posts_cache['index']:
            for section in ('json', 'html'):
                if recent_posts_cache[section].get(post_url):
                    del recent_posts_cache[section][post_url]

    reject_filename = post_filename + '.reject'
    try:
        with open(reject_filename, 'w+',
                  encoding='utf-8') as fp_reject:
            fp_reject.write('\n')
    except OSError:
        print('EX: reject_post_id unable to write ' +
              post_filename + '.reject')

    # if the post is in the inbox index then remove it
    index_file = \
        acct_dir(base_dir, nickname, domain) + '/inbox.index'
    if not post_url:
        post_url = _index_entry_name()
    indexed_name = post_url.replace('/', '#') + '.json'
    remove_post_from_index(indexed_name, debug, index_file)
|
2024-10-23 12:20:07 +00:00
|
|
|
|
|
2021-03-09 13:52:02 +00:00
|
|
|
|
|
2021-12-26 19:12:02 +00:00
|
|
|
|
def load_translations_from_file(base_dir: str, language: str) -> ({}, str):
    """Loads the translations for a language from the translations
    directory within base_dir.
    If no language is given then the locale language is used.
    English is used if no language can be determined or if there is
    no translations file for the language.
    Returns the loaded translations and the language code used,
    or None, None if the translations directory does not exist.
    """
    translations_dir = base_dir + '/translations'
    if not os.path.isdir(translations_dir):
        print('ERROR: translations directory not found')
        return None, None

    system_language = language if language else locale.getlocale()[0]
    if not system_language:
        system_language = 'en'

    # reduce eg. en_GB to en
    system_language = system_language.split('_')[0]
    # a single pass is enough, since the piece kept has no '/' in it
    if '/' in system_language:
        system_language = system_language.split('/')[1]
    # drop anything from the first '.' onwards
    system_language = system_language.split('.')[0]

    translations_file = \
        translations_dir + '/' + system_language + '.json'
    if not os.path.isfile(translations_file):
        system_language = 'en'
        translations_file = \
            translations_dir + '/' + system_language + '.json'
    return load_json(translations_file), system_language
|
2021-04-22 09:27:20 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 19:09:04 +00:00
|
|
|
|
def dm_allowed_from_domain(base_dir: str,
                           nickname: str, domain: str,
                           sending_actor_domain: str) -> bool:
    """Returns true if the sending domain is listed within the
    dmAllowedInstances.txt file of the account.
    When the account only accepts DMs from the accounts which it
    follows (the .followDMs flag file exists) this allows a few
    trusted instances to be permitted, whether or not anyone on
    them is individually followed.
    """
    allowed_filename = \
        acct_dir(base_dir, nickname, domain) + '/dmAllowedInstances.txt'
    if not os.path.isfile(allowed_filename):
        return False
    # NOTE(review): if text_in_file does a substring search then a
    # domain which is the ending of an allowed domain would also
    # match - confirm against text_in_file
    wanted_line = sending_actor_domain + '\n'
    return bool(text_in_file(wanted_line, allowed_filename))
|
2021-05-16 15:10:39 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 19:01:36 +00:00
|
|
|
|
def get_occupation_skills(actor_json: {}) -> []:
    """Returns the skills of the first Occupation entry within
    hasOccupation which has any skills.
    A single skill given as a string is returned as a list of one
    item. An empty list is returned if there are no skills, or if
    the skills are neither a list nor a string.
    """
    occupations = actor_json.get('hasOccupation') \
        if 'hasOccupation' in actor_json else None
    if not isinstance(occupations, list):
        return []
    for entry in occupations:
        if not isinstance(entry, dict):
            continue
        if entry.get('@type') != 'Occupation':
            continue
        skills = entry.get('skills')
        if not skills:
            continue
        if isinstance(skills, list):
            return skills
        if isinstance(skills, str):
            return [skills]
        # skills of an unexpected type ends the search
        return []
    return []
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:58:06 +00:00
|
|
|
|
def get_occupation_name(actor_json: {}) -> str:
    """Returns the name of the first Occupation entry within
    hasOccupation which has a name, or an empty string if there
    is none or if the name is not a string
    """
    occupations = actor_json.get('hasOccupation')
    if not occupations or not isinstance(occupations, list):
        return ""
    for entry in occupations:
        if not isinstance(entry, dict):
            continue
        if entry.get('@type') != 'Occupation':
            continue
        occupation_name = entry.get('name')
        if not occupation_name:
            continue
        if isinstance(occupation_name, str):
            return occupation_name
        # a name which is not a string ends the search
        return ""
    return ""
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:55:07 +00:00
|
|
|
|
def set_occupation_name(actor_json: {}, name: str) -> bool:
    """Sets the name of the first Occupation entry within the
    hasOccupation list of the actor.
    Returns true if an Occupation entry was found and updated
    """
    occupations = actor_json.get('hasOccupation')
    if not occupations or not isinstance(occupations, list):
        return False
    for entry in occupations:
        # entry is the dict held within the list, so assigning to
        # it updates the actor
        if isinstance(entry, dict) and \
           entry.get('@type') == 'Occupation':
            entry['name'] = name
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:50:59 +00:00
|
|
|
|
def set_occupation_skills_list(actor_json: {}, skills_list: []) -> bool:
    """Sets the skills of the first Occupation entry within the
    hasOccupation list of the actor.
    Returns true if an Occupation entry was found and updated
    """
    if 'hasOccupation' not in actor_json:
        return False
    occupations = actor_json['hasOccupation']
    if not isinstance(occupations, list):
        return False
    for entry in occupations:
        # entry is the dict held within the list, so assigning to
        # it updates the actor
        if isinstance(entry, dict) and \
           entry.get('@type') == 'Occupation':
            entry['skills'] = skills_list
            return True
    return False
|
2021-06-07 09:10:52 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:40:10 +00:00
|
|
|
|
def permitted_dir(path: str) -> bool:
    """Returns false for the special paths which should not be
    accessible directly via GET or POST
    """
    forbidden_prefixes = ('/wfendpoints', '/keys', '/accounts')
    return not path.startswith(forbidden_prefixes)
|
2021-06-20 15:45:29 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:37:07 +00:00
|
|
|
|
def user_agent_domain(user_agent: str, debug: bool) -> str:
    """Returns the domain from a url within the User-Agent string.
    https urls take precedence over http urls.
    Returns None if there is no url or if what follows the url
    scheme has no '.' within it
    """
    if 'https://' in user_agent:
        scheme = 'https://'
    elif 'http://' in user_agent:
        scheme = 'http://'
    else:
        return None

    # the text after the first occurrence of the scheme,
    # up to the next occurrence if there is one
    after_scheme = user_agent.split(scheme)[1].strip()
    # remove any path
    agent_domain = after_scheme.split('/')[0]
    # remove a closing bracket and anything after it
    if ')' in agent_domain:
        agent_domain = agent_domain.split(')')[0].strip()
    agent_domain = agent_domain.replace(' ', '').replace(';', '')

    if '.' not in agent_domain:
        return None
    if debug:
        print('User-Agent Domain: ' + agent_domain)
    return agent_domain
|
2021-06-22 15:45:59 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:32:02 +00:00
|
|
|
|
def get_alt_path(actor: str, domain_full: str, calling_domain: str) -> str:
    """Returns the actor url altered to use the calling domain, if
    that is an onion or i2p domain and the actor is on domain_full
    eg. https://clearnetdomain/path becomes http://oniondomain/path
    Otherwise the actor is returned unchanged
    """
    if calling_domain in actor or domain_full not in actor:
        return actor
    if not calling_domain.endswith(('.onion', '.i2p')):
        return actor
    # keep the text following the first occurrence of the full domain
    path_after_domain = actor.split(domain_full)[1]
    post_actor = 'http://' + calling_domain + path_after_domain
    print('Changed POST domain from ' + actor + ' to ' + post_actor)
    return post_actor
|
2021-06-26 11:16:41 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:22:20 +00:00
|
|
|
|
def get_actor_property_url(actor_json: {}, property_name: str) -> str:
    """Returns the url held by the first attachment of the actor
    whose name begins with the given property name, compared
    without regard to case.
    An empty string is returned if no attachment has a suitable url.
    Note that the 'value' field of each attachment which is examined
    as far as its value gets set to the stripped property value.
    """
    attachments = actor_json.get('attachment')
    if not attachments:
        return ''
    wanted_name = property_name.lower()
    for attach in attachments:
        # the name can be within either of two fields
        attach_name = attach.get('name') or attach.get('schema:name')
        if not attach_name:
            continue
        if not attach_name.lower().startswith(wanted_name):
            continue
        if not attach.get('type'):
            continue
        value_field, _ = get_attachment_property_value(attach)
        if not value_field:
            continue
        if not attach['type'].endswith('PropertyValue'):
            continue
        attach['value'] = attach[value_field].strip()
        prefixes = get_protocol_prefixes()
        url = remove_html(attach[value_field])
        # the url needs to begin with a known protocol prefix
        if not any(url.startswith(prefix) for prefix in prefixes):
            continue
        # and needs to look like a single url
        if '.' not in url or ' ' in url or ',' in url:
            continue
        return url
    return ''
|
2021-06-26 14:21:24 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:17:37 +00:00
|
|
|
|
def remove_domain_port(domain: str) -> str:
    """Returns the domain with any appended port removed
    eg. mydomain.com:80 becomes mydomain.com
    Domains beginning with did: are returned unchanged
    """
    if domain.startswith('did:'):
        return domain
    # without a ':' this is the whole of the domain
    return domain.split(':')[0]
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:14:21 +00:00
|
|
|
|
def get_port_from_domain(domain: str) -> int:
    """If the domain has a port number appended then return it
    eg. mydomain.com:80 returns 80

    Returns None if there is no ':' within the domain, if the domain
    begins with did: or if the text following the first ':' is not
    a decimal number.
    """
    if ':' not in domain or domain.startswith('did:'):
        return None
    port_str = domain.split(':')[1]
    # isdecimal rather than isdigit, because isdigit is also true for
    # characters such as superscript digits, which int() rejects
    # with a ValueError
    if port_str.isdecimal():
        return int(port_str)
    return None
|
2021-07-06 09:44:45 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:10:53 +00:00
|
|
|
|
def valid_url_prefix(url: str) -> bool:
    """Does the given url have a valid prefix?
    The url must contain a slash and must begin with one of the
    https, http, hyper, i2p or gnunet schemes
    """
    known_schemes = ('https:', 'http:', 'hyper:', 'i2p:', 'gnunet:')
    return '/' in url and url.startswith(known_schemes)
|
2021-07-20 14:39:43 +00:00
|
|
|
|
|
|
|
|
|
|
2024-04-16 13:47:21 +00:00
|
|
|
|
def valid_password(password: str, debug: bool) -> bool:
    """Returns true if the given password contains valid characters and
    is within a range of lengths
    password: the candidate password
    debug: if True then a warning is printed when the length of the
    password is outside of the permitted range
    """
    min_length = 8
    max_length = 1024
    if not min_length <= len(password) <= max_length:
        if debug:
            # report the limits which are actually being enforced
            print('WARN: password length out of range (' +
                  str(min_length) + '-' + str(max_length) + '): ' +
                  str(len(password)))
        return False
    # check for trailing end of line or carriage returns
    if remove_eol(password) != password:
        return False
    return True
|
2021-07-25 13:09:39 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 18:01:02 +00:00
|
|
|
|
def date_string_to_seconds(date_str: str) -> int:
    """Converts a date string (eg "published") into seconds since epoch
    The string is parsed using the format %Y-%m-%dT%H:%M:%S%z
    and None is returned if no date could be obtained from it
    """
    date_formats = ['%Y-%m-%dT%H:%M:%S%z']
    parsed_date = date_from_string_format(date_str, date_formats)
    if parsed_date:
        return _datetime_to_timestamp(parsed_date)
    print('EX: date_string_to_seconds unable to parse date ' +
          str(date_str))
    return None
|
2021-07-28 09:35:21 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:55:38 +00:00
|
|
|
|
def date_seconds_to_string(date_sec: int) -> str:
    """Converts a date in seconds since epoch to a string
    The returned string is UTC in the format YYYY-MM-DDTHH:MM:SSZ
    """
    utc = datetime.timezone.utc
    # fromtimestamp with an explicit timezone always returns an
    # aware datetime which is already expressed in that timezone
    date_utc = datetime.datetime.fromtimestamp(date_sec, tz=utc)
    return date_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
2021-07-30 16:06:34 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:29:09 +00:00
|
|
|
|
def get_currencies() -> {}:
    """Returns a dictionary of currencies
    Each key is a currency symbol and each value is the three letter
    currency code for it. A new dictionary is built on every call,
    with the entries always in the same order
    """
    return {
        "CA$": "CAD",
        "J$": "JMD",
        "£": "GBP",
        "€": "EUR",
        "؋": "AFN",
        "ƒ": "AWG",
        "₼": "AZN",
        "Br": "BYN",
        "BZ$": "BZD",
        "$b": "BOB",
        "KM": "BAM",
        "P": "BWP",
        "лв": "BGN",
        "R$": "BRL",
        "៛": "KHR",
        "$U": "UYU",
        "RD$": "DOP",
        "$": "USD",
        "₡": "CRC",
        "kn": "HRK",
        "₱": "CUP",
        "Kč": "CZK",
        "kr": "NOK",
        "¢": "GHS",
        "Q": "GTQ",
        "L": "HNL",
        "Ft": "HUF",
        "Rp": "IDR",
        "₹": "INR",
        "﷼": "IRR",
        "₪": "ILS",
        "¥": "JPY",
        "₩": "KRW",
        "₭": "LAK",
        "ден": "MKD",
        "RM": "MYR",
        "₨": "MUR",
        "₮": "MNT",
        "MT": "MZN",
        "C$": "NIO",
        "₦": "NGN",
        "Gs": "PYG",
        "zł": "PLN",
        "lei": "RON",
        "₽": "RUB",
        "Дин": "RSD",
        "S": "SOS",
        "R": "ZAR",
        "CHF": "CHF",
        "NT$": "TWD",
        "฿": "THB",
        "TT$": "TTD",
        "₴": "UAH",
        "Bs": "VEB",
        "₫": "VND",
        # Zimbabwe dollar: the code is ZWD, ZQD is not a currency code
        "Z$": "ZWD"
    }
|
2021-08-08 11:16:18 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:26:55 +00:00
|
|
|
|
def get_supported_languages(base_dir: str) -> []:
    """Returns a list of supported languages
    These are the two letter names of the json files found directly
    within the translations directory. Subdirectories are ignored
    """
    # only the first entry from the walk is used, which is the
    # translations directory itself
    _, _, top_files = \
        next(os.walk(base_dir + '/translations'), (None, [], []))
    lang_codes = [name.split('.')[0] for name in top_files
                  if name.endswith('.json')]
    return [code for code in lang_codes if len(code) == 2]
|
2021-08-08 18:39:03 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:18:34 +00:00
|
|
|
|
def get_category_types(base_dir: str) -> []:
    """Returns the list of ontologies
    These come from the names of json files directly within the
    ontology directory which contain the text Types, with that text
    removed. Names containing # or ~ or starting with custom are skipped
    """
    # only the first entry from the walk is used, which is the
    # ontology directory itself
    _, _, top_files = \
        next(os.walk(base_dir + '/ontology'), (None, [], []))
    categories: list[str] = []
    for name in top_files:
        if not name.endswith('.json') or '#' in name or '~' in name:
            continue
        if name.startswith('custom'):
            continue
        stem = name.split('.')[0]
        if 'Types' in stem:
            categories.append(stem.replace('Types', ''))
    return categories
|
2021-08-09 13:07:32 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:24:00 +00:00
|
|
|
|
def get_shares_files_list() -> []:
    """Returns the possible shares files
    The names are returned as a tuple
    """
    shares_file_types = ('shares', 'wanted')
    return shares_file_types
|
2021-08-22 18:38:02 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:21:37 +00:00
|
|
|
|
def replace_users_with_at(actor: str) -> str:
    """ https://domain/users/nick becomes https://domain/@nick
    Only the first user path which occurs within the actor is
    considered, and nothing is changed if the actor contains /@/
    """
    for user_path in get_user_paths():
        if user_path not in actor:
            continue
        if '/@/' not in actor:
            actor = actor.replace(user_path, '/@')
        break
    return actor
|
2021-10-13 09:33:15 +00:00
|
|
|
|
|
|
|
|
|
|
2024-01-10 11:04:41 +00:00
|
|
|
|
def get_actor_from_post(post_json_object: {}) -> str:
    """Gets the actor url from the given post
    An empty string is returned if there is no actor, or if the
    actor does not resemble a url
    """
    actor = post_json_object.get('actor')
    if not actor:
        return ''

    candidate = None
    if isinstance(actor, str):
        # conventionally the actor is just a string url
        candidate = actor
    elif isinstance(actor, dict):
        # in pixelfed/friendica the actor is sometimes a dict
        # with a lot of properties
        actor_id = actor.get('id')
        if actor_id and isinstance(actor_id, str):
            candidate = actor_id

    # looks vaguely like a url
    if candidate and resembles_url(candidate):
        return candidate
    return ''
|
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:15:04 +00:00
|
|
|
|
def has_actor(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an actor?
    The actor url obtained from the post must be non-empty and
    must not contain a # character. If debug is True and the post
    has a type then a missing actor is reported
    """
    if post_json_object.get('actor'):
        actor_url = get_actor_from_post(post_json_object)
        return bool(actor_url) and '#' not in actor_url
    if debug and post_json_object.get('type'):
        missing_msg = post_json_object['type'] + ' has missing actor'
        if post_json_object.get('id'):
            missing_msg += ' ' + post_json_object['id']
        print(missing_msg)
    return False
|
2021-10-13 10:11:02 +00:00
|
|
|
|
|
|
|
|
|
|
2022-04-09 15:11:22 +00:00
|
|
|
|
def has_object_string_type(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have a type field within an object dict?
    post_json_object: the post to be checked
    debug: if True then the reason for a failure is printed
    Returns True only if has_object_dict accepts the post and
    the type within its object is a string
    """
    if not has_object_dict(post_json_object):
        if debug:
            print('has_object_string_type no object found')
        return False
    object_type = post_json_object['object'].get('type')
    if object_type:
        if isinstance(object_type, str):
            return True
        if post_json_object.get('type'):
            print('DEBUG: ' + str(post_json_object['type']) +
                  ' type within object is not a string ' +
                  str(post_json_object))
    if debug:
        # the post might not have an id, so it can't be assumed here
        post_id = post_json_object.get('id')
        print('No type field within object ' +
              (str(post_id) if post_id else ''))
    return False
|
2021-10-13 10:37:52 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 15:54:46 +00:00
|
|
|
|
def has_object_string_object(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an object string field within an object dict?
    post_json_object: the post to be checked
    debug: if True then the reason for a failure is printed
    Returns True only if has_object_dict accepts the post and
    the object within its object is a string
    """
    if not has_object_dict(post_json_object):
        if debug:
            # name this function, rather than has_object_string_type
            print('has_object_string_object no object found')
        return False
    inner_object = post_json_object['object'].get('object')
    if inner_object:
        if isinstance(inner_object, str):
            return True
        if debug and post_json_object.get('type'):
            print('DEBUG: ' + str(post_json_object['type']) +
                  ' object within dict is not a string')
    if debug:
        # the post might not have an id, so it can't be assumed here
        post_id = post_json_object.get('id')
        print('No object field within dict ' +
              (str(post_id) if post_id else ''))
    return False
|
2021-10-13 11:15:06 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 17:12:07 +00:00
|
|
|
|
def has_object_string(post_json_object: {}, debug: bool) -> bool:
    """Does the given post have an object string field?
    post_json_object: the post to be checked
    debug: if True then the reason for a failure is printed
    Returns True only if the object of the post is a non-empty string
    """
    post_object = post_json_object.get('object')
    if post_object:
        if isinstance(post_object, str):
            return True
        if debug and post_json_object.get('type'):
            print('DEBUG: ' + str(post_json_object['type']) +
                  ' object is not a string')
    if debug:
        # the post might not have an id, so it can't be assumed here
        post_id = post_json_object.get('id')
        print('No object field within post ' +
              (str(post_id) if post_id else ''))
    return False
|
2021-11-03 11:25:26 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 16:59:38 +00:00
|
|
|
|
def get_new_post_endpoints() -> []:
    """Returns a list of endpoints for new posts
    The endpoint names are returned as a tuple
    """
    new_post_endpoints = (
        'newpost',
        'newblog',
        'newunlisted',
        'newfollowers',
        'newdm',
        'newreminder',
        'newreport',
        'newquestion',
        'newshare',
        'newwanted',
        'editblogpost',
        'newreadingstatus'
    )
    return new_post_endpoints
|
2021-12-17 12:01:54 +00:00
|
|
|
|
|
|
|
|
|
|
2021-12-26 16:59:38 +00:00
|
|
|
|
def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str:
    """Returns the cached filename for a favicon based upon its url
    The scheme is removed, /favicon. is shortened to a dot and any
    remaining slashes become dashes, giving a single filename
    within the favicons directory
    """
    url_path = favicon_url
    if '://' in url_path:
        url_path = url_path.split('://')[1]
    flat_name = url_path.replace('/favicon.', '.').replace('/', '-')
    return base_dir + '/favicons/' + flat_name
|
2022-01-13 15:10:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def valid_hash_tag(hashtag: str) -> bool:
    """Returns true if the given hashtag contains valid characters
    Hashtags of 32 or more characters, or which are entirely digits,
    are never valid
    """
    # long hashtags are not valid
    # numbers are not permitted to be hashtags
    if len(hashtag) >= 32 or hashtag.isdigit():
        return False
    if all(tag_ch in VALID_HASHTAG_CHARS for tag_ch in hashtag):
        return True
    return bool(_is_valid_language(hashtag))
|
2022-02-25 19:12:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def convert_published_to_local_timezone(published, timezone: str) -> str:
    """Converts a post published time into local time
    published: datetime of publication, which is treated as UTC
    whatever timezone information it already carries
    timezone: name of the timezone to convert into
    Returns the converted datetime, or published unchanged if no
    timezone was given or if the timezone could not be resolved
    """
    if not timezone:
        return published

    to_zone = None
    try:
        to_zone = tz.gettz(timezone)
    except Exception as ex:
        print('EX: convert_published_to_local_timezone ' +
              'unable to resolve timezone ' + str(timezone) +
              ' ' + str(ex))
    if to_zone is None:
        # without this check a failed lookup would either leave
        # to_zone unassigned or else silently convert into
        # whatever the timezone of the server happens to be
        return published

    utc = published.replace(tzinfo=tz.gettz('UTC'))
    return utc.astimezone(to_zone)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_account_timezones(base_dir: str) -> {}:
    """Returns a dictionary containing the timezone for each account
    The keys are nicknames and the values are the stripped contents
    of the timezone.txt file within each account directory.
    Accounts having no timezone file, or an empty one, are omitted
    """
    timezones = {}
    accounts_dir = data_dir(base_dir)
    # only the directories at the top level are examined
    _, top_dirs, _ = next(os.walk(accounts_dir), (None, [], []))
    for handle in top_dirs:
        if '@' not in handle:
            continue
        if handle.startswith(('inbox@', 'Actor@')):
            continue
        tz_filename = \
            os.path.join(accounts_dir, handle) + '/timezone.txt'
        if not os.path.isfile(tz_filename):
            continue
        try:
            with open(tz_filename, 'r', encoding='utf-8') as fp_timezone:
                tz_text = fp_timezone.read().strip()
        except OSError:
            print('EX: load_account_timezones unable to read ' +
                  tz_filename)
            continue
        if tz_text:
            timezones[handle.split('@')[0]] = tz_text
    return timezones
|
|
|
|
|
|
|
|
|
|
|
2022-03-24 13:14:41 +00:00
|
|
|
|
def load_bold_reading(base_dir: str) -> {}:
    """Returns a dictionary containing the bold reading status per account
    The keys are the nicknames of accounts whose directory contains
    a .boldReading file, and the value is always True
    """
    accounts_dir = data_dir(base_dir)
    # only the directories at the top level are examined
    _, top_dirs, _ = next(os.walk(accounts_dir), (None, [], []))
    status = {}
    for handle in top_dirs:
        if '@' not in handle:
            continue
        if handle.startswith(('inbox@', 'Actor@')):
            continue
        flag_filename = accounts_dir + '/' + handle + '/.boldReading'
        if os.path.isfile(flag_filename):
            status[handle.split('@')[0]] = True
    return status
|
|
|
|
|
|
|
|
|
|
|
2023-12-24 15:53:11 +00:00
|
|
|
|
def load_hide_follows(base_dir: str) -> {}:
    """Returns a dictionary containing the hide follows status per account
    The keys are the nicknames of accounts whose directory contains
    a .hideFollows file, and the value is always True
    """
    accounts_dir = data_dir(base_dir)
    # only the directories at the top level are examined
    _, top_dirs, _ = next(os.walk(accounts_dir), (None, [], []))
    status = {}
    for handle in top_dirs:
        if '@' not in handle:
            continue
        if handle.startswith(('inbox@', 'Actor@')):
            continue
        flag_filename = accounts_dir + '/' + handle + '/.hideFollows'
        if os.path.isfile(flag_filename):
            status[handle.split('@')[0]] = True
    return status
|
|
|
|
|
|
|
|
|
|
|
2022-02-25 19:12:40 +00:00
|
|
|
|
def get_account_timezone(base_dir: str, nickname: str, domain: str) -> str:
    """Returns the timezone for the given account
    This is the stripped contents of timezone.txt within the account
    directory, or None if that file is missing or can't be read
    """
    tz_filename = acct_dir(base_dir, nickname, domain) + '/timezone.txt'
    if not os.path.isfile(tz_filename):
        return None
    try:
        with open(tz_filename, 'r', encoding='utf-8') as fp_timezone:
            return fp_timezone.read().strip()
    except OSError:
        print('EX: get_account_timezone unable to read ' + tz_filename)
    return None
|
2022-02-25 19:57:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_account_timezone(base_dir: str, nickname: str, domain: str,
                         timezone: str) -> None:
    """Sets the timezone for the given account
    The stripped timezone is written to timezone.txt within the
    account directory. A failure to write is reported but not raised
    """
    account_directory = acct_dir(base_dir, nickname, domain)
    tz_filename = account_directory + '/timezone.txt'
    tz_text = timezone.strip()
    try:
        with open(tz_filename, 'w+', encoding='utf-8') as fp_timezone:
            fp_timezone.write(tz_text)
    except OSError:
        print('EX: set_account_timezone unable to write ' +
              tz_filename)
|
2022-03-12 19:46:47 +00:00
|
|
|
|
|
|
|
|
|
|
2024-01-28 19:11:55 +00:00
|
|
|
|
def _is_onion_request(calling_domain: str, referer_domain: str,
                      domain: str, onion_domain: str) -> bool:
    """Do the given domains indicate that this is a request
    from an onion instance
    This is never the case if there is no onion domain. Otherwise
    it is when the domain is the onion domain, or when the calling
    domain or the referer domain ends with .onion
    """
    if not onion_domain:
        return False
    if domain == onion_domain or calling_domain.endswith('.onion'):
        return True
    return bool(referer_domain) and referer_domain.endswith('.onion')
|
|
|
|
|
|
|
|
|
|
|
2024-01-28 19:11:55 +00:00
|
|
|
|
def _is_i2p_request(calling_domain: str, referer_domain: str,
                    domain: str, i2p_domain: str) -> bool:
    """Do the given domains indicate that this is a request
    from an i2p instance
    This is never the case if there is no i2p domain. Otherwise
    it is when the domain is the i2p domain, or when the calling
    domain or the referer domain ends with .i2p
    """
    if not i2p_domain:
        return False
    if domain == i2p_domain or calling_domain.endswith('.i2p'):
        return True
    return bool(referer_domain) and referer_domain.endswith('.i2p')
|
2022-05-08 17:15:48 +00:00
|
|
|
|
|
|
|
|
|
|
2023-08-04 09:38:48 +00:00
|
|
|
|
def disallow_announce(content: str, attachment: [], capabilities: {}) -> bool:
    """Are announces/boosts not allowed for the given post?
    Boosts are disallowed if the capabilities don't permit public
    announces, if the content includes a no boost marker, or if an
    attached image has a missing or very short description
    """
    # pixelfed style capabilities
    if capabilities:
        if 'announce' not in capabilities:
            # capabilities exist but with no announce defined
            return True
        announce_capability = capabilities['announce']
        if isinstance(announce_capability, str) and \
           not announce_capability.endswith('#Public'):
            # TODO handle non-public announce permissions
            print('CAPABILITIES: announce ' + announce_capability)
            return True

    # emojis
    no_boost_markers = (
        ':boost_no:',
        ':noboost:',
        ':noboosts:',
        ':no_boost:',
        ':no_boosts:',
        ':boosts_no:',
        'dont_repeat',
        'dont_announce',
        'dont_boost',
        'do not boost',
        "don't boost",
        'boost_denied',
        'boosts_denied',
        'boostdenied',
        'boostsdenied'
    )
    lowercase_content = content.lower()
    if any(marker in lowercase_content for marker in no_boost_markers):
        return True

    # check for attached images without descriptions
    if not isinstance(attachment, list):
        return False
    for media in attachment:
        if not isinstance(media, dict):
            continue
        if not (media.get('mediaType') and media.get('url')):
            continue
        if not media['mediaType'].startswith('image/'):
            continue
        description = media.get('name')
        if not description:
            # no image description
            return True
        if isinstance(description, str) and len(description) < 5:
            # not enough description
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def disallow_reply(content: str) -> bool:
    """Are replies not allowed for the given post?
    Returns True if the content, ignoring case, includes any
    of the known no reply markers
    """
    no_reply_markers = (
        ':reply_no:',
        ':noreply:',
        ':noreplies:',
        ':no_reply:',
        ':no_replies:',
        ':no_responses:',
        ':replies_no:',
        'dont_at_me',
        'do not reply',
        "don't reply",
        "don't @ me",
        'dont@me',
        'dontatme',
        'noresponses'
    )
    lowercase_content = content.lower()
    return any(marker in lowercase_content for marker in no_reply_markers)
|
2022-05-11 17:17:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_attachment_property_value(property_value: {}) -> (str, str):
    """Returns the fieldname and value for an attachment property
    The first of the value, http://schema.org#value,
    https://schema.org#value or href fields which is not empty
    gets used. If there isn't one then both results are None
    """
    candidate_fields = (
        'value',
        'http://schema.org#value',
        'https://schema.org#value',
        'href'
    )
    for field_name in candidate_fields:
        field_value = property_value.get(field_name)
        if field_value:
            return field_name, field_value
    return None, None
|
2022-07-22 10:54:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def safe_system_string(text: str) -> str:
    """Returns a safe version of a string which can be used within a
    system command
    Backticks are removed and every $( is reduced to ( so that the
    result contains neither of the ways to begin a command
    substitution. No other shell characters are altered
    """
    # backticks go first, because deleting one from between a $ and
    # a ( would otherwise leave a new $( behind
    text = text.replace('`', '')
    # repeat until stable, because a single pass turns $$( into $(
    while '$(' in text:
        text = text.replace('$(', '(')
    return text
|
2022-09-13 09:27:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_json_content_from_accept(accept: str) -> str:
    """Returns the json content type for the given accept
    This is application/ld+json if the accept mentions it and
    application/json otherwise
    """
    if accept and 'application/ld+json' in accept:
        return 'application/ld+json'
    return 'application/json'
|
2022-09-25 17:26:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_inverted_text(text: str, system_language: str) -> str:
    """Removes any inverted text from the given string
    Nothing is changed unless the system language is en.
    The text is handled one paragraph at a time, with inverted
    characters replaced by ordinary letters, and any paragraph
    needing more than two different replacements is also reversed
    """
    if system_language != 'en':
        return text

    text = uninvert_text(text)

    # each table runs from z down to a, with an underscore in the
    # place of any letter which has no inverted character
    flipped_tables = (
        ([*"_ʎ_ʍʌ_ʇ_ɹ____ɯʃʞɾıɥƃɟǝ_ɔ_ɐ"], ord('z')),
        ([*"_⅄__ᴧ∩⊥_ᴚΌԀ_ᴎ_⅂⋊ſ__⅁ℲƎ◖Ↄ𐐒∀"], ord('Z'))
    )

    para_prefix = ''
    para_suffix = '\n'
    if '</p>' in text:
        text = text.replace('<p>', '')
        para_prefix = '<p>'
        para_suffix = '</p>'
    # the text doesn't change below, so this is only checked once
    has_suffix = para_suffix in text

    rebuilt = ''
    for para in text.split(para_suffix):
        substitutions = 0
        for flipped_chars, last_letter in flipped_tables:
            for offset, flipped_ch in enumerate(flipped_chars):
                if flipped_ch == '_' or flipped_ch not in para:
                    continue
                para = para.replace(flipped_ch,
                                    chr(last_letter - offset))
                substitutions += 1

        if substitutions > 2:
            para = para[::-1]
        if not para:
            continue
        rebuilt += para_prefix + para
        if has_suffix:
            rebuilt += para_suffix

    return rebuilt
|
2022-10-05 17:55:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_square_capitals(text: str, system_language: str) -> str:
    """Removes any square capital text from the given string
    text: the text to be converted
    system_language: nothing is changed unless this is en
    Returns the text with each of the 26 square capitals, beginning
    at 🅰, replaced by the matching letter from A to Z
    """
    if system_language != 'en':
        return text
    offset = ord('A')
    start_value = ord('🅰')
    # there are 26 letters, so the final one is 25 after the first.
    # Including the character after that would turn it into [
    end_value = start_value + 25
    return ''.join(
        chr(offset + ord(text_ch) - start_value)
        if start_value <= ord(text_ch) <= end_value else text_ch
        for text_ch in text)
|
2022-11-11 11:26:17 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def dont_speak_hashtags(content: str) -> str:
    """Ensure that hashtags aren't announced by screen readers
    Each # which directly precedes a span gets wrapped within a
    span having aria-hidden set. Empty content is returned as it is
    """
    if content:
        return content.replace('>#<span',
                               '><span aria-hidden="true">#</span><span')
    return content
|
2022-11-13 19:45:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_min_images_for_accounts(base_dir: str) -> []:
    """Loads a list of nicknames for accounts where all images should
    be minimized by default
    These are the accounts whose directory contains
    a .minimize_all_images file
    """
    accounts_dir = data_dir(base_dir)
    # only the directories at the top level are examined
    top_dir, top_dirs, _ = \
        next(os.walk(accounts_dir), (accounts_dir, [], []))
    nicknames: list[str] = []
    for handle in top_dirs:
        if not is_account_dir(handle):
            continue
        flag_filename = \
            os.path.join(top_dir, handle) + '/.minimize_all_images'
        if os.path.isfile(flag_filename):
            nicknames.append(handle.split('@')[0])
    return nicknames
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_minimize_all_images(base_dir: str,
                            nickname: str, domain: str,
                            minimize: bool,
                            min_images_for_accounts: []) -> None:
    """Add or remove a file indicating that all images for an account
    should be minimized by default
    The given list of nicknames is updated in place so that it
    agrees with the file. File errors are reported but not raised
    """
    flag_filename = \
        acct_dir(base_dir, nickname, domain) + '/.minimize_all_images'
    listed = nickname in min_images_for_accounts
    if not minimize:
        if listed:
            min_images_for_accounts.remove(nickname)
        if os.path.isfile(flag_filename):
            try:
                os.remove(flag_filename)
            except OSError:
                print('EX: unable to delete ' + flag_filename)
        return

    if not listed:
        min_images_for_accounts.append(nickname)
    if os.path.isfile(flag_filename):
        return
    try:
        with open(flag_filename, 'w+', encoding='utf-8') as fp_min:
            fp_min.write('\n')
    except OSError:
        print('EX: unable to write ' + flag_filename)
|
2022-11-26 15:39:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_reverse_timeline(base_dir: str) -> []:
    """Loads flags for each user indicating whether they prefer to
    see reversed timelines
    Returns the nicknames, without any duplicates, of the accounts
    whose directory contains a .reverse_timeline file
    """
    # only the directories at the top level are examined
    _, top_dirs, _ = next(os.walk(data_dir(base_dir)), (None, [], []))
    nicknames: list[str] = []
    for handle in top_dirs:
        if not is_account_dir(handle):
            continue
        handle_sections = handle.split('@')
        nickname = handle_sections[0]
        domain = handle_sections[1]
        flag_filename = \
            acct_dir(base_dir, nickname, domain) + '/.reverse_timeline'
        if os.path.isfile(flag_filename) and nickname not in nicknames:
            nicknames.append(nickname)
    return nicknames
|
2022-11-26 16:27:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_reverse_timeline(base_dir: str, reverse_sequence: []) -> []:
    """Saves flags for each user indicating whether they prefer to
    see reversed timelines
    A .reverse_timeline file is created for every account whose
    nickname is within reverse_sequence and removed for every other
    account. File errors are reported but not raised
    """
    # only the directories at the top level are examined
    _, top_dirs, _ = next(os.walk(data_dir(base_dir)), (None, [], []))
    for handle in top_dirs:
        if not is_account_dir(handle):
            continue
        handle_sections = handle.split('@')
        nickname = handle_sections[0]
        domain = handle_sections[1]
        flag_filename = \
            acct_dir(base_dir, nickname, domain) + '/.reverse_timeline'
        flag_exists = os.path.isfile(flag_filename)
        if nickname in reverse_sequence:
            if flag_exists:
                continue
            try:
                with open(flag_filename, 'w+',
                          encoding='utf-8') as fp_reverse:
                    fp_reverse.write('\n')
            except OSError:
                print('EX: failed to save reverse ' + flag_filename)
        elif flag_exists:
            try:
                os.remove(flag_filename)
            except OSError:
                print('EX: failed to delete reverse ' +
                      flag_filename)
|
2022-12-24 15:33:26 +00:00
|
|
|
|
|
|
|
|
|
|
2024-04-20 11:07:45 +00:00
|
|
|
|
def get_quote_toot_url(post_json_object: {}) -> str:
    """ Returns the url for a quote toot, or an empty string if the
    post does not reference a quoted post.
    post_json_object is the post dict, whose 'object' field is expected
    to be a dict.
    The adhoc quote fields on the object are checked first, followed by
    the Link tags. Any url returned has been passed through remove_html
    """
    post_obj = post_json_object['object']

    # adhoc quote toot implementations
    object_quote_url_fields = ('quoteUri', 'quoteUrl', 'quoteReply',
                               'toot:quoteReply', '_misskey_quote')
    for fieldname in object_quote_url_fields:
        quote_url = post_obj.get(fieldname)
        if not quote_url:
            continue
        if isinstance(quote_url, str) and resembles_url(quote_url):
            return remove_html(quote_url)

    # More correct ActivityPub implementation - adding a Link tag
    tags = post_obj.get('tag')
    if not tags or not isinstance(tags, list):
        return ''

    for item in tags:
        if not isinstance(item, dict):
            continue

        # a tag can only supply the quote if its href looks like a url
        href = item.get('href')
        if not isinstance(href, str) or not resembles_url(href):
            continue

        # misskey style: rel is a string or a list of strings
        rel = item.get('rel')
        if isinstance(rel, str):
            rel = [rel]
        mk_quote = isinstance(rel, list) and \
            any(isinstance(rel_str, str) and '_misskey_quote' in rel_str
                for rel_str in rel)

        # otherwise it should be a Link tag having a json media type
        media_type = item.get('mediaType')
        json_link = item.get('type') == 'Link' and \
            isinstance(media_type, str) and 'json' in media_type

        if mk_quote or json_link:
            return remove_html(href)
    return ''
|
|
|
|
|
|
|
|
|
|
|
2024-04-20 09:59:56 +00:00
|
|
|
|
def quote_toots_allowed(base_dir: str, nickname: str, domain: str,
                        sender_nickname: str, sender_domain: str) -> bool:
    """ Returns true if the given account permits quote toots from the
    given sender. Quotes must be enabled via the account's .allowQuotes
    file, and the sender handle must not appear within quotesblocked.txt.
    If sender_nickname is None then only the enabled state is checked
    """
    account_dir = acct_dir(base_dir, nickname, domain)
    if not os.path.isfile(account_dir + '/.allowQuotes'):
        # quote toots have not been enabled for this account
        return False

    if sender_nickname is None:
        return True

    # check blocks on individual sending accounts
    quotes_blocked_filename = account_dir + '/quotesblocked.txt'
    if not os.path.isfile(quotes_blocked_filename):
        return True

    sender_handle = sender_nickname + '@' + sender_domain
    # quote toots are not permitted from senders within the blocked file
    return not text_in_file(sender_handle, quotes_blocked_filename, False)
|
|
|
|
|
|
|
|
|
|
|
2023-11-01 20:29:02 +00:00
|
|
|
|
def license_link_from_name(license_name: str) -> str:
    """Returns the license link from its name.
    A name which already contains '://' is returned unchanged.
    Otherwise the upper cased name is searched for known license
    abbreviations, and if none are found then the link for
    Creative Commons Zero is returned
    """
    if '://' in license_name:
        return license_name
    value_upper = license_name.upper()
    cc_base = 'https://creativecommons.org/licenses/'
    # The first row with a matching substring wins, so the more specific
    # names have to appear before the names which they contain
    license_links = (
        (('CC-BY-SA-NC', 'CC-BY-NC-SA', 'CC BY SA NC', 'CC BY NC SA'),
         cc_base + 'by-nc-sa/4.0'),
        (('CC-BY-SA', 'CC-SA-BY', 'CC BY SA', 'CC SA BY'),
         cc_base + 'by-sa/4.0'),
        (('CC-BY-NC-ND', 'CC-BY-ND-NC', 'CC BY NC ND', 'CC BY ND NC'),
         cc_base + 'by-nc-nd/4.0'),
        (('CC-BY-NC', 'CC BY NC'),
         cc_base + 'by-nc/4.0'),
        # no derivatives does not imply non-commercial
        (('CC-BY-ND', 'CC BY ND'),
         cc_base + 'by-nd/4.0'),
        (('CC-BY', 'CC BY'),
         cc_base + 'by/4.0'),
        (('GFDL', 'GNU FREE DOC'),
         'https://www.gnu.org/licenses/fdl-1.3.html'),
        (('OPL', 'OPEN PUBLICATION LIC'),
         'https://opencontent.org/openpub'),
        (('PDDL', 'OPEN DATA COMMONS PUBLIC'),
         'https://opendatacommons.org/licenses/pddl'),
        (('ODBL', 'OPEN DATA COMMONS OPEN'),
         'https://opendatacommons.org/licenses/odbl'),
        (('ODC', 'OPEN DATA COMMONS ATTR'),
         'https://opendatacommons.org/licenses/by'),
        (('OGL', 'OPEN GOVERNMENT LIC'),
         'https://www.nationalarchives.gov.uk/doc/open-government-licence'),
        (('PDL', 'PUBLIC DOCUMENTATION LIC'),
         'http://www.openoffice.org/licenses/PDL.html'),
        (('FREEBSD',),
         'https://www.freebsd.org/copyright/freebsd-doc-license'),
        (('WTF',),
         'http://www.wtfpl.net/txt/copying'),
        (('UNLICENSE',),
         'https://unlicense.org')
    )
    for substrings, link in license_links:
        if any(substring in value_upper for substring in substrings):
            return link
    return 'https://creativecommons.org/publicdomain/zero/1.0'
|
2023-01-02 09:55:41 +00:00
|
|
|
|
|
|
|
|
|
|
2023-01-02 11:41:48 +00:00
|
|
|
|
def _get_escaped_chars() -> {}:
|
|
|
|
|
"""Returns escaped characters
|
2023-01-02 09:55:41 +00:00
|
|
|
|
"""
|
2023-01-02 11:41:48 +00:00
|
|
|
|
return {
|
2023-01-02 09:55:41 +00:00
|
|
|
|
"&": "&",
|
|
|
|
|
"<": "<",
|
|
|
|
|
">": ">",
|
|
|
|
|
'"': """,
|
|
|
|
|
"'": "'"
|
|
|
|
|
}
|
2023-01-02 11:41:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def escape_text(txt: str) -> str:
    """Returns the given text with each of the characters listed by
    _get_escaped_chars replaced by its escaped form, for inclusion
    in xml/rss
    """
    escaped = txt
    escaped_chars = _get_escaped_chars()
    for plain_char in escaped_chars:
        escaped = escaped.replace(plain_char, escaped_chars[plain_char])
    return escaped
|
2023-01-02 10:24:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def unescaped_text(txt: str) -> str:
    """Unescape text which was previously escaped for xml/rss,
    replacing each escaped form listed by _get_escaped_chars with
    its original character
    """
    # Undo the replacements in the reverse of the order in which
    # escape_text applies them. Otherwise the result of an earlier
    # replacement can combine with the following text and be
    # unescaped a second time, eg. "&amp;lt;" becoming "<"
    for orig, replacement in reversed(list(_get_escaped_chars().items())):
        txt = txt.replace(replacement, orig)
    return txt
|
2023-01-07 11:45:19 +00:00
|
|
|
|
|
|
|
|
|
|
2024-07-09 10:36:23 +00:00
|
|
|
|
def valid_content_warning(summary: str) -> str:
    """Returns a validated content warning: html is removed, any
    hashtag symbols are removed and the result is passed through
    remove_invalid_chars
    """
    cw_str = remove_html(summary)
    # hashtags within content warnings apparently cause a lot of trouble
    # so remove them
    if '#' in cw_str:
        # removing a standalone # leaves a double space behind,
        # so collapse it to a single space
        cw_str = cw_str.replace('#', '').replace('  ', ' ')
    return remove_invalid_chars(cw_str)
|
|
|
|
|
|
|
|
|
|
|
2023-01-07 11:45:19 +00:00
|
|
|
|
def _harmless_text(text: str, is_summary: bool, remove_trash: []) -> str:
    """Returns the given post text with content warnings tidied, trash
    strings removed and markup which would cause display issues removed
    """
    # tidy up content warnings
    if is_summary:
        text = valid_content_warning(text)

    # take out the trash
    for trash in remove_trash:
        text = text.replace(trash, '')

    # remove things which would cause display issues
    if dangerous_markup(text, False, ['pre']):
        text = remove_html(text)
    return remove_markup_tag(text, 'pre')


def harmless_markup(post_json_object: {}) -> None:
    """render harmless any dangerous markup.
    The content and summary fields of the post object, together with
    their language maps (contentMap and summaryMap), are updated in
    place. Nothing is done if the post object is not a dict.
    Each stage of the cleanup is applied to the result of the previous
    stage, so that the final value written back is the fully
    sanitized one
    """
    post_obj = post_json_object['object']
    if not isinstance(post_obj, dict):
        return

    remove_trash = [' id="wordads-inline-marker"']

    for field_name in ('content', 'summary'):
        is_summary = field_name == 'summary'

        # fields which are not strings are left untouched, as is
        # already the case for the language map entries below
        text = post_obj.get(field_name)
        if text and isinstance(text, str):
            post_obj[field_name] = \
                _harmless_text(text, is_summary, remove_trash)

        map_name = field_name + 'Map'
        lang_map = post_obj.get(map_name)
        if not lang_map:
            continue
        if not isinstance(lang_map, dict):
            print('WARN: harmless_markup unknown Map ' + map_name + ' ' +
                  str(lang_map))
            continue
        # only existing keys are reassigned, so the dict size does not
        # change while iterating
        for lang, content in lang_map.items():
            if not isinstance(content, str):
                continue
            lang_map[lang] = \
                _harmless_text(content, is_summary, remove_trash)
|
2023-05-12 13:47:28 +00:00
|
|
|
|
|
2023-10-12 11:59:14 +00:00
|
|
|
|
|
2023-05-12 14:39:26 +00:00
|
|
|
|
def ap_proxy_type(json_object: {}) -> str:
    """Returns a string indicating the proxy for an activitypub post
    or None if not proxied
    See https://codeberg.org/fediverse/fep/src/branch/main/feps/fep-fffd.md
    The returned value is the 'protocol' of the first entry within the
    proxyOf list which has one as a non-empty string
    """
    proxy_of = json_object.get('proxyOf')
    if not proxy_of or not isinstance(proxy_of, list):
        return None
    for proxy_dict in proxy_of:
        # skip malformed entries rather than failing on .get()
        if not isinstance(proxy_dict, dict):
            continue
        protocol = proxy_dict.get('protocol')
        if protocol and isinstance(protocol, str):
            return protocol
    return None
|
2023-09-10 10:12:39 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def language_right_to_left(language: str) -> bool:
    """Returns true if the given language code is one of those
    known to be written from right to left
    """
    return language in ('ar', 'fa', 'he', 'yi')
|
2023-09-12 18:38:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def binary_is_image(filename: str, media_binary) -> bool:
    """Returns true if the given binary data carries the signature
    expected for the image type indicated by the filename ending.
    Data shorter than 13 bytes, or a filename with an unrecognised
    ending, returns false
    """
    if len(media_binary) < 13:
        return False
    name = filename.lower()
    # only the signature for the first matching file ending is checked
    if name.endswith('.jpeg') or name.endswith('jpg'):
        return media_binary[6:10] in (b'JFIF', b'Exif')
    if name.endswith('.ico'):
        return media_binary.startswith(b'\x00\x00\x01\x00')
    if name.endswith('.png'):
        return media_binary.startswith(b'\211PNG\r\n\032\n')
    if name.endswith('.webp'):
        if not media_binary.startswith(b'RIFF'):
            return False
        return media_binary[8:12] == b'WEBP'
    if name.endswith('.gif'):
        return media_binary[:6] in (b'GIF87a', b'GIF89a')
    if name.endswith('.avif'):
        return media_binary[4:12] == b'ftypavif'
    if name.endswith('.heic'):
        return media_binary[4:12] == b'ftypmif1'
    if name.endswith('.jxl'):
        return media_binary.startswith(b'\xff\n')
    if name.endswith('.svg'):
        # svg is text, so search the string form of the data
        return '<svg' in str(media_binary)
    return False
|
2023-10-02 18:56:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_status_count(base_dir: str) -> int:
    """Returns the total number of files directly within the outbox
    directories of all accounts
    """
    total = 0
    accounts_dir = data_dir(base_dir)
    for _, account_dirs, _ in os.walk(accounts_dir):
        for handle in filter(is_account_dir, account_dirs):
            outbox_dir = os.path.join(accounts_dir, handle + '/outbox')
            for _, _, outbox_files in os.walk(outbox_dir):
                total += len(outbox_files)
                # subdirectories of the outbox are not counted
                break
        # only the top level of the accounts directory is needed
        break
    return total
|
2023-10-02 20:29:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def lines_in_file(filename: str) -> int:
    """Returns the number of lines in a file, counted as the number of
    newline separated segments of its text, so a trailing newline adds
    one to the count. Zero is returned if the file does not exist or
    can't be read
    """
    if not os.path.isfile(filename):
        return 0
    try:
        with open(filename, 'r', encoding='utf-8') as fp_lines:
            file_text = fp_lines.read()
    except OSError:
        print('EX: lines_in_file error reading ' + filename)
        return 0
    return len(file_text.split('\n'))
|
2023-10-12 14:34:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def corp_servers() -> ():
    """Returns a tuple of strings which identify the servers of
    despised corporate thieves
    """
    servers = (
        'GitHub.com',
        'github.com',
        'cloudflare',
        'microsoft.com',
        'google.com',
        'google.co.'
    )
    return servers
|
2023-10-29 22:00:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_media_url_from_video(post_json_object: {}) -> (str, str, str, str):
    """Extracts the media details from the 'url' list of a Video post
    (eg peertube).
    Returns a tuple containing the media type, media url, torrent url
    and magnet link, with None for each value which was not found
    """
    media_type = None
    media_url = None
    media_torrent = None
    media_magnet = None
    video_types = ('video/mp4', 'video/ogv')

    url_list = post_json_object.get('url')
    if url_list and isinstance(url_list, list):
        for media_link in url_list:
            if not isinstance(media_link, dict):
                continue
            link_type = media_link.get('mediaType')
            link_href = media_link.get('href')
            if not link_type or not link_href:
                continue

            media_tags = media_link.get('tag')
            if media_tags and isinstance(media_tags, list):
                # use the first video found within the nested tags
                for tag_link in media_tags:
                    if not isinstance(tag_link, dict):
                        continue
                    tag_type = tag_link.get('mediaType')
                    tag_href = tag_link.get('href')
                    if not tag_type or not tag_href:
                        continue
                    if tag_type in video_types:
                        media_type = tag_type
                        media_url = remove_html(tag_href)
                        break
                if media_type and media_url:
                    continue

            if link_type == 'application/x-bittorrent':
                media_torrent = remove_html(link_href)
            if link_href.startswith('magnet:'):
                media_magnet = remove_html(link_href)
            if link_type not in video_types:
                continue
            # keep the first video url which was found
            if not media_url:
                media_type = link_type
                media_url = remove_html(link_href)
    return media_type, media_url, media_torrent, media_magnet
|
2023-12-24 23:42:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_reply_to(post_json_object: {}) -> str:
    """Returns the reply to link from a post, taken from the first of
    the inReplyTo or inReplyToBook fields which has a value.
    An empty string is returned if neither field has a value, or if
    the value is neither a string nor a dict having a string id
    """
    for field_name in ('inReplyTo', 'inReplyToBook'):
        reply_to = post_json_object.get(field_name)
        if not reply_to:
            continue
        if isinstance(reply_to, str):
            return reply_to
        # the field may hold an object, in which case its id is used
        if isinstance(reply_to, dict):
            reply_id = reply_to.get('id')
            if reply_id and isinstance(reply_id, str):
                return reply_id
        print('WARN: ' + field_name + ' is not a string ' +
              str(reply_to))
        return ''
    return ''
|
2024-01-07 22:19:29 +00:00
|
|
|
|
|
|
|
|
|
|
2024-01-27 17:04:21 +00:00
|
|
|
|
def resembles_url(text: str) -> bool:
    """Returns true if the given text looks like a url: it contains
    both '://' and a dot, and has no spaces or '<' characters
    """
    if ' ' in text or '<' in text:
        return False
    return '://' in text and '.' in text
|
2024-01-27 17:21:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def local_only_is_local(message_json: {}, domain_full: str) -> bool:
    """Returns True if the given json post is verified as local only.
    A post whose localOnly flag is not True always passes. Otherwise
    every 'to' actor having a domain, and the attributedTo actor,
    must be on the given domain
    """
    post_obj = message_json['object']
    if post_obj['localOnly'] is not True:
        return True

    # check that the to addresses are local
    recipients = post_obj['to']
    if isinstance(recipients, list):
        for to_actor in recipients:
            to_domain, to_port = get_domain_from_actor(to_actor)
            if not to_domain:
                continue
            to_domain_full = get_full_domain(to_domain, to_port)
            if domain_full != to_domain_full:
                print("REJECT: inbox " +
                      "local only post isn't local " +
                      str(message_json))
                return False

    # check that the sender is local
    sender_actor = get_attributed_to(post_obj['attributedTo'])
    sender_domain, sender_port = get_domain_from_actor(sender_actor)
    if not sender_domain:
        return True
    sender_domain_full = get_full_domain(sender_domain, sender_port)
    if domain_full != sender_domain_full:
        print("REJECT: " +
              "inbox local only post isn't local " +
              str(message_json))
        return False
    return True
|
2024-01-27 17:35:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def post_summary_contains_links(message_json: {}) -> bool:
    """Returns true if the summary of the given json post should be
    rejected. For Person, Application and Group objects that means a
    summary longer than 4096 characters. For other types it means a
    summary which is longer than 1024 characters or contains a link
    """
    post_obj = message_json['object']
    if not post_obj.get('type') or not post_obj.get('summary'):
        return False

    summary = post_obj['summary']
    if post_obj['type'] in ('Person', 'Application', 'Group'):
        # actor summaries have a larger limit and may contain links
        if len(summary) <= 4096:
            return False
        problem = 'INBOX: person summary is too long '
    elif len(summary) > 1024:
        problem = 'INBOX: summary is too long '
    elif '://' in summary:
        problem = 'INBOX: summary should not contain links '
    else:
        return False

    actor_url = get_actor_from_post(message_json)
    print(problem +
          actor_url + ' ' +
          summary)
    return True
|
2024-01-28 19:11:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def convert_domains(calling_domain: str, referer_domain: str,
                    msg_str: str, http_prefix: str,
                    domain: str,
                    onion_domain: str,
                    i2p_domain: str) -> str:
    """Returns msg_str with urls for the instance domain replaced by
    http urls for the onion or i2p domain, if the request is
    determined to be an onion or i2p one. Otherwise msg_str is
    returned unchanged
    """
    curr_http_prefix = http_prefix + '://'
    if _is_onion_request(calling_domain, referer_domain,
                         domain,
                         onion_domain):
        alternative_domain = onion_domain
    elif _is_i2p_request(calling_domain, referer_domain,
                         domain,
                         i2p_domain):
        alternative_domain = i2p_domain
    else:
        return msg_str
    return msg_str.replace(curr_http_prefix + domain,
                           'http://' + alternative_domain)
|
2024-01-29 21:05:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_instance_url(calling_domain: str,
                     http_prefix: str,
                     domain_full: str,
                     onion_domain: str,
                     i2p_domain: str) -> str:
    """Returns the URL for this instance, which is the onion or i2p
    address if the calling domain is of that kind and the matching
    address is configured, or otherwise the ordinary domain
    """
    if calling_domain.endswith('.onion') and onion_domain:
        return 'http://' + onion_domain
    if calling_domain.endswith('.i2p') and i2p_domain:
        return 'http://' + i2p_domain
    return http_prefix + '://' + domain_full
|
2024-03-01 17:10:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_bad_path(path: str):
    """For http GET or POST returns true if the path looks invalid:
    it contains '..' (plain, or url encoded once or twice) or a
    path element which begins with a dot.
    The comparison is made against the lower case path
    """
    path_lower = path.lower()
    well_known_prefixes = ('/.well-known/', '/users/.well-known/')
    bad_strings = ['..', '%2e%2e', '%252e%252e']

    # allow /.well-known/...
    if not path_lower.startswith(well_known_prefixes):
        bad_strings.append('/.')

    if any(bad_str in path_lower for bad_str in bad_strings):
        print('WARN: bad path ' + path)
        return True
    return False
|
2024-03-20 20:54:03 +00:00
|
|
|
|
|
|
|
|
|
|
2024-03-20 21:31:13 +00:00
|
|
|
|
def set_premium_account(base_dir: str, nickname: str, domain: str,
                        flag_state: bool) -> bool:
    """ Set or clear the premium account flag, which is the presence
    of a .premium file within the account directory.
    Returns True if the flag is in the requested state afterwards,
    or False if the flag file could not be created or removed
    """
    premium_filename = acct_dir(base_dir, nickname, domain) + '/.premium'
    flag_exists = os.path.isfile(premium_filename)

    if flag_state:
        if flag_exists:
            # already set
            return True
        try:
            with open(premium_filename, 'w+',
                      encoding='utf-8') as fp_premium:
                fp_premium.write('\n')
        except OSError:
            print('EX: unable to set premium flag ' + premium_filename)
            return False
        return True

    if not flag_exists:
        # already clear
        return True
    try:
        os.remove(premium_filename)
    except OSError:
        print('EX: unable to remove premium flag ' + premium_filename)
        return False
    return True
|
2024-03-29 14:52:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_post_attachments(post_json_object: {}) -> []:
    """ Returns the list of attachments for a post, read from its
    object if has_object_dict is true, or otherwise from the post
    itself. A single attachment dict is returned as a one item list,
    and any other kind of value gives an empty list
    """
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
    else:
        post_obj = post_json_object

    attachments = post_obj.get('attachment')
    if attachments:
        if isinstance(attachments, list):
            return attachments
        if isinstance(attachments, dict):
            return [attachments]
    return []
|
2024-04-10 09:51:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def string_ends_with(text: str, possible_endings: []) -> bool:
    """ Returns true if the given text ends with at least one of the
    possible endings
    """
    return any(text.endswith(ending) for ending in possible_endings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def string_contains(text: str, possible_substrings: []) -> bool:
    """ Returns true if the given text contains at least one of the
    possible substrings
    """
    return any(substring in text for substring in possible_substrings)
|
2024-04-24 19:35:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_link_tracking(url: str) -> str:
    """ Removes any web link tracking, such as utm_medium, utm_campaign
    or utm_source.
    If the query begins with a utm_ parameter then the whole query is
    removed. Otherwise any utm_ parameters later within the query are
    removed, with the other parameters and any fragment being kept
    """
    if '?utm_' in url:
        return url.split('?utm_')[0]
    if '&utm_' not in url:
        return url

    base, query_separator, query = url.partition('?')
    if not query_separator:
        # there is no query to remove parameters from
        return url
    query, fragment_separator, fragment = query.partition('#')
    kept_params = [param for param in query.split('&')
                   if not param.startswith('utm_')]
    return base + '?' + '&'.join(kept_params) + \
        fragment_separator + fragment
|
2024-07-25 12:34:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_image_file(base_dir: str, name: str, directory: str,
                   theme: str) -> (str, str):
    """Returns the file and the full filename for an image with the
    given name, trying each image extension in turn. The given
    directory is searched first, followed by the theme directory,
    using the default theme if none is given. A pair of empty strings
    is returned if no image exists
    """
    extensions = get_image_extensions()

    # look within the given directory
    for ext in extensions:
        image_file = name + '.' + ext
        image_filename = directory + '/' + image_file
        if os.path.isfile(image_filename):
            return image_file, image_filename

    # if not found then use the default image
    curr_theme = theme if theme else 'default'
    theme_dir = base_dir + '/theme/' + curr_theme
    for ext in extensions:
        image_filename = theme_dir + '/' + name + '.' + ext
        if os.path.isfile(image_filename):
            # the returned file includes the theme name
            return name + '_' + curr_theme + '.' + ext, image_filename
    return '', ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_watermark_file(base_dir: str,
                       nickname: str, domain: str) -> (str, str):
    """Returns the file and the full filename of the account's
    'watermark_image', as located by get_image_file with no theme
    given, for watermarking when an image is attached to a post
    """
    image_name = 'watermark_image'
    search_dir = acct_dir(base_dir, nickname, domain)
    found_file, found_filename = \
        get_image_file(base_dir, image_name, search_dir, '')
    return found_file, found_filename
|
2024-08-08 17:23:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def replace_strings(text: str, replacements: {}) -> str:
    """Returns the text after applying each of the replacements in
    turn, where replacements maps an original string to its
    substitute. Later replacements act upon the result of the
    earlier ones
    """
    result = text
    for search_str in replacements:
        result = result.replace(search_str, replacements[search_str])
    return result
|
2024-09-13 12:09:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def account_is_indexable(actor_json: {}) -> bool:
    """Returns true if the given actor is indexable.
    A boolean 'indexable' field is returned as is, and a string or
    list field counts as indexable if it contains '#Public'
    """
    if 'indexable' not in actor_json:
        return False
    indexable = actor_json['indexable']
    if isinstance(indexable, bool):
        return indexable
    if isinstance(indexable, (list, str)):
        # a list is searched via its string representation
        return '#Public' in str(indexable)
    return False
|
2024-10-12 12:09:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_searchable_by_default(base_dir: str) -> {}:
    """Returns a dict which maps account nicknames to the stripped
    contents of their .searchableByDefault file. Accounts without
    that file, or whose file can't be read, are not included
    """
    searchable = {}
    accounts_path = data_dir(base_dir)
    for _, account_dirs, _ in os.walk(accounts_path):
        for handle in account_dirs:
            if not is_account_dir(handle):
                continue
            state_filename = \
                os.path.join(accounts_path, handle) + '/.searchableByDefault'
            if not os.path.isfile(state_filename):
                continue
            nickname = handle.split('@')[0]
            try:
                with open(state_filename, 'r',
                          encoding='utf-8') as fp_search:
                    searchable[nickname] = fp_search.read().strip()
            except OSError:
                print('EX: unable to load searchableByDefault ' +
                      state_filename)
        # only the top level of the accounts directory is needed
        break
    return searchable
|
2024-10-12 12:31:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_searchable_by(base_dir: str, nickname: str, domain: str,
                      searchable_by: str) -> None:
    """Saves the searchable_by state for an account, as selected from
    the dropdown on new post screen, to its .searchableByDefault file.
    Nothing is written if the state is empty or if the file already
    contains it
    """
    if not searchable_by:
        return
    state_filename = \
        acct_dir(base_dir, nickname, domain) + '/.searchableByDefault'

    # already the same state?
    if os.path.isfile(state_filename) and \
       text_in_file(searchable_by, state_filename, True):
        return

    # write the new state
    try:
        with open(state_filename, 'w+', encoding='utf-8') as fp_search:
            fp_search.write(searchable_by)
    except OSError:
        print('EX: unable to write searchableByDropdown ' + state_filename)
|
2024-11-04 21:05:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def browser_supports_download_filename(ua_str: str) -> bool:
    """Does the browser indicated by the user agent string support specifying
    a default download filename?
    https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a#download
    https://www.w3schools.com/howto/howto_html_download_link.asp
    The match is case sensitive, looking for 'mozilla' or 'firefox'
    within the given string
    """
    return 'mozilla' in ua_str or 'firefox' in ua_str
|
2024-12-17 13:50:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_mitm(self) -> bool:
    """Detect if a request contains a MiTM

    self is the object handling the request. Only its headers attribute
    is used, which needs to support get() lookups.
    Two checks are made:
      1. whether certain headers mention a known interloper
      2. whether any header exists which on its own indicates a MiTM
    The first match is printed and True is returned,
    otherwise False is returned
    """
    mitm_domains = (
        'cloudflare', 'radware', 'imperva', 'akamai', 'azure',
        'fastly', 'google'
    )
    # look for domains within these headers
    check_headers = (
        'Server', 'Report-To', 'Report-to', 'report-to',
        'Expect-CT', 'Expect-Ct', 'expect-ct'
    )
    # read and lowercase each header once, rather than once per interloper
    headers_found = []
    for header_name in check_headers:
        header_value = self.headers.get(header_name)
        if not header_value:
            continue
        header_value_str = str(header_value)
        headers_found.append((header_name, header_value_str,
                              header_value_str.lower()))

    # interlopers remain the outer loop, so that the first match
    # reported is the same as before
    for interloper in mitm_domains:
        for header_name, header_value_str, header_lower in headers_found:
            if interloper in header_lower:
                print('MITM: ' + header_name + ' = ' + header_value_str)
                return True

    # The presence of these headers on their own indicates a MiTM
    mitm_headers = (
        'CF-Connecting-IP', 'CF-RAY', 'CF-IPCountry', 'CF-Visitor',
        'CDN-Loop', 'CF-Worker', 'CF-Cache-Status'
    )
    for header_name in mitm_headers:
        # try the name as listed and then its lowercase form
        for name_variant in (header_name, header_name.lower()):
            header_value = self.headers.get(name_variant)
            if header_value:
                # str() avoids a TypeError if the value is not a string,
                # consistent with the check above
                print('MITM: ' + header_name + ' = ' + str(header_value))
                return True
    return False
|
2024-12-17 15:23:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_mitm_servers(base_dir: str) -> []:
    """Loads a list of servers implementing insecure transport security

    The list is read from mitm_servers.txt within the data directory,
    with one server per line.
    Blank lines are skipped and surrounding whitespace is removed, so
    that a newline at the end of the file or CRLF line endings
    do not produce empty or malformed entries.
    Returns an empty list if the file does not exist, is empty
    or could not be read
    """
    mitm_servers_filename = data_dir(base_dir) + '/mitm_servers.txt'
    if not os.path.isfile(mitm_servers_filename):
        return []

    mitm_servers_text = ''
    try:
        with open(mitm_servers_filename, 'r',
                  encoding='utf-8') as fp_mitm:
            mitm_servers_text = fp_mitm.read()
    except OSError:
        print('EX: error while reading mitm_servers.txt')

    mitm_servers: list[str] = []
    for line in mitm_servers_text.split('\n'):
        domain = line.strip()
        if domain:
            mitm_servers.append(domain)
    return mitm_servers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_mitm_servers(base_dir: str, mitm_servers: []) -> None:
    """Saves a list of servers implementing insecure transport security

    Each non-empty entry of mitm_servers is written on its own line
    to mitm_servers.txt within the data directory, replacing any
    previous content. A failure to write is printed and not raised
    """
    # one line per server, skipping any empty entries
    file_content = ''.join(
        server + '\n' for server in mitm_servers if server
    )

    save_filename = data_dir(base_dir) + '/mitm_servers.txt'
    try:
        with open(save_filename, 'w+', encoding='utf-8') as fp_servers:
            fp_servers.write(file_content)
    except OSError:
        print('EX: error while saving mitm_servers.txt')
|
2024-12-18 19:20:27 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def text_mode_removals(text: str, translate: {}) -> str:
    """Removes some elements of a post when displaying in a text mode browser

    The translated 'SHOW MORE' string is removed and the translated
    'mitm' string is replaced by an eye symbol.
    translate needs to contain both of those keys
    """
    # (translation key, replacement) pairs, applied in this order
    substitutions = (
        ('SHOW MORE', ''),
        ('mitm', '👁 ')
    )
    for translate_key, replacement in substitutions:
        text = text.replace(translate[translate_key], replacement)
    return text
|