epicyon/relationships.py

512 lines
17 KiB
Python
Raw Normal View History

2022-11-27 20:35:12 +00:00
__filename__ = "relationships.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-12-22 23:37:30 +00:00
__version__ = "1.6.0"
2022-11-27 20:35:12 +00:00
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"
import os
from flags import is_dormant
2024-05-12 12:35:26 +00:00
from utils import data_dir
2022-12-01 19:44:12 +00:00
from utils import get_user_paths
2022-11-27 20:35:12 +00:00
from utils import acct_dir
2022-11-27 22:59:24 +00:00
from utils import valid_nickname
from utils import get_full_domain
from utils import local_actor_url
from utils import remove_domain_port
from utils import remove_eol
2022-11-28 13:33:11 +00:00
from utils import is_account_dir
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
from utils import load_json
2022-11-27 20:35:12 +00:00
def get_moved_accounts(base_dir: str, nickname: str, domain: str,
                       filename: str) -> {}:
    """Returns a dict of moved accounts for the given account.

    The moved actors file within the data directory contains one
    "previous_handle new_handle" pair per line. The file called filename
    within the account directory contains one followed handle per line.

    The returned dict maps previous_handle -> new_handle for every handle
    within the follow file which has moved and whose new handle is not
    already within the follow file. An empty dict is returned if the
    moved actors file does not exist. If either file can't be read then
    it is treated as being empty.
    """
    moved_accounts_filename = data_dir(base_dir) + '/actors_moved.txt'
    if not os.path.isfile(moved_accounts_filename):
        return {}
    refollow_str = ''
    try:
        with open(moved_accounts_filename, 'r',
                  encoding='utf-8') as fp_refollow:
            refollow_str = fp_refollow.read()
    except OSError:
        print('EX: get_moved_accounts unable to read 1 ' +
              moved_accounts_filename)

    follow_filename = \
        acct_dir(base_dir, nickname, domain) + '/' + filename
    follow_str = ''
    try:
        with open(follow_filename, 'r',
                  encoding='utf-8') as fp_follow:
            follow_str = fp_follow.read()
    except OSError:
        print('EX: get_moved_accounts unable to read 2 ' +
              follow_filename)
    follow_list = follow_str.split('\n')
    # a set avoids scanning the whole follow list for every moved handle
    follow_set = set(follow_list)

    # previous handle -> new handle
    refollow_dict = {}
    for line in refollow_str.split('\n'):
        if ' ' not in line:
            continue
        fields = line.split(' ')
        refollow_dict[fields[0]] = fields[1]

    # iterate the list rather than the set so that the result
    # keeps the order of the follow file
    result = {}
    for handle in follow_list:
        new_handle = refollow_dict.get(handle)
        if not new_handle:
            continue
        if new_handle not in follow_set:
            result[handle] = new_handle
    return result
2022-11-27 22:59:24 +00:00
def get_moved_feed(base_dir: str, domain: str, port: int, path: str,
                   http_prefix: str, authorized: bool,
                   follows_per_page=12) -> {}:
    """Returns the moved accounts feed from GET requests.

    path is expected to be /users/<nickname>/moved or /@<nickname>/moved,
    optionally followed by ?page=<number>. None is returned if the path
    is not a moved feed path, or if the nickname is missing or not valid.

    Otherwise an OrderedCollectionPage dict is returned. Its orderedItems
    are actor urls built from the keys of the dict returned by
    get_moved_accounts for following.txt, with follows_per_page entries
    per page. Non-authorized viewers always get page 1 with no items.
    A page number which can't be parsed, or is less than 1, is treated
    as page 1.
    """
    # Don't show moved accounts to non-authorized viewers
    if not authorized:
        follows_per_page = 0

    if '/moved' not in path:
        return None
    if '?page=' not in path:
        path = path.replace('/moved', '/moved?page=true')

    # handle page numbers
    # NOTE: because of the replacement above the path always contains
    # '?page=' at this point, so header_only is always set False here
    header_only = True
    page_number = None
    if '?page=' in path:
        page_str = path.split('?page=')[1]
        if len(page_str) > 5:
            page_str = "1"
        if page_str == 'true' or not authorized:
            page_number = 1
        else:
            try:
                page_number = int(page_str)
            except ValueError:
                print('EX: get_moved_feed unable to convert to int ' +
                      str(page_str))
                # fall back to the first page, otherwise the
                # arithmetic below would be done on a string
                page_number = 1
        path = path.split('?page=')[0]
        header_only = False

    if not path.endswith('/moved'):
        return None
    nickname = None
    if path.startswith('/users/'):
        nickname = \
            path.replace('/users/', '', 1).replace('/moved', '')
    if path.startswith('/@'):
        if '/@/' not in path:
            nickname = path.replace('/@', '', 1).replace('/moved', '')
    if not nickname:
        return None
    if not valid_nickname(domain, nickname):
        return None

    domain = get_full_domain(domain, port)

    lines = get_moved_accounts(base_dir, nickname, domain,
                               'following.txt')
    feed_url = local_actor_url(http_prefix, nickname, domain) + '/moved'

    if header_only:
        # collection header (currently not reachable, see NOTE above)
        total_str = str(len(lines.items()))
        following = {
            "@context": [
                'https://www.w3.org/ns/activitystreams',
                'https://w3id.org/security/v1'
            ],
            'first': feed_url + '?page=1',
            'id': feed_url,
            'orderedItems': [],
            'totalItems': total_str,
            'type': 'OrderedCollection'
        }
        return following

    if not page_number or page_number < 1:
        page_number = 1

    next_page_number = int(page_number + 1)
    following = {
        "@context": [
            'https://www.w3.org/ns/activitystreams',
            'https://w3id.org/security/v1'
        ],
        'id': feed_url + '?page=' + str(page_number),
        'orderedItems': [],
        'partOf': feed_url,
        'totalItems': 0,
        'type': 'OrderedCollectionPage'
    }

    total_ctr = len(lines)
    following['totalItems'] = total_ctr

    last_page = 1
    if follows_per_page > 0:
        # the handles belonging to the requested page
        start_index = (page_number - 1) * follows_per_page
        page_handles = \
            list(lines)[start_index:start_index + follows_per_page]
        for handle in page_handles:
            # nickname@domain
            line2 = remove_eol(handle.lower())
            url = None
            if '@' in line2:
                handle_fields = line2.split('@')
                nick = handle_fields[0]
                dom = handle_fields[1]
                if not nick.startswith('!'):
                    # person actor
                    url = local_actor_url(http_prefix, nick, dom)
                else:
                    # group actor
                    url = http_prefix + '://' + dom + '/c/' + nick
            elif '://' in line2:
                url = remove_eol(handle)
            if url:
                following['orderedItems'].append(url)
        last_page = max(total_ctr // follows_per_page, 1)
    # with zero entries per page (non-authorized viewers) no items are
    # listed and there is no division by the page size

    # NOTE(review): 'next' is only set when the following page would be
    # beyond last_page, and it then points at last_page. last_page is
    # also rounded down. Confirm that this is what clients expect.
    if next_page_number > last_page:
        following['next'] = feed_url + '?page=' + str(last_page)
    return following
2022-11-28 13:33:11 +00:00
def update_moved_actors(base_dir: str, debug: bool) -> None:
    """Updates the file containing moved actors

    Every handle within the following.txt file of each account directory
    is looked up within the cached actors (base_dir/cache/actors). If the
    cached actor has a movedTo attribute then a "handle new_handle" line
    is written to actors_moved.txt within the data directory. If no moved
    accounts are found then any existing actors_moved.txt is removed.
    Nothing is done if the actors cache directory does not exist.
    """
    actors_cache_dir = base_dir + '/cache/actors'
    if not os.path.isdir(actors_cache_dir):
        if debug:
            print('No cached actors')
        return

    if debug:
        print('Updating moved actors')

    # handle (nickname@domain) -> cached actor filename
    actors_dict = {}
    ctr = 0
    for _, _, files in os.walk(actors_cache_dir):
        for cache_filename in files:
            if not cache_filename.endswith('.json'):
                continue
            # the actor url is obtained from the cache filename
            actor_str = \
                cache_filename.replace('.json', '').replace('#', '/')
            nickname = get_nickname_from_actor(actor_str)
            if not nickname:
                # without a nickname no handle can be created
                continue
            domain, port = get_domain_from_actor(actor_str)
            if not domain:
                continue
            domain_full = get_full_domain(domain, port)
            handle = nickname + '@' + domain_full
            actors_dict[handle] = cache_filename
            ctr += 1
        # only the top level of the cache directory is examined
        break

    if actors_dict:
        print('Actors dict created ' + str(ctr))
    else:
        print('No cached actors found')

    # get the handles to be checked for movedTo attribute
    handles_to_check: list[str] = []
    # mirrors handles_to_check, so that checking for duplicates
    # does not need to scan the list
    handles_seen = set()
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for account in dirs:
            if not is_account_dir(account):
                continue
            following_filename = dir_str + '/' + account + '/following.txt'
            if not os.path.isfile(following_filename):
                continue
            following_str = ''
            try:
                with open(following_filename, 'r',
                          encoding='utf-8') as fp_foll:
                    following_str = fp_foll.read()
            except OSError:
                print('EX: update_moved_actors unable to read ' +
                      following_filename)
                continue
            for handle in following_str.split('\n'):
                # blank lines are not handles
                if not handle:
                    continue
                if handle in handles_seen:
                    continue
                handles_seen.add(handle)
                handles_to_check.append(handle)
        # only the top level of the data directory is examined
        break

    if handles_to_check:
        print('All accounts handles list generated ' +
              str(len(handles_to_check)))
    else:
        print('No accounts are following')

    # one "handle new_handle" line for each moved account
    moved_lines: list[str] = []
    for handle in handles_to_check:
        cache_filename = actors_dict.get(handle)
        if not cache_filename:
            continue
        actor_filename = base_dir + '/cache/actors/' + cache_filename
        if not os.path.isfile(actor_filename):
            continue
        actor_json = load_json(actor_filename)
        if not actor_json:
            continue
        moved_to = actor_json.get('movedTo')
        if not moved_to:
            continue
        nickname = get_nickname_from_actor(moved_to)
        if not nickname:
            continue
        domain, port = get_domain_from_actor(moved_to)
        if not domain:
            continue
        domain_full = get_full_domain(domain, port)
        new_handle = nickname + '@' + domain_full
        moved_lines.append(handle + ' ' + new_handle + '\n')
    moved_str = ''.join(moved_lines)

    if moved_str:
        print('Moved accounts detected ' + str(len(moved_lines)))
    else:
        print('No moved accounts detected')

    moved_accounts_filename = data_dir(base_dir) + '/actors_moved.txt'
    if not moved_str:
        # nothing has moved, so remove any previous file
        if os.path.isfile(moved_accounts_filename):
            try:
                os.remove(moved_accounts_filename)
            except OSError:
                print('EX: update_moved_actors unable to remove ' +
                      moved_accounts_filename)
        return

    try:
        with open(moved_accounts_filename, 'w+',
                  encoding='utf-8') as fp_moved:
            fp_moved.write(moved_str)
    except OSError:
        print('EX: update_moved_actors unable to save ' +
              moved_accounts_filename)
2022-12-01 19:44:12 +00:00
def _get_inactive_accounts(base_dir: str, nickname: str, domain: str,
                           dormant_months: int,
                           sites_unavailable: []) -> []:
    """returns a list of inactive accounts

    Each line of the followers.txt file for the account is either a
    nickname@domain handle or an actor url. An entry is included within
    the returned list if its domain is within sites_unavailable, or if
    is_dormant returns True for its actor url. For nickname@domain
    handles the actor url is not known, so each of the user paths from
    get_user_paths, followed by a plain '/', is tried with https and
    then with http. The returned list contains no duplicates. If
    followers.txt can't be read then an empty list is returned.
    """
    # get the list of followers
    followers_filename = \
        acct_dir(base_dir, nickname, domain) + '/followers.txt'
    followers_str = ''
    try:
        with open(followers_filename, 'r',
                  encoding='utf-8') as fp_follow:
            followers_str = fp_follow.read()
    except OSError:
        print('EX: _get_inactive_accounts unable to read ' +
              followers_filename)
    followers_list = followers_str.split('\n')

    result: list[str] = []
    # mirrors result, so that checking for duplicates
    # does not need to scan the list
    listed = set()
    users_list = get_user_paths()
    for handle in followers_list:
        if handle in listed:
            continue
        if '@' in handle:
            handle_fields = handle.split('@')
            follower_nickname = handle_fields[0]
            follower_domain = handle_fields[1]
            if follower_domain in sites_unavailable:
                result.append(handle)
                listed.add(handle)
                continue
            found = False
            for http_prefix in ('https://', 'http://'):
                for users_str in users_list:
                    actor = \
                        http_prefix + follower_domain + users_str + \
                        follower_nickname
                    if is_dormant(base_dir, nickname, domain, actor,
                                  dormant_months):
                        found = True
                        break
                if not found:
                    # actor url without any user path
                    actor = \
                        http_prefix + follower_domain + '/' + \
                        follower_nickname
                    if is_dormant(base_dir, nickname, domain, actor,
                                  dormant_months):
                        found = True
                if found:
                    break
            if found:
                result.append(handle)
                listed.add(handle)
        elif '://' in handle:
            actor = handle
            # the domain is the text between :// and the next slash
            follower_domain = actor.split('://')[1]
            if '/' in follower_domain:
                follower_domain = follower_domain.split('/')[0]
            if follower_domain in sites_unavailable or \
               is_dormant(base_dir, nickname, domain, actor,
                          dormant_months):
                result.append(actor)
                listed.add(actor)
    return result
def get_inactive_feed(base_dir: str, domain: str, port: int, path: str,
                      http_prefix: str, authorized: bool,
                      dormant_months: int,
                      follows_per_page: int, sites_unavailable: []) -> {}:
    """Returns the inactive accounts feed from GET requests.

    path is expected to be /users/<nickname>/inactive or
    /@<nickname>/inactive, optionally followed by ?page=<number>. None is
    returned if the path is not an inactive feed path, or if the nickname
    is missing or not valid.

    Otherwise an OrderedCollectionPage dict is returned. Its orderedItems
    are actor urls built from the list returned by
    _get_inactive_accounts, with follows_per_page entries per page.
    Non-authorized viewers always get page 1 with no items. A page
    number which can't be parsed, or is less than 1, is treated as
    page 1.
    """
    # Don't show inactive accounts to non-authorized viewers
    if not authorized:
        follows_per_page = 0

    if '/inactive' not in path:
        return None
    if '?page=' not in path:
        path = path.replace('/inactive', '/inactive?page=true')

    # handle page numbers
    # NOTE: because of the replacement above the path always contains
    # '?page=' at this point, so header_only is always set False here
    header_only = True
    page_number = None
    if '?page=' in path:
        page_str = path.split('?page=')[1]
        if len(page_str) > 5:
            page_str = "1"
        if page_str == 'true' or not authorized:
            page_number = 1
        else:
            try:
                page_number = int(page_str)
            except ValueError:
                print('EX: get_inactive_feed unable to convert to int ' +
                      str(page_str))
                # fall back to the first page, otherwise the
                # arithmetic below would be done on a string
                page_number = 1
        path = path.split('?page=')[0]
        header_only = False

    if not path.endswith('/inactive'):
        return None
    nickname = None
    if path.startswith('/users/'):
        nickname = \
            path.replace('/users/', '', 1).replace('/inactive', '')
    if path.startswith('/@'):
        if '/@/' not in path:
            nickname = path.replace('/@', '', 1).replace('/inactive', '')
    if not nickname:
        return None
    if not valid_nickname(domain, nickname):
        return None

    domain = get_full_domain(domain, port)

    lines = _get_inactive_accounts(base_dir, nickname, domain,
                                   dormant_months,
                                   sites_unavailable)
    feed_url = \
        local_actor_url(http_prefix, nickname, domain) + '/inactive'

    if header_only:
        # collection header (currently not reachable, see NOTE above)
        total_str = str(len(lines))
        following = {
            "@context": [
                'https://www.w3.org/ns/activitystreams',
                'https://w3id.org/security/v1'
            ],
            # the first page belongs to this feed, not the moved feed
            'first': feed_url + '?page=1',
            'id': feed_url,
            'orderedItems': [],
            'totalItems': total_str,
            'type': 'OrderedCollection'
        }
        return following

    if not page_number or page_number < 1:
        page_number = 1

    next_page_number = int(page_number + 1)
    following = {
        "@context": [
            'https://www.w3.org/ns/activitystreams',
            'https://w3id.org/security/v1'
        ],
        'id': feed_url + '?page=' + str(page_number),
        'orderedItems': [],
        'partOf': feed_url,
        'totalItems': 0,
        'type': 'OrderedCollectionPage'
    }

    total_ctr = len(lines)
    following['totalItems'] = total_ctr

    last_page = 1
    if follows_per_page > 0:
        # the handles belonging to the requested page
        start_index = (page_number - 1) * follows_per_page
        page_handles = lines[start_index:start_index + follows_per_page]
        for handle in page_handles:
            # nickname@domain
            line2 = remove_eol(handle.lower())
            url = None
            if '@' in line2:
                handle_fields = line2.split('@')
                nick = handle_fields[0]
                dom = handle_fields[1]
                if not nick.startswith('!'):
                    # person actor
                    url = local_actor_url(http_prefix, nick, dom)
                else:
                    # group actor
                    url = http_prefix + '://' + dom + '/c/' + nick
            elif '://' in line2:
                url = remove_eol(handle)
            if url:
                following['orderedItems'].append(url)
        last_page = max(total_ctr // follows_per_page, 1)
    # with zero entries per page (non-authorized viewers) no items are
    # listed and there is no division by the page size

    # NOTE(review): 'next' is only set when the following page would be
    # beyond last_page, and it then points at last_page. last_page is
    # also rounded down. Confirm that this is what clients expect.
    if next_page_number > last_page:
        following['next'] = feed_url + '?page=' + str(last_page)
    return following