__filename__ = "relationships.py" __author__ = "Bob Mottram" __license__ = "AGPL3+" __version__ = "1.5.0" __maintainer__ = "Bob Mottram" __email__ = "bob@libreserver.org" __status__ = "Production" __module_group__ = "Core" import os from utils import data_dir from utils import get_user_paths from utils import is_dormant from utils import acct_dir from utils import valid_nickname from utils import get_full_domain from utils import local_actor_url from utils import remove_domain_port from utils import remove_eol from utils import is_account_dir from utils import get_nickname_from_actor from utils import get_domain_from_actor from utils import load_json def get_moved_accounts(base_dir: str, nickname: str, domain: str, filename: str) -> {}: """returns a dict of moved accounts """ moved_accounts_filename = data_dir(base_dir) + '/actors_moved.txt' if not os.path.isfile(moved_accounts_filename): return {} refollow_str = '' try: with open(moved_accounts_filename, 'r', encoding='utf-8') as fp_refollow: refollow_str = fp_refollow.read() except OSError: print('EX: get_moved_accounts unable to read 1 ' + moved_accounts_filename) refollow_list = refollow_str.split('\n') refollow_dict = {} follow_filename = \ acct_dir(base_dir, nickname, domain) + '/' + filename follow_str = '' try: with open(follow_filename, 'r', encoding='utf-8') as fp_follow: follow_str = fp_follow.read() except OSError: print('EX: get_moved_accounts unable to read 2 ' + follow_filename) follow_list = follow_str.split('\n') ctr = 0 for line in refollow_list: if ' ' not in line: continue prev_handle = line.split(' ')[0] new_handle = line.split(' ')[1] refollow_dict[prev_handle] = new_handle ctr = ctr + 1 result = {} for handle in follow_list: if refollow_dict.get(handle): if refollow_dict[handle] not in follow_list: result[handle] = refollow_dict[handle] return result def get_moved_feed(base_dir: str, domain: str, port: int, path: str, http_prefix: str, authorized: bool, follows_per_page=12) -> {}: """Returns the moved accounts feed from GET requests. """ # Don't show moved accounts to non-authorized viewers if not authorized: follows_per_page = 0 if '/moved' not in path: return None if '?page=' not in path: path = path.replace('/moved', '/moved?page=true') # handle page numbers header_only = True page_number = None if '?page=' in path: page_number = path.split('?page=')[1] if len(page_number) > 5: page_number = "1" if page_number == 'true' or not authorized: page_number = 1 else: try: page_number = int(page_number) except BaseException: print('EX: get_moved_feed unable to convert to int ' + str(page_number)) path = path.split('?page=')[0] header_only = False if not path.endswith('/moved'): return None nickname = None if path.startswith('/users/'): nickname = \ path.replace('/users/', '', 1).replace('/moved', '') if path.startswith('/@'): if '/@/' not in path: nickname = path.replace('/@', '', 1).replace('/moved', '') if not nickname: return None if not valid_nickname(domain, nickname): return None domain = get_full_domain(domain, port) lines = get_moved_accounts(base_dir, nickname, domain, 'following.txt') if header_only: first_str = \ local_actor_url(http_prefix, nickname, domain) + \ '/moved?page=1' id_str = \ local_actor_url(http_prefix, nickname, domain) + '/moved' total_str = str(len(lines.items())) following = { '@context': 'https://www.w3.org/ns/activitystreams', 'first': first_str, 'id': id_str, 'orderedItems': [], 'totalItems': total_str, 'type': 'OrderedCollection' } return following if not page_number: page_number = 1 next_page_number = int(page_number + 1) id_str = \ local_actor_url(http_prefix, nickname, domain) + \ '/moved?page=' + str(page_number) part_of_str = \ local_actor_url(http_prefix, nickname, domain) + '/moved' following = { '@context': 'https://www.w3.org/ns/activitystreams', 'id': id_str, 'orderedItems': [], 'partOf': part_of_str, 'totalItems': 0, 'type': 'OrderedCollectionPage' } handle_domain = domain handle_domain = remove_domain_port(handle_domain) curr_page = 1 page_ctr = 0 total_ctr = 0 for handle, _ in lines.items(): # nickname@domain page_ctr += 1 total_ctr += 1 if curr_page == page_number: line2_lower = handle.lower() line2 = remove_eol(line2_lower) url = None if '@' in line2: nick = line2.split('@')[0] dom = line2.split('@')[1] if not nick.startswith('!'): # person actor url = local_actor_url(http_prefix, nick, dom) else: # group actor url = http_prefix + '://' + dom + '/c/' + nick else: if '://' in line2: url = remove_eol(handle) if url: following['orderedItems'].append(url) if page_ctr >= follows_per_page: page_ctr = 0 curr_page += 1 following['totalItems'] = total_ctr last_page = int(total_ctr / follows_per_page) last_page = max(last_page, 1) if next_page_number > last_page: following['next'] = \ local_actor_url(http_prefix, nickname, domain) + \ '/moved?page=' + str(last_page) return following def update_moved_actors(base_dir: str, debug: bool) -> None: """Updates the file containing moved actors """ actors_cache_dir = base_dir + '/cache/actors' if not os.path.isdir(actors_cache_dir): if debug: print('No cached actors') return if debug: print('Updating moved actors') actors_dict = {} ctr = 0 for _, _, files in os.walk(actors_cache_dir): for actor_str in files: if not actor_str.endswith('.json'): continue orig_str = actor_str actor_str = actor_str.replace('.json', '').replace('#', '/') nickname = get_nickname_from_actor(actor_str) domain, port = get_domain_from_actor(actor_str) if not domain: continue domain_full = get_full_domain(domain, port) handle = nickname + '@' + domain_full actors_dict[handle] = orig_str ctr += 1 break if actors_dict: print('Actors dict created ' + str(ctr)) else: print('No cached actors found') # get the handles to be checked for movedTo attribute handles_to_check = [] dir_str = data_dir(base_dir) for _, dirs, _ in os.walk(dir_str): for account in dirs: if not is_account_dir(account): continue following_filename = dir_str + '/' + account + '/following.txt' if not os.path.isfile(following_filename): continue following_str = '' try: with open(following_filename, 'r', encoding='utf-8') as fp_foll: following_str = fp_foll.read() except OSError: print('EX: update_moved_actors unable to read ' + following_filename) continue following_list = following_str.split('\n') for handle in following_list: if handle not in handles_to_check: handles_to_check.append(handle) break if handles_to_check: print('All accounts handles list generated ' + str(len(handles_to_check))) else: print('No accounts are following') moved_str = '' ctr = 0 for handle in handles_to_check: if not actors_dict.get(handle): continue actor_filename = base_dir + '/cache/actors/' + actors_dict[handle] if not os.path.isfile(actor_filename): continue actor_json = load_json(actor_filename) if not actor_json: continue if not actor_json.get('movedTo'): continue nickname = get_nickname_from_actor(actor_json['movedTo']) if not nickname: continue domain, port = get_domain_from_actor(actor_json['movedTo']) if not domain: continue domain_full = get_full_domain(domain, port) new_handle = nickname + '@' + domain_full moved_str += handle + ' ' + new_handle + '\n' ctr = ctr + 1 if moved_str: print('Moved accounts detected ' + str(ctr)) else: print('No moved accounts detected') moved_accounts_filename = data_dir(base_dir) + '/actors_moved.txt' if not moved_str: if os.path.isfile(moved_accounts_filename): try: os.remove(moved_accounts_filename) except OSError: print('EX: update_moved_actors unable to remove ' + moved_accounts_filename) return try: with open(moved_accounts_filename, 'w+', encoding='utf-8') as fp_moved: fp_moved.write(moved_str) except OSError: print('EX: update_moved_actors unable to save ' + moved_accounts_filename) def _get_inactive_accounts(base_dir: str, nickname: str, domain: str, dormant_months: int, sites_unavailable: []) -> []: """returns a list of inactive accounts """ # get the list of followers followers_filename = \ acct_dir(base_dir, nickname, domain) + '/followers.txt' followers_str = '' try: with open(followers_filename, 'r', encoding='utf-8') as fp_follow: followers_str = fp_follow.read() except OSError: print('EX: get_moved_accounts unable to read ' + followers_filename) followers_list = followers_str.split('\n') result = [] users_list = get_user_paths() for handle in followers_list: if handle in result: continue if '@' in handle: follower_nickname = handle.split('@')[0] follower_domain = handle.split('@')[1] if follower_domain in sites_unavailable: result.append(handle) continue found = False for http_prefix in ('https://', 'http://'): for users_str in users_list: actor = \ http_prefix + follower_domain + users_str + \ follower_nickname if is_dormant(base_dir, nickname, domain, actor, dormant_months): result.append(handle) found = True break if not found: actor = \ http_prefix + follower_domain + '/' + \ follower_nickname if is_dormant(base_dir, nickname, domain, actor, dormant_months): result.append(handle) found = True if found: break elif '://' in handle: actor = handle follower_domain = actor.split('://')[1] if '/' in follower_domain: follower_domain = follower_domain.split('/')[0] if follower_domain in sites_unavailable: result.append(actor) continue if is_dormant(base_dir, nickname, domain, actor, dormant_months): result.append(actor) return result def get_inactive_feed(base_dir: str, domain: str, port: int, path: str, http_prefix: str, authorized: bool, dormant_months: int, follows_per_page: int, sites_unavailable: []) -> {}: """Returns the inactive accounts feed from GET requests. """ # Don't show inactive accounts to non-authorized viewers if not authorized: follows_per_page = 0 if '/inactive' not in path: return None if '?page=' not in path: path = path.replace('/inactive', '/inactive?page=true') # handle page numbers header_only = True page_number = None if '?page=' in path: page_number = path.split('?page=')[1] if len(page_number) > 5: page_number = "1" if page_number == 'true' or not authorized: page_number = 1 else: try: page_number = int(page_number) except BaseException: print('EX: get_inactive_feed unable to convert to int ' + str(page_number)) path = path.split('?page=')[0] header_only = False if not path.endswith('/inactive'): return None nickname = None if path.startswith('/users/'): nickname = \ path.replace('/users/', '', 1).replace('/inactive', '') if path.startswith('/@'): if '/@/' not in path: nickname = path.replace('/@', '', 1).replace('/inactive', '') if not nickname: return None if not valid_nickname(domain, nickname): return None domain = get_full_domain(domain, port) lines = _get_inactive_accounts(base_dir, nickname, domain, dormant_months, sites_unavailable) if header_only: first_str = \ local_actor_url(http_prefix, nickname, domain) + \ '/moved?page=1' id_str = \ local_actor_url(http_prefix, nickname, domain) + '/inactive' total_str = str(len(lines)) following = { '@context': 'https://www.w3.org/ns/activitystreams', 'first': first_str, 'id': id_str, 'orderedItems': [], 'totalItems': total_str, 'type': 'OrderedCollection' } return following if not page_number: page_number = 1 next_page_number = int(page_number + 1) id_str = \ local_actor_url(http_prefix, nickname, domain) + \ '/inactive?page=' + str(page_number) part_of_str = \ local_actor_url(http_prefix, nickname, domain) + '/inactive' following = { '@context': 'https://www.w3.org/ns/activitystreams', 'id': id_str, 'orderedItems': [], 'partOf': part_of_str, 'totalItems': 0, 'type': 'OrderedCollectionPage' } handle_domain = domain handle_domain = remove_domain_port(handle_domain) curr_page = 1 page_ctr = 0 total_ctr = 0 for handle in lines: # nickname@domain page_ctr += 1 total_ctr += 1 if curr_page == page_number: line2_lower = handle.lower() line2 = remove_eol(line2_lower) url = None if '@' in line2: nick = line2.split('@')[0] dom = line2.split('@')[1] if not nick.startswith('!'): # person actor url = local_actor_url(http_prefix, nick, dom) else: # group actor url = http_prefix + '://' + dom + '/c/' + nick else: if '://' in line2: url = remove_eol(handle) if url: following['orderedItems'].append(url) if page_ctr >= follows_per_page: page_ctr = 0 curr_page += 1 following['totalItems'] = total_ctr last_page = int(total_ctr / follows_per_page) last_page = max(last_page, 1) if next_page_number > last_page: following['next'] = \ local_actor_url(http_prefix, nickname, domain) + \ '/inactive?page=' + str(last_page) return following