epicyon/webapp_hashtagswarm.py

260 lines
9.7 KiB
Python

__filename__ = "webapp_hashtagswarm.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Web Interface"
import os
from datetime import datetime, timezone
from utils import get_nickname_from_actor
from utils import get_config_param
from utils import escape_text
from utils import date_utcnow
from utils import date_epoch
from categories import get_hashtag_categories
from categories import get_hashtag_category
from webapp_utils import set_custom_background
from webapp_utils import get_search_banner_file
from webapp_utils import get_content_warning_button
from webapp_utils import html_header_with_external_style
from webapp_utils import html_footer
def get_hashtag_categories_feed(base_dir: str,
                                hashtag_categories: {} = None) -> str:
    """Builds an rss 2.0 document describing the hashtag categories

    Every category becomes one item. Its title is the escaped category
    name and its description is the escaped, space separated list of
    hashtags within that category. Hashtags containing ':' or '&' are
    left out of the description.

    If no categories are supplied then they are obtained from
    get_hashtag_categories. None is returned if there are still no
    categories after that.
    """
    categories = hashtag_categories
    if not categories:
        categories = get_hashtag_categories(base_dir, False, None)
        if not categories:
            return None

    # the same publication date is used for every item in the feed
    published = date_utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")

    feed = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n",
        "<rss version=\"2.0\">\n",
        '<channel>\n',
        ' <title>#categories</title>\n'
    ]
    for category_name, hashtags in categories.items():
        description = ' '.join(
            tag for tag in hashtags
            if ':' not in tag and '&' not in tag)
        feed.append('<item>\n')
        feed.append(' <title>' + escape_text(category_name) + '</title>\n')
        feed.append(' <description>' +
                    escape_text(description.strip()) + '</description>\n')
        feed.append(' <link/>\n')
        feed.append(' <pubDate>' + published + '</pubDate>\n')
        feed.append('</item>\n')
    feed.append('</channel>\n')
    feed.append('</rss>\n')
    return ''.join(feed)
def _swarm_name_is_safe(name: str) -> bool:
    """Returns true if a tag or category name contains none of the
    characters which exclude it from the swarm
    """
    for excluded_char in ('#', '&', '"', "'"):
        if excluded_char in name:
            return False
    return True


def _is_recent_tag_line(line: str, recently: int) -> bool:
    """Returns true if a line from a tags file has exactly three space
    separated sections, where the first is a days-since-epoch number
    which is not earlier than the given day and the third contains '##'
    """
    if ' ' not in line:
        return False
    sections = line.split(' ')
    if len(sections) != 3:
        return False
    post_days_since_epoch_str = sections[0]
    if not post_days_since_epoch_str.isdigit():
        return False
    if int(post_days_since_epoch_str) < recently:
        return False
    return '##' in sections[2]


def html_hash_tag_swarm(base_dir: str, actor: str, translate: {}) -> str:
    """Returns a tag swarm of today's hashtags

    Looks at the .txt files directly within base_dir/tags and keeps a
    hashtag when all of the following hold:
      - the file was modified today or yesterday (days since the epoch)
      - the name is no longer than 42 characters and contains none of
        # & " '
      - '#name' followed by a newline does not appear within
        base_dir/accounts/blocking.txt
      - the first line of the file begins with today's or yesterday's
        day number and is a well formed tags line
    For each kept hashtag which has a matching .category file, the
    category returned by get_hashtag_category is also collected, subject
    to the same character filter and a length below 42.

    The result is html containing one link per hashtag beneath
    actor/tags/, optionally preceded by bold category links beneath
    actor/category/. An empty string is returned if no hashtags
    qualify.
    """
    max_tag_length = 42
    curr_time = date_utcnow()
    prev_time_epoch = date_epoch()
    days_since_epoch = (curr_time - prev_time_epoch).days
    recently = days_since_epoch - 1
    # the first line of a recently used tags file begins with today's
    # or yesterday's day number followed by a space
    recent_line_prefixes = (
        str(days_since_epoch) + ' ',
        str(recently) + ' '
    )

    # Load the blocked hashtags into memory.
    # This avoids needing to repeatedly load the blocked file for each hashtag
    blocked_str = ''
    global_blocking_filename = base_dir + '/accounts/blocking.txt'
    if os.path.isfile(global_blocking_filename):
        with open(global_blocking_filename, 'r',
                  encoding='utf-8') as fp_block:
            blocked_str = fp_block.read()

    tag_swarm = []
    category_swarm = []
    tags_dir = base_dir + '/tags'
    # only the top level of the tags directory is examined, and a missing
    # directory is treated as containing no files
    _, _, tag_files = next(os.walk(tags_dir), (None, None, []))
    for fname in tag_files:
        if not fname.endswith('.txt'):
            continue
        tags_filename = os.path.join(tags_dir, fname)
        if not os.path.isfile(tags_filename):
            continue

        # skip files which were last modified before yesterday
        mod_time_since_epoch = os.path.getmtime(tags_filename)
        last_modified_date = \
            datetime.fromtimestamp(mod_time_since_epoch, timezone.utc)
        file_days_since_epoch = \
            (last_modified_date - prev_time_epoch).days
        if file_days_since_epoch < recently:
            continue

        hash_tag_name = fname.split('.')[0]
        if len(hash_tag_name) > max_tag_length:
            # NoIncrediblyLongAndBoringHashtagsShownHere
            continue
        if not _swarm_name_is_safe(hash_tag_name):
            continue
        if '#' + hash_tag_name + '\n' in blocked_str:
            continue

        # Only the first line decides whether the hashtag is shown, so
        # the file is opened once and a single line is read from it.
        # NOTE(review): this assumes that the original loop over lines
        # always stopped after its first iteration - confirm
        with open(tags_filename, 'r', encoding='utf-8') as fp_tags:
            first_line = fp_tags.readline()
        if not first_line.startswith(recent_line_prefixes):
            continue
        if not _is_recent_tag_line(first_line, recently):
            continue

        tag_swarm.append(hash_tag_name)

        category_filename = tags_filename.replace('.txt', '.category')
        if not os.path.isfile(category_filename):
            continue
        category_str = get_hashtag_category(base_dir, hash_tag_name)
        if len(category_str) >= max_tag_length:
            continue
        if not _swarm_name_is_safe(category_str):
            continue
        if category_str not in category_swarm:
            category_swarm.append(category_str)

    if not tag_swarm:
        return ''
    tag_swarm.sort()

    # swarm of categories
    # NOTE(review): categories are only listed when more than three were
    # found. This nesting was inferred, so confirm that the links are not
    # also expected for three or fewer categories
    category_swarm_str = ''
    if len(category_swarm) > 3:
        category_swarm.sort()
        for category_str in category_swarm:
            category_swarm_str += \
                '<a href="' + actor + '/category/' + category_str + \
                '" class="hashtagswarm"><b>' + category_str + '</b></a>\n'
        category_swarm_str += '<br>\n'

    # swarm of tags
    tag_links = []
    for tag_name in tag_swarm:
        tag_display_name = tag_name
        tag_map_filename = \
            os.path.join(base_dir + '/tagmaps', tag_name + '.txt')
        if os.path.isfile(tag_map_filename):
            # hashtags having a tagmaps file are shown with a pin
            tag_display_name = '📌' + tag_name
        tag_links.append(
            '<a href="' + actor + '/tags/' + tag_name +
            '" class="hashtagswarm">' + tag_display_name + '</a>\n')
    tag_swarm_str = ''.join(tag_links)

    if category_swarm_str:
        # presumably places the tags behind a button when categories
        # are being shown - confirm against get_content_warning_button
        tag_swarm_str = \
            get_content_warning_button('alltags', translate, tag_swarm_str)

    return category_swarm_str + tag_swarm_str.strip() + '\n'
def html_search_hashtag_category(translate: {},
                                 base_dir: str, path: str, domain: str,
                                 theme: str) -> str:
    """Show hashtags after selecting a category on the main search screen

    The path is split on '/category/'. The part before it is used as the
    actor and the part after it, with surrounding whitespace removed, is
    the category name.

    Returns the html for a page containing an optional search banner, a
    heading with the category name, and one link beneath actor/tags/ for
    each hashtag returned by get_hashtag_categories for that category.
    An empty string is returned if the path contains no '/category/'
    or if no nickname can be obtained from the actor.
    """
    path_sections = path.split('/category/')
    if len(path_sections) < 2:
        # not a category path, so there is nothing to show
        return ''
    actor = path_sections[0]
    category_str = path_sections[1].strip()
    search_nickname = get_nickname_from_actor(actor)
    if not search_nickname:
        return ''

    set_custom_background(base_dir, 'search-background', 'follow-background')

    css_filename = base_dir + '/epicyon-search.css'
    if os.path.isfile(base_dir + '/search.css'):
        css_filename = base_dir + '/search.css'

    instance_title = \
        get_config_param(base_dir, 'instanceTitle')
    html_str = \
        html_header_with_external_style(css_filename, instance_title, None)

    # show a banner above the search box
    search_banner_file, search_banner_filename = \
        get_search_banner_file(base_dir, search_nickname, domain, theme)

    # NOTE(review): actor is also taken from the path and is placed
    # within href and src attributes as-is - confirm that callers
    # validate it
    if os.path.isfile(search_banner_filename):
        html_str += '<a href="' + actor + '/search">\n'
        html_str += '<img loading="lazy" decoding="async" ' + \
            'class="timeline-banner" src="' + \
            actor + '/' + search_banner_file + '" alt="" /></a>\n'

    # The category name comes from the requested path, so it is escaped
    # before being shown, using the same helper which this module
    # applies to category names within the rss feed.
    # The unescaped name is still used for the category lookup below
    html_str += \
        '<div class="follow">' + \
        '<center><br><br><br>' + \
        '<h1><a href="' + actor + '/search"><b>' + \
        translate['Category'] + ': ' + escape_text(category_str) + \
        '</b></a></h1>'

    hashtags_dict = get_hashtag_categories(base_dir, True, category_str)
    if hashtags_dict:
        for hashtag_list in hashtags_dict.values():
            hashtag_list.sort()
            for tag_name in hashtag_list:
                html_str += \
                    '<a href="' + actor + '/tags/' + tag_name + \
                    '" class="hashtagswarm">' + tag_name + '</a>\n'

    html_str += \
        '</center>' + \
        '</div>'
    html_str += html_footer()
    return html_str