mirror of https://gitlab.com/bashrc2/epicyon
Move hashtag functions to hashtag module
parent
89f6e3d26d
commit
36915c35d3
180
inbox.py
180
inbox.py
|
@ -21,9 +21,7 @@ from reaction import valid_emoji_content
|
|||
from utils import harmless_markup
|
||||
from utils import quote_toots_allowed
|
||||
from utils import lines_in_file
|
||||
from utils import resembles_url
|
||||
from utils import get_url_from_post
|
||||
from utils import date_from_string_format
|
||||
from utils import date_epoch
|
||||
from utils import date_utcnow
|
||||
from utils import contains_statuses
|
||||
|
@ -44,7 +42,6 @@ from utils import is_system_account
|
|||
from utils import invalid_ciphertext
|
||||
from utils import contains_private_key
|
||||
from utils import remove_html
|
||||
from utils import file_last_modified
|
||||
from utils import has_object_string
|
||||
from utils import has_object_string_object
|
||||
from utils import get_reply_interval_hours
|
||||
|
@ -65,7 +62,6 @@ from utils import remove_id_ending
|
|||
from utils import get_protocol_prefixes
|
||||
from utils import is_blog_post
|
||||
from utils import remove_avatar_from_cache
|
||||
from utils import is_public_post
|
||||
from utils import get_cached_post_filename
|
||||
from utils import remove_post_from_cache
|
||||
from utils import url_permitted
|
||||
|
@ -83,13 +79,10 @@ from utils import undo_reaction_collection_entry
|
|||
from utils import has_group_type
|
||||
from utils import local_actor_url
|
||||
from utils import has_object_string_type
|
||||
from utils import valid_hash_tag
|
||||
from utils import get_attributed_to
|
||||
from utils import get_reply_to
|
||||
from utils import get_actor_from_post
|
||||
from utils import data_dir
|
||||
from categories import get_hashtag_categories
|
||||
from categories import set_hashtag_category
|
||||
from httpsig import get_digest_algorithm_from_headers
|
||||
from httpsig import verify_post_headers
|
||||
from session import create_session
|
||||
|
@ -149,15 +142,13 @@ from git import is_git_patch
|
|||
from git import receive_git_patch
|
||||
from followingCalendar import receiving_calendar_events
|
||||
from happening import save_event_post
|
||||
from delete import remove_old_hashtags
|
||||
from categories import guess_hashtag_category
|
||||
from context import has_valid_context
|
||||
from speaker import update_speaker
|
||||
from announce import is_self_announce
|
||||
from announce import create_announce
|
||||
from notifyOnPost import notify_when_person_posts
|
||||
from conversation import update_conversation
|
||||
from webapp_hashtagswarm import html_hash_tag_swarm
|
||||
from webapp_hashtagswarm import store_hash_tags
|
||||
from person import valid_sending_actor
|
||||
from person import get_person_avatar_url
|
||||
from fitnessFunctions import fitness_performance
|
||||
|
@ -166,10 +157,6 @@ from content import reject_twitter_summary
|
|||
from content import load_dogwhistles
|
||||
from content import valid_url_lengths
|
||||
from threads import begin_thread
|
||||
from maps import get_map_links_from_post_content
|
||||
from maps import get_location_from_post
|
||||
from maps import add_tag_map_links
|
||||
from maps import geocoords_from_map_link
|
||||
from reading import store_book_events
|
||||
|
||||
|
||||
|
@ -207,171 +194,6 @@ def _store_last_post_id(base_dir: str, nickname: str, domain: str,
|
|||
print('EX: Unable to write last post id to ' + actor_filename)
|
||||
|
||||
|
||||
def _update_cached_hashtag_swarm(base_dir: str, nickname: str, domain: str,
                                 http_prefix: str, domain_full: str,
                                 translate: {}) -> bool:
    """Updates the hashtag swarm stored as a file

    The cache is the file '.hashtagSwarm' within the account directory.
    If that file exists and was modified less than 30 minutes ago then
    it is left untouched, which avoids too much disk I/O.

    Returns True only if a new swarm was generated and written to the
    cache file. Returns False if the cache was saved recently, if no
    swarm html was produced or if the file could not be written.
    """
    cached_hashtag_swarm_filename = \
        acct_dir(base_dir, nickname, domain) + '/.hashtagSwarm'
    save_swarm = True
    if os.path.isfile(cached_hashtag_swarm_filename):
        last_modified = file_last_modified(cached_hashtag_swarm_filename)
        modified_date = None
        try:
            modified_date = \
                date_from_string_format(last_modified,
                                        ["%Y-%m-%dT%H:%M:%S%z"])
        except Exception:
            # Exception rather than BaseException, so that
            # KeyboardInterrupt and SystemExit are not swallowed here
            print('EX: unable to parse last modified cache date ' +
                  str(last_modified))
        if modified_date:
            curr_date = date_utcnow()
            time_diff = curr_date - modified_date
            diff_mins = int(time_diff.total_seconds() / 60)
            if diff_mins < 30:
                # was saved recently, so don't save again
                # This avoids too much disk I/O
                save_swarm = False
                print('Not updating hashtag swarm')
            else:
                print('Updating cached hashtag swarm, last changed ' +
                      str(diff_mins) + ' minutes ago')
        else:
            print('WARN: no modified date for ' + str(last_modified))

    if not save_swarm:
        return False

    actor = local_actor_url(http_prefix, nickname, domain_full)
    new_swarm_str = html_hash_tag_swarm(base_dir, actor, translate)
    if new_swarm_str:
        try:
            with open(cached_hashtag_swarm_filename, 'w+',
                      encoding='utf-8') as fp_swarm:
                fp_swarm.write(new_swarm_str)
            # only report success once the file has been closed
            return True
        except OSError:
            print('EX: unable to write cached hashtag swarm ' +
                  cached_hashtag_swarm_filename)
    # NOTE(review): remove_old_hashtags is only reached when no new swarm
    # was written, because the successful write returns above. It is
    # assumed not to run when the cache was saved recently - confirm
    remove_old_hashtags(base_dir, 3)
    return False
|
||||
|
||||
|
||||
def store_hash_tags(base_dir: str, nickname: str, domain: str,
                    http_prefix: str, domain_full: str,
                    post_json_object: {}, translate: {}) -> None:
    """Extracts hashtags from an incoming post and updates the
    relevant tags files.

    Only public posts which have an id and whose object contains a
    non-empty list of tags are processed. For each valid Hashtag a line
    containing the days since epoch, the nickname and the post url
    (with '/' replaced by '#') is added to the top of the file
    tags/<tag name>.txt within base_dir, unless the post url already
    appears within that file.
    Any map links found in the post content or location are passed to
    add_tag_map_links for each hashtag, provided that the post has a
    published date.
    If at least one hashtag was recorded then the cached hashtag swarm
    is updated.
    """
    if not is_public_post(post_json_object):
        return
    if not has_object_dict(post_json_object):
        return
    post_obj = post_json_object['object']
    if not post_obj.get('tag'):
        return
    if not post_json_object.get('id'):
        return
    if not isinstance(post_obj['tag'], list):
        return
    tags_dir = base_dir + '/tags'

    # add tags directory if it doesn't exist
    if not os.path.isdir(tags_dir):
        print('Creating tags directory')
        # exist_ok avoids an exception if the directory appears
        # between the check above and this call
        os.makedirs(tags_dir, exist_ok=True)

    # obtain any map links and these can be associated with hashtags
    # get geolocations from content
    map_links = []
    published = None
    if 'content' in post_obj:
        # published can be absent, in which case no tag map links are
        # stored below, rather than a KeyError being raised here
        published = post_obj.get('published')
        post_content = post_obj['content']
        map_links += get_map_links_from_post_content(post_content)
    # get geolocation from tags
    location_str = get_location_from_post(post_json_object)
    if location_str:
        if resembles_url(location_str):
            zoom, latitude, longitude = \
                geocoords_from_map_link(location_str,
                                        'openstreetmap.org')
            if latitude and longitude and zoom and \
               location_str not in map_links:
                map_links.append(location_str)
    tag_maps_dir = base_dir + '/tagmaps'
    if map_links:
        # add tagmaps directory if it doesn't exist
        if not os.path.isdir(tag_maps_dir):
            print('Creating tagmaps directory')
            os.makedirs(tag_maps_dir, exist_ok=True)

    post_url = remove_id_ending(post_json_object['id'])
    post_url = post_url.replace('/', '#')
    # the line to be stored is the same for every hashtag within this
    # post, so it only needs to be created once
    days_since_epoch = (date_utcnow() - date_epoch()).days
    tag_line = \
        str(days_since_epoch) + ' ' + nickname + ' ' + post_url + '\n'
    hashtags_ctr = 0
    for tag in post_obj['tag']:
        # tags arrive from other servers, so check their structure
        if not isinstance(tag, dict):
            continue
        if not tag.get('type'):
            continue
        if not isinstance(tag['type'], str):
            continue
        if tag['type'] != 'Hashtag':
            continue
        if not tag.get('name'):
            continue
        if not isinstance(tag['name'], str):
            continue
        tag_name = tag['name'].replace('#', '').strip()
        if not valid_hash_tag(tag_name):
            continue
        tags_filename = tags_dir + '/' + tag_name + '.txt'
        if map_links and published:
            add_tag_map_links(tag_maps_dir, tag_name, map_links,
                              published, post_url)
        hashtag_added = False
        if not os.path.isfile(tags_filename):
            try:
                with open(tags_filename, 'w+', encoding='utf-8') as fp_tags:
                    fp_tags.write(tag_line)
                    hashtag_added = True
            except OSError:
                print('EX: store_hash_tags unable to write ' + tags_filename)
        else:
            content = None
            try:
                with open(tags_filename, 'r', encoding='utf-8') as fp_tags:
                    content = fp_tags.read()
            except OSError:
                print('EX: store_hash_tags failed to read ' + tags_filename)
            # if the existing file could not be read then don't rewrite
            # it, since that would discard the entries which it contains
            if content is not None and post_url not in content:
                content = tag_line + content
                try:
                    with open(tags_filename, 'w+',
                              encoding='utf-8') as fp_tags2:
                        fp_tags2.write(content)
                        hashtag_added = True
                except OSError as ex:
                    print('EX: Failed to write entry to tags file ' +
                          tags_filename + ' ' + str(ex))

        if hashtag_added:
            hashtags_ctr += 1

            # automatically assign a category to the tag if possible
            # NOTE(review): assumed to apply only to newly recorded
            # hashtags - confirm the intended nesting
            category_filename = tags_dir + '/' + tag_name + '.category'
            if not os.path.isfile(category_filename):
                hashtag_categories = \
                    get_hashtag_categories(base_dir, False, None)
                category_str = \
                    guess_hashtag_category(tag_name, hashtag_categories, 6)
                if category_str:
                    set_hashtag_category(base_dir, tag_name,
                                         category_str, False, False)

    # if some hashtags were found then recalculate the swarm
    # ready for later display
    if hashtags_ctr > 0:
        _update_cached_hashtag_swarm(base_dir, nickname, domain,
                                     http_prefix, domain_full, translate)
|
||||
|
||||
|
||||
def _inbox_store_post_to_html_cache(recent_posts_cache: {},
|
||||
max_recent_posts: int,
|
||||
translate: {},
|
||||
|
|
|
@ -37,9 +37,9 @@ from utils import dangerous_markup
|
|||
from utils import local_actor_url
|
||||
from utils import text_in_file
|
||||
from utils import data_dir
|
||||
from inbox import store_hash_tags
|
||||
from session import create_session
|
||||
from threads import begin_thread
|
||||
from webapp_hashtagswarm import store_hash_tags
|
||||
|
||||
|
||||
def _update_feeds_outbox_index(base_dir: str, domain: str,
|
||||
|
|
|
@ -47,7 +47,6 @@ from media import replace_you_tube
|
|||
from media import replace_twitter
|
||||
from media import get_media_path
|
||||
from media import create_media_dirs
|
||||
from inbox import store_hash_tags
|
||||
from inbox import inbox_update_index
|
||||
from announce import outbox_announce
|
||||
from announce import outbox_undo_announce
|
||||
|
@ -65,6 +64,7 @@ from delete import outbox_delete
|
|||
from shares import outbox_share_upload
|
||||
from shares import outbox_undo_share_upload
|
||||
from webapp_post import individual_post_as_html
|
||||
from webapp_hashtagswarm import store_hash_tags
|
||||
from speaker import update_speaker
|
||||
from reading import store_book_events
|
||||
from reading import has_edition_tag
|
||||
|
|
|
@ -9,6 +9,15 @@ __module_group__ = "Web Interface"
|
|||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from utils import valid_hash_tag
|
||||
from utils import remove_id_ending
|
||||
from utils import resembles_url
|
||||
from utils import has_object_dict
|
||||
from utils import is_public_post
|
||||
from utils import local_actor_url
|
||||
from utils import date_from_string_format
|
||||
from utils import file_last_modified
|
||||
from utils import acct_dir
|
||||
from utils import data_dir
|
||||
from utils import get_nickname_from_actor
|
||||
from utils import get_config_param
|
||||
|
@ -16,6 +25,13 @@ from utils import escape_text
|
|||
from utils import date_utcnow
|
||||
from utils import date_epoch
|
||||
from utils import string_contains
|
||||
from delete import remove_old_hashtags
|
||||
from maps import add_tag_map_links
|
||||
from maps import geocoords_from_map_link
|
||||
from maps import get_map_links_from_post_content
|
||||
from maps import get_location_from_post
|
||||
from categories import set_hashtag_category
|
||||
from categories import guess_hashtag_category
|
||||
from categories import get_hashtag_categories
|
||||
from categories import get_hashtag_category
|
||||
from webapp_utils import set_custom_background
|
||||
|
@ -271,3 +287,168 @@ def html_search_hashtag_category(translate: {},
|
|||
'</div>'
|
||||
html_str += html_footer()
|
||||
return html_str
|
||||
|
||||
|
||||
def _update_cached_hashtag_swarm(base_dir: str, nickname: str, domain: str,
                                 http_prefix: str, domain_full: str,
                                 translate: {}) -> bool:
    """Updates the hashtag swarm stored as a file

    The cache is the file '.hashtagSwarm' within the account directory.
    If that file exists and was modified less than 30 minutes ago then
    it is left untouched, which avoids too much disk I/O.

    Returns True only if a new swarm was generated and written to the
    cache file. Returns False if the cache was saved recently, if no
    swarm html was produced or if the file could not be written.
    """
    cached_hashtag_swarm_filename = \
        acct_dir(base_dir, nickname, domain) + '/.hashtagSwarm'
    save_swarm = True
    if os.path.isfile(cached_hashtag_swarm_filename):
        last_modified = file_last_modified(cached_hashtag_swarm_filename)
        modified_date = None
        try:
            modified_date = \
                date_from_string_format(last_modified,
                                        ["%Y-%m-%dT%H:%M:%S%z"])
        except Exception:
            # Exception rather than BaseException, so that
            # KeyboardInterrupt and SystemExit are not swallowed here
            print('EX: unable to parse last modified cache date ' +
                  str(last_modified))
        if modified_date:
            curr_date = date_utcnow()
            time_diff = curr_date - modified_date
            diff_mins = int(time_diff.total_seconds() / 60)
            if diff_mins < 30:
                # was saved recently, so don't save again
                # This avoids too much disk I/O
                save_swarm = False
                print('Not updating hashtag swarm')
            else:
                print('Updating cached hashtag swarm, last changed ' +
                      str(diff_mins) + ' minutes ago')
        else:
            print('WARN: no modified date for ' + str(last_modified))

    if not save_swarm:
        return False

    actor = local_actor_url(http_prefix, nickname, domain_full)
    new_swarm_str = html_hash_tag_swarm(base_dir, actor, translate)
    if new_swarm_str:
        try:
            with open(cached_hashtag_swarm_filename, 'w+',
                      encoding='utf-8') as fp_swarm:
                fp_swarm.write(new_swarm_str)
            # only report success once the file has been closed
            return True
        except OSError:
            print('EX: unable to write cached hashtag swarm ' +
                  cached_hashtag_swarm_filename)
    # NOTE(review): remove_old_hashtags is only reached when no new swarm
    # was written, because the successful write returns above. It is
    # assumed not to run when the cache was saved recently - confirm
    remove_old_hashtags(base_dir, 3)
    return False
|
||||
|
||||
|
||||
def store_hash_tags(base_dir: str, nickname: str, domain: str,
                    http_prefix: str, domain_full: str,
                    post_json_object: {}, translate: {}) -> None:
    """Extracts hashtags from an incoming post and updates the
    relevant tags files.

    Only public posts which have an id and whose object contains a
    non-empty list of tags are processed. For each valid Hashtag a line
    containing the days since epoch, the nickname and the post url
    (with '/' replaced by '#') is added to the top of the file
    tags/<tag name>.txt within base_dir, unless the post url already
    appears within that file.
    Any map links found in the post content or location are passed to
    add_tag_map_links for each hashtag, provided that the post has a
    published date.
    If at least one hashtag was recorded then the cached hashtag swarm
    is updated.
    """
    if not is_public_post(post_json_object):
        return
    if not has_object_dict(post_json_object):
        return
    post_obj = post_json_object['object']
    if not post_obj.get('tag'):
        return
    if not post_json_object.get('id'):
        return
    if not isinstance(post_obj['tag'], list):
        return
    tags_dir = base_dir + '/tags'

    # add tags directory if it doesn't exist
    if not os.path.isdir(tags_dir):
        print('Creating tags directory')
        # exist_ok avoids an exception if the directory appears
        # between the check above and this call
        os.makedirs(tags_dir, exist_ok=True)

    # obtain any map links and these can be associated with hashtags
    # get geolocations from content
    map_links = []
    published = None
    if 'content' in post_obj:
        # published can be absent, in which case no tag map links are
        # stored below, rather than a KeyError being raised here
        published = post_obj.get('published')
        post_content = post_obj['content']
        map_links += get_map_links_from_post_content(post_content)
    # get geolocation from tags
    location_str = get_location_from_post(post_json_object)
    if location_str:
        if resembles_url(location_str):
            zoom, latitude, longitude = \
                geocoords_from_map_link(location_str,
                                        'openstreetmap.org')
            if latitude and longitude and zoom and \
               location_str not in map_links:
                map_links.append(location_str)
    tag_maps_dir = base_dir + '/tagmaps'
    if map_links:
        # add tagmaps directory if it doesn't exist
        if not os.path.isdir(tag_maps_dir):
            print('Creating tagmaps directory')
            os.makedirs(tag_maps_dir, exist_ok=True)

    post_url = remove_id_ending(post_json_object['id'])
    post_url = post_url.replace('/', '#')
    # the line to be stored is the same for every hashtag within this
    # post, so it only needs to be created once
    days_since_epoch = (date_utcnow() - date_epoch()).days
    tag_line = \
        str(days_since_epoch) + ' ' + nickname + ' ' + post_url + '\n'
    hashtags_ctr = 0
    for tag in post_obj['tag']:
        # tags arrive from other servers, so check their structure
        if not isinstance(tag, dict):
            continue
        if not tag.get('type'):
            continue
        if not isinstance(tag['type'], str):
            continue
        if tag['type'] != 'Hashtag':
            continue
        if not tag.get('name'):
            continue
        if not isinstance(tag['name'], str):
            continue
        tag_name = tag['name'].replace('#', '').strip()
        if not valid_hash_tag(tag_name):
            continue
        tags_filename = tags_dir + '/' + tag_name + '.txt'
        if map_links and published:
            add_tag_map_links(tag_maps_dir, tag_name, map_links,
                              published, post_url)
        hashtag_added = False
        if not os.path.isfile(tags_filename):
            try:
                with open(tags_filename, 'w+', encoding='utf-8') as fp_tags:
                    fp_tags.write(tag_line)
                    hashtag_added = True
            except OSError:
                print('EX: store_hash_tags unable to write ' + tags_filename)
        else:
            content = None
            try:
                with open(tags_filename, 'r', encoding='utf-8') as fp_tags:
                    content = fp_tags.read()
            except OSError:
                print('EX: store_hash_tags failed to read ' + tags_filename)
            # if the existing file could not be read then don't rewrite
            # it, since that would discard the entries which it contains
            if content is not None and post_url not in content:
                content = tag_line + content
                try:
                    with open(tags_filename, 'w+',
                              encoding='utf-8') as fp_tags2:
                        fp_tags2.write(content)
                        hashtag_added = True
                except OSError as ex:
                    print('EX: Failed to write entry to tags file ' +
                          tags_filename + ' ' + str(ex))

        if hashtag_added:
            hashtags_ctr += 1

            # automatically assign a category to the tag if possible
            # NOTE(review): assumed to apply only to newly recorded
            # hashtags - confirm the intended nesting
            category_filename = tags_dir + '/' + tag_name + '.category'
            if not os.path.isfile(category_filename):
                hashtag_categories = \
                    get_hashtag_categories(base_dir, False, None)
                category_str = \
                    guess_hashtag_category(tag_name, hashtag_categories, 6)
                if category_str:
                    set_hashtag_category(base_dir, tag_name,
                                         category_str, False, False)

    # if some hashtags were found then recalculate the swarm
    # ready for later display
    if hashtags_ctr > 0:
        _update_cached_hashtag_swarm(base_dir, nickname, domain,
                                     http_prefix, domain_full, translate)
|
||||
|
|
Loading…
Reference in New Issue