Checking for url strings

main
Bob Mottram 2024-01-27 17:04:21 +00:00
parent b129df0eaa
commit d312a52c26
12 changed files with 41 additions and 27 deletions

View File

@ -301,6 +301,7 @@ from languages import set_actor_languages
from languages import get_understood_languages
from like import update_likes_collection
from reaction import update_reaction_collection
from utils import resembles_url
from utils import get_url_from_post
from utils import date_from_string_format
from utils import corp_servers
@ -2347,8 +2348,7 @@ class PubServer(BaseHTTPRequestHandler):
if debug:
print('INBOX: checking that actor looks like a url')
actor_url = get_actor_from_post(message_json)
if '://' not in actor_url or \
'.' not in actor_url:
if not resembles_url(actor_url):
print('INBOX: POST actor does not look like a url ' +
actor_url)
self._400()
@ -7159,8 +7159,7 @@ class PubServer(BaseHTTPRequestHandler):
if fields['libretranslateUrl'] != \
curr_libretranslate_url:
lt_url = fields['libretranslateUrl']
if '://' in lt_url and \
'.' in lt_url:
if resembles_url(lt_url):
set_config_param(base_dir,
'libretranslateUrl',
lt_url)
@ -7551,8 +7550,7 @@ class PubServer(BaseHTTPRequestHandler):
moved_to = actor_json['movedTo']
if fields.get('movedTo'):
if fields['movedTo'] != moved_to and \
'://' in fields['movedTo'] and \
'.' in fields['movedTo']:
resembles_url(fields['movedTo']):
actor_json['movedTo'] = fields['movedTo']
send_move_activity = True
actor_changed = True
@ -7615,7 +7613,7 @@ class PubServer(BaseHTTPRequestHandler):
also_known_as = []
for alt_actor in new_also_known_as:
alt_actor = alt_actor.strip()
if '://' in alt_actor and '.' in alt_actor:
if resembles_url(alt_actor):
if alt_actor not in also_known_as:
also_known_as.append(alt_actor)
actor_json['alsoKnownAs'] = also_known_as

View File

@ -87,6 +87,7 @@ from utils import follow_person
from utils import valid_nickname
from utils import get_protocol_prefixes
from utils import acct_dir
from utils import resembles_url
from media import archive_media
from media import get_attachment_media_type
from delete import send_delete_via_server
@ -867,8 +868,7 @@ def _command_options() -> None:
# automatic translations
if argb.libretranslateUrl:
if '://' in argb.libretranslateUrl and \
'.' in argb.libretranslateUrl:
if resembles_url(argb.libretranslateUrl):
set_config_param(base_dir, 'libretranslateUrl',
argb.libretranslateUrl)
if argb.libretranslateApiKey:

View File

@ -18,6 +18,7 @@ from languages import understood_post_language
from like import update_likes_collection
from reaction import update_reaction_collection
from reaction import valid_emoji_content
from utils import resembles_url
from utils import get_url_from_post
from utils import date_from_string_format
from utils import date_epoch
@ -371,7 +372,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str,
# get geolocation from tags
location_str = get_location_from_post(post_json_object)
if location_str:
if '://' in location_str and '.' in location_str:
if resembles_url(location_str):
zoom, latitude, longitude = geocoords_from_map_link(location_str)
if latitude and longitude and zoom and \
location_str not in map_links:

View File

@ -17,6 +17,7 @@ from utils import remove_html
from utils import has_object_dict
from utils import get_config_param
from utils import local_actor_url
from utils import resembles_url
from cache import get_person_from_cache
@ -209,7 +210,7 @@ def get_links_from_content(content: str) -> {}:
if '"' not in subsection:
continue
url = subsection.split('"')[1].strip()
if '://' in url and '.' in url and \
if resembles_url(url) and \
'>' in subsection:
if url not in links:
link_text = subsection.split('>')[1]

View File

@ -19,6 +19,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import valid_post_date
from categories import set_hashtag_category
from utils import resembles_url
from utils import get_url_from_post
from utils import remove_zero_length_strings
from utils import date_from_string_format
@ -659,7 +660,7 @@ def xml_podcast_to_dict(base_dir: str, xml_item: str, xml_str: str) -> {}:
episode_image = episode_image.split('>')[1]
if '<' in episode_image:
episode_image = episode_image.split('<')[0]
if '://' in episode_image and '.' in episode_image:
if resembles_url(episode_image):
podcast_episode_image = episode_image
break
@ -756,7 +757,7 @@ def get_link_from_rss_item(rss_item: str,
link_str = enclosure.split('url="')[1]
if '"' in link_str:
link = link_str.split('"')[0]
if '://' in link:
if resembles_url(link):
return link, mime_type
if '<link>' in rss_item and '</link>' in rss_item:

View File

@ -159,7 +159,7 @@ def get_moved_feed(base_dir: str, domain: str, port: int, path: str,
curr_page = 1
page_ctr = 0
total_ctr = 0
for handle, new_handle in lines.items():
for handle, _ in lines.items():
# nickname@domain
page_ctr += 1
total_ctr += 1

View File

@ -23,6 +23,7 @@ from session import post_json
from session import post_image
from session import create_session
from session import get_json_valid
from utils import resembles_url
from utils import date_utcnow
from utils import dangerous_markup
from utils import remove_html
@ -2147,7 +2148,7 @@ def vf_proposal_from_share(shared_item: {},
"name": shared_item['location'].title()
}
if shared_item['imageUrl']:
if '://' in shared_item['imageUrl']:
if resembles_url(shared_item['imageUrl']):
file_extension = None
accepted_types = get_media_extensions()
for mtype in accepted_types:

View File

@ -147,8 +147,7 @@ def get_attributed_to(field) -> str:
if isinstance(attrib['type'], str) and \
isinstance(attrib['id'], str):
if attrib['type'] == 'Person' and \
'://' in attrib['id'] and \
'.' in attrib['id']:
resembles_url(attrib['id']):
return attrib['id']
if isinstance(field[0], str):
return field[0]
@ -4037,7 +4036,7 @@ def get_actor_from_post(post_json_object: {}) -> str:
if actor_id:
# looks vaguely like a url
if '://' in actor_id and '.' in actor_id:
if resembles_url(actor_id):
return actor_id
return ''
@ -4915,3 +4914,14 @@ def is_valid_date(date_str: str) -> bool:
return False
date_sect_ctr += 1
return True
def resembles_url(text: str) -> bool:
    """Does the given text look like a url?
    This is a cheap heuristic, not a full url parse. The text must
    contain a '://' scheme separator and at least one '.', and must
    not contain a space or a '<' character.
    Anything which is not a string (eg. None or a number arriving
    from untrusted json) does not resemble a url, so False is
    returned rather than raising a TypeError.
    """
    # values taken from json fields are not guaranteed to be strings
    if not isinstance(text, str):
        return False
    # needs a scheme separator and something resembling a domain
    if '://' not in text or '.' not in text:
        return False
    # spaces or markup indicate free text rather than a single url
    return ' ' not in text and '<' not in text

View File

@ -18,6 +18,7 @@ from utils import get_content_from_post
from utils import dangerous_markup
from utils import license_link_from_name
from utils import get_media_url_from_video
from utils import resembles_url
from blocking import is_blocked
from filters import is_filtered
@ -168,9 +169,7 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
system_language):
new_post['object']['support'] = support_str
# if this is a link
if ' ' not in support_str and \
'://' in support_str and \
'.' in support_str:
if resembles_url(support_str):
# add a buy link
new_post['object']['attachment'].append({
'type': 'Link',

View File

@ -75,6 +75,7 @@ from utils import language_right_to_left
from utils import get_attributed_to
from utils import get_reply_to
from utils import get_actor_from_post
from utils import resembles_url
from content import format_mixed_right_to_left
from content import replace_remote_hashtags
from content import detect_dogwhistles
@ -2364,7 +2365,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
post_proxied = ap_proxy_type(post_json_object['object'])
if post_proxied:
post_proxied = remove_html(post_proxied)
if '://' in post_proxied:
if resembles_url(post_proxied):
proxy_str = 'Proxy'
if translate.get(proxy_str):
proxy_str = translate[proxy_str]
@ -2905,7 +2906,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
# show embedded map if the location contains a map url
location_str = get_location_from_post(post_json_object)
if location_str:
if '://' in location_str and '.' in location_str:
if resembles_url(location_str):
bounding_box_degrees = 0.001
map_str = \
html_open_street_map(location_str,

View File

@ -40,6 +40,7 @@ from utils import get_account_timezone
from utils import remove_eol
from utils import is_valid_date
from utils import get_actor_from_post
from utils import resembles_url
from languages import get_actor_languages
from skills import get_skills
from theme import get_themes_list
@ -633,7 +634,7 @@ def _get_profile_header(base_dir: str, http_prefix: str, nickname: str,
actor_proxied = ''
else:
actor_proxied = remove_html(actor_proxied)
if '://' in actor_proxied:
if resembles_url(actor_proxied):
proxy_str = 'Proxy'
if translate.get(proxy_str):
proxy_str = translate[proxy_str]
@ -792,7 +793,7 @@ def _get_profile_header_after_search(base_dir: str,
actor_proxied = ''
else:
actor_proxied = remove_html(actor_proxied)
if '://' in actor_proxied:
if resembles_url(actor_proxied):
proxy_str = 'Proxy'
if translate.get(proxy_str):
proxy_str = translate[proxy_str]

View File

@ -37,6 +37,7 @@ from utils import local_actor_url
from utils import text_in_file
from utils import remove_eol
from utils import binary_is_image
from utils import resembles_url
from filters import is_filtered
from cache import get_actor_public_key_from_id
from cache import store_person_in_cache
@ -1442,7 +1443,7 @@ def get_post_attachments_as_html(base_dir: str,
license_str = ''
if media_license and media_creator:
media_license = remove_html(media_license)
if '://' in media_license:
if resembles_url(media_license):
license_str += \
'<a href="' + media_license + \
'" target="_blank" ' + \
@ -1531,7 +1532,7 @@ def get_post_attachments_as_html(base_dir: str,
license_str = ''
attachment_str += '<figcaption>'
media_license = remove_html(media_license)
if '://' in media_license:
if resembles_url(media_license):
license_str += \
'<a href="' + media_license + \
'" target="_blank" ' + \