Remove link tracking for outgoing posts

merge-requests/30/head
Bob Mottram 2024-04-24 11:38:45 +01:00
parent 85d92d0c22
commit c9982cd428
2 changed files with 41 additions and 8 deletions

View File

@ -663,6 +663,15 @@ def _contains_academic_references(content: str) -> bool:
return False
def remove_link_tracking(url: str) -> str:
    """ Removes any web link tracking, such as utm_medium, utm_campaign
    or utm_source

    Every query parameter whose name begins with utm_ is dropped,
    wherever it appears within the query string. Any other query
    parameters and any trailing #fragment are kept in their original
    order. A url without utm_ query parameters is returned unchanged.
    """
    if 'utm_' not in url:
        return url

    # set aside any fragment so that it is not mistaken for part of
    # the final query parameter
    fragment = ''
    url_without_fragment = url
    if '#' in url:
        url_without_fragment, fragment = url.split('#', 1)
        fragment = '#' + fragment

    if '?' not in url_without_fragment:
        return url
    path, query = url_without_fragment.split('?', 1)

    kept_params = []
    tracking_found = False
    for param in query.split('&'):
        if param.startswith('utm_'):
            tracking_found = True
            continue
        kept_params.append(param)

    # 'utm_' only appeared in the path or within a parameter value,
    # so there is nothing to remove
    if not tracking_found:
        return url

    if not kept_params:
        # no parameters remain, so don't leave a dangling '?'
        return path + fragment
    return path + '?' + '&'.join(kept_params) + fragment
def add_web_links(content: str) -> str:
"""Adds markup for web links
"""
@ -697,7 +706,7 @@ def add_web_links(content: str) -> str:
continue
if _contains_doi_reference(wrd, replace_dict):
continue
# does the word begin with a prefix?
# does the word begin with a link prefix?
prefix_found = False
for prefix in prefixes:
if wrd.startswith(prefix):
@ -705,16 +714,18 @@ def add_web_links(content: str) -> str:
break
if not prefix_found:
continue
# the word contains a prefix
if wrd.endswith('.') or wrd.endswith(';'):
wrd = wrd[:-1]
markup = '<a href="' + wrd + '" tabindex="10" ' + \
# the word contains a link prefix
url = wrd
if url.endswith('.') or wrd.endswith(';'):
url = url[:-1]
url = remove_link_tracking(url)
markup = '<a href="' + url + '" tabindex="10" ' + \
'rel="nofollow noopener noreferrer" target="_blank">'
for prefix in prefixes:
if wrd.startswith(prefix):
if url.startswith(prefix):
markup += '<span class="invisible">' + prefix + '</span>'
break
link_text = wrd
link_text = url
for prefix in prefixes:
link_text = link_text.replace(prefix, '')
# prevent links from becoming too long
@ -725,7 +736,7 @@ def add_web_links(content: str) -> str:
link_text[MAX_LINK_LENGTH:] + '</span></a>'
else:
markup += '<span class="ellipsis">' + link_text + '</span></a>'
replace_dict[wrd] = markup
replace_dict[url] = markup
# do the replacements
for url, markup in replace_dict.items():

View File

@ -147,6 +147,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames
from inbox import cache_svg_images
from categories import guess_hashtag_category
from content import remove_link_tracking
from content import format_mixed_right_to_left
from content import replace_remote_hashtags
from content import add_name_emojis_to_tags
@ -8758,6 +8759,26 @@ def _test_remove_tags() -> None:
assert result == 'This is some content. Some other content'
def _test_link_tracking() -> None:
    """Checks that remove_link_tracking leaves an untracked link alone
    and cuts the utm_ parameters off a tracked one
    """
    print('link tracking')

    # a link without any tracking parameters should come back as-is
    untracked_url = 'someweblink.net/some/path'
    assert remove_link_tracking(untracked_url) == untracked_url

    # a link whose query string consists only of utm_ parameters
    # should be reduced to the part before the '?'
    clean_url = (
        'https://somenauseating.com/we-want-to-track-your-web-browsing-'
        'habits-and-then-sell-that-to-letter-agencies'
    )
    tracking_params = (
        '?utm_medium=email&'
        'utm_campaign=Latest%20from%20SomeNauseating%20DotCom'
        '%20for%20April%2024%202024%20-%503948479461&utm_content='
        'Latest%20from%20SomeNeuseating%20DotCom%20for%20April%2024%'
        '202024%20-%34567123+CID_34678246&utm_source=campaign_monitor_uk'
        '&utm_term=wibble'
    )
    tracked_url = clean_url + tracking_params
    assert remove_link_tracking(tracked_url) == clean_url
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@ -8775,6 +8796,7 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
_test_link_tracking()
_test_remove_tags()
_test_check_individual_post_content()
_test_uninvert2()