From c9982cd428f89b731dafc793e356e4f9a03fc9ca Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 24 Apr 2024 11:38:45 +0100 Subject: [PATCH] Remove link tracking for outgoing posts --- content.py | 27 +++++++++++++++++++-------- tests.py | 22 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/content.py b/content.py index 38912abfd..c5eb57288 100644 --- a/content.py +++ b/content.py @@ -663,6 +663,15 @@ def _contains_academic_references(content: str) -> bool: return False +def remove_link_tracking(url: str) -> str: + """ Removes any web link tracking, such as utm_medium, utm_campaign + or utm_source + """ + if '?utm_' not in url: + return url + return url.split('?utm_')[0] + + def add_web_links(content: str) -> str: """Adds markup for web links """ @@ -697,7 +706,7 @@ def add_web_links(content: str) -> str: continue if _contains_doi_reference(wrd, replace_dict): continue - # does the word begin with a prefix? + # does the word begin with a link prefix? prefix_found = False for prefix in prefixes: if wrd.startswith(prefix): @@ -705,16 +714,18 @@ def add_web_links(content: str) -> str: break if not prefix_found: continue - # the word contains a prefix - if wrd.endswith('.') or wrd.endswith(';'): - wrd = wrd[:-1] - markup = '' for prefix in prefixes: - if wrd.startswith(prefix): + if url.startswith(prefix): markup += '' break - link_text = wrd + link_text = url for prefix in prefixes: link_text = link_text.replace(prefix, '') # prevent links from becoming too long @@ -725,7 +736,7 @@ def add_web_links(content: str) -> str: link_text[MAX_LINK_LENGTH:] + '' else: markup += '' + link_text + '' - replace_dict[wrd] = markup + replace_dict[url] = markup # do the replacements for url, markup in replace_dict.items(): diff --git a/tests.py b/tests.py index 286fe0120..7d5fbc6e5 100644 --- a/tests.py +++ b/tests.py @@ -147,6 +147,7 @@ from inbox import valid_inbox from inbox import valid_inbox_filenames from inbox import cache_svg_images from categories import guess_hashtag_category +from content import remove_link_tracking from content import format_mixed_right_to_left from content import replace_remote_hashtags from content import add_name_emojis_to_tags @@ -8758,6 +8759,26 @@ def _test_remove_tags() -> None: assert result == 'This is some content. Some other content' +def _test_link_tracking() -> None: + print('link tracking') + url = 'someweblink.net/some/path' + expected = url + assert remove_link_tracking(url) == expected + + url = \ + 'https://somenauseating.com/we-want-to-track-your-web-browsing-' + \ + 'habits-and-then-sell-that-to-letter-agencies?utm_medium=email&' + \ + 'utm_campaign=Latest%20from%20SomeNauseating%20DotCom' + \ + '%20for%20April%2024%202024%20-%503948479461&utm_content=' + \ + 'Latest%20from%20SomeNeuseating%20DotCom%20for%20April%2024%' + \ + '202024%20-%34567123+CID_34678246&utm_source=campaign_monitor_uk' + \ + '&utm_term=wibble' + expected = \ + 'https://somenauseating.com/we-want-to-track-your-web-browsing-' + \ + 'habits-and-then-sell-that-to-letter-agencies' + assert remove_link_tracking(url) == expected + + def run_all_tests(): base_dir = os.getcwd() print('Running tests...') @@ -8775,6 +8796,7 @@ def run_all_tests(): _test_checkbox_names() _test_thread_functions() _test_functions() + _test_link_tracking() _test_remove_tags() _test_check_individual_post_content() _test_uninvert2()