mirror of https://gitlab.com/bashrc2/epicyon
Remove link tracking gloop from post urls
parent
cc71d42a3c
commit
2903cb521f
21
content.py
21
content.py
|
@ -663,6 +663,27 @@ def _contains_academic_references(content: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def remove_link_trackers_from_content(content: str) -> str:
    """ Removes any link trackers from urls within the content

    Every occurrence of '?utm_' and the query string which follows it is
    removed. The query string is considered to end at the first quote,
    '<' or whitespace character, or at the end of the content, so that
    trackers are stripped both from quoted href attributes and from urls
    appearing as plain text or as link text, without removing any of the
    surrounding markup.

    Returns the content unchanged if it contains no '?utm_' marker.
    """
    marker = '?utm_'
    if marker not in content:
        return content

    # characters which cannot be part of the tracking query string
    terminators = ('"', "'", '<', ' ', '\t', '\n', '\r')

    sections = content.split(marker)
    # the text before the first marker is always kept as it is
    kept = [sections[0]]
    for section_str in sections[1:]:
        # find where the tracking parameters end within this section.
        # If there is no terminator then they run to the end of the text
        end_index = len(section_str)
        for terminator in terminators:
            index = section_str.find(terminator)
            if index != -1 and index < end_index:
                end_index = index
        # keep only the text after the tracking parameters
        kept.append(section_str[end_index:])
    return ''.join(kept)
|
||||
|
||||
|
||||
def remove_link_tracking(url: str) -> str:
|
||||
""" Removes any web link tracking, such as utm_medium, utm_campaign
|
||||
or utm_source
|
||||
|
|
19
tests.py
19
tests.py
|
@ -147,6 +147,7 @@ from inbox import valid_inbox
|
|||
from inbox import valid_inbox_filenames
|
||||
from inbox import cache_svg_images
|
||||
from categories import guess_hashtag_category
|
||||
from content import remove_link_trackers_from_content
|
||||
from content import remove_link_tracking
|
||||
from content import format_mixed_right_to_left
|
||||
from content import replace_remote_hashtags
|
||||
|
@ -8778,6 +8779,24 @@ def _test_link_tracking() -> None:
|
|||
'habits-and-then-sell-that-to-letter-agencies'
|
||||
assert remove_link_tracking(url) == expected
|
||||
|
||||
content = 'Some content'
|
||||
expected = content
|
||||
assert remove_link_trackers_from_content(content) == expected
|
||||
|
||||
content = \
|
||||
'Some <a href="dreadfulsite.com/abc?utm_medium=gloop">content</a>'
|
||||
expected = \
|
||||
'Some <a href="dreadfulsite.com/abc">content</a>'
|
||||
assert remove_link_trackers_from_content(content) == expected
|
||||
|
||||
content = \
|
||||
'Some <a href="dreadfulsite.com/abc?utm_medium=gloop">content</a> ' + \
|
||||
'<a href="surveillancecrap.com/def?utm_campaign=ohno">scurrilous</a>'
|
||||
expected = \
|
||||
'Some <a href="dreadfulsite.com/abc">content</a> ' + \
|
||||
'<a href="surveillancecrap.com/def">scurrilous</a>'
|
||||
assert remove_link_trackers_from_content(content) == expected
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
base_dir = os.getcwd()
|
||||
|
|
|
@ -78,6 +78,7 @@ from utils import get_attributed_to
|
|||
from utils import get_reply_to
|
||||
from utils import get_actor_from_post
|
||||
from utils import resembles_url
|
||||
from content import remove_link_trackers_from_content
|
||||
from content import format_mixed_right_to_left
|
||||
from content import replace_remote_hashtags
|
||||
from content import detect_dogwhistles
|
||||
|
@ -2845,6 +2846,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
|
|||
not post_is_blog:
|
||||
content_str = bold_reading_string(content_str)
|
||||
|
||||
object_content = remove_link_trackers_from_content(content_str)
|
||||
object_content = \
|
||||
remove_long_words(content_str, 40, [])
|
||||
object_content = \
|
||||
|
|
Loading…
Reference in New Issue