mirror of https://gitlab.com/bashrc2/epicyon
Remove link tracking gloop from post urls
parent
cc71d42a3c
commit
2903cb521f
21
content.py
21
content.py
|
@ -663,6 +663,27 @@ def _contains_academic_references(content: str) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def remove_link_trackers_from_content(content: str) -> str:
    """ Removes any link trackers from urls within the content

    Every occurrence of '?utm_' together with the query text which
    follows it is removed. The removed text ends at the first character
    which can't be part of the url (a quote, an angle bracket or
    whitespace) or at the end of the content if no such character exists.
    As with the url ending at a double quote in an href attribute, the
    terminating character itself is always kept.

    content: text or html which may contain tracked urls
    Returns the content with the tracking queries removed
    """
    marker = '?utm_'
    if marker not in content:
        return content

    # Characters which end a url, whether it appears inside of an
    # attribute (href="...") or within the visible text of a post
    # (...</a>, or followed by a space). Stopping only at a double quote
    # would swallow markup between a tracker in visible text and the
    # next attribute further along in the content.
    url_terminators = ('"', "'", '<', '>', ' ', '\t', '\n', '\r')

    head, *tails = content.split(marker)
    pieces = [head]
    for tail in tails:
        # index of the earliest terminator, or the whole of the tail
        # when the tracker runs to the end of the content
        positions = (tail.find(char) for char in url_terminators)
        end = min((pos for pos in positions if pos != -1),
                  default=len(tail))
        pieces.append(tail[end:])
    return ''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
def remove_link_tracking(url: str) -> str:
|
def remove_link_tracking(url: str) -> str:
|
||||||
""" Removes any web link tracking, such as utm_medium, utm_campaign
|
""" Removes any web link tracking, such as utm_medium, utm_campaign
|
||||||
or utm_source
|
or utm_source
|
||||||
|
|
19
tests.py
19
tests.py
|
@ -147,6 +147,7 @@ from inbox import valid_inbox
|
||||||
from inbox import valid_inbox_filenames
|
from inbox import valid_inbox_filenames
|
||||||
from inbox import cache_svg_images
|
from inbox import cache_svg_images
|
||||||
from categories import guess_hashtag_category
|
from categories import guess_hashtag_category
|
||||||
|
from content import remove_link_trackers_from_content
|
||||||
from content import remove_link_tracking
|
from content import remove_link_tracking
|
||||||
from content import format_mixed_right_to_left
|
from content import format_mixed_right_to_left
|
||||||
from content import replace_remote_hashtags
|
from content import replace_remote_hashtags
|
||||||
|
@ -8778,6 +8779,24 @@ def _test_link_tracking() -> None:
|
||||||
'habits-and-then-sell-that-to-letter-agencies'
|
'habits-and-then-sell-that-to-letter-agencies'
|
||||||
assert remove_link_tracking(url) == expected
|
assert remove_link_tracking(url) == expected
|
||||||
|
|
||||||
|
content = 'Some content'
|
||||||
|
expected = content
|
||||||
|
assert remove_link_trackers_from_content(content) == expected
|
||||||
|
|
||||||
|
content = \
|
||||||
|
'Some <a href="dreadfulsite.com/abc?utm_medium=gloop">content</a>'
|
||||||
|
expected = \
|
||||||
|
'Some <a href="dreadfulsite.com/abc">content</a>'
|
||||||
|
assert remove_link_trackers_from_content(content) == expected
|
||||||
|
|
||||||
|
content = \
|
||||||
|
'Some <a href="dreadfulsite.com/abc?utm_medium=gloop">content</a> ' + \
|
||||||
|
'<a href="surveillancecrap.com/def?utm_campaign=ohno">scurrilous</a>'
|
||||||
|
expected = \
|
||||||
|
'Some <a href="dreadfulsite.com/abc">content</a> ' + \
|
||||||
|
'<a href="surveillancecrap.com/def">scurrilous</a>'
|
||||||
|
assert remove_link_trackers_from_content(content) == expected
|
||||||
|
|
||||||
|
|
||||||
def run_all_tests():
|
def run_all_tests():
|
||||||
base_dir = os.getcwd()
|
base_dir = os.getcwd()
|
||||||
|
|
|
@ -78,6 +78,7 @@ from utils import get_attributed_to
|
||||||
from utils import get_reply_to
|
from utils import get_reply_to
|
||||||
from utils import get_actor_from_post
|
from utils import get_actor_from_post
|
||||||
from utils import resembles_url
|
from utils import resembles_url
|
||||||
|
from content import remove_link_trackers_from_content
|
||||||
from content import format_mixed_right_to_left
|
from content import format_mixed_right_to_left
|
||||||
from content import replace_remote_hashtags
|
from content import replace_remote_hashtags
|
||||||
from content import detect_dogwhistles
|
from content import detect_dogwhistles
|
||||||
|
@ -2845,6 +2846,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
|
||||||
not post_is_blog:
|
not post_is_blog:
|
||||||
content_str = bold_reading_string(content_str)
|
content_str = bold_reading_string(content_str)
|
||||||
|
|
||||||
|
object_content = remove_link_trackers_from_content(content_str)
|
||||||
object_content = \
|
object_content = \
|
||||||
remove_long_words(content_str, 40, [])
|
remove_long_words(content_str, 40, [])
|
||||||
object_content = \
|
object_content = \
|
||||||
|
|
Loading…
Reference in New Issue