Fix for cwlists on domains which begin with a dot

main
Bob Mottram 2025-09-24 12:35:05 +01:00
parent d03b04beda
commit 19f568dd06
3 changed files with 25 additions and 2 deletions

View File

@@ -81,10 +81,12 @@ def _add_cw_match_domains(item: {}, content: str, cw_text: str,
the post content
"""
matched = False
for domain in item['domains']:
if '.' in domain:
first_section = domain.split('.')[0]
-if len(first_section) < 4:
+len_first_section = len(first_section)
+if len_first_section in range(1, 4):
if '.' + domain in content or \
'/' + domain in content:
if cw_text:
@@ -116,6 +118,7 @@ def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
if 'content' not in post_json_object['object']:
if 'contentMap' not in post_json_object['object']:
return
cw_text: str = ''
if post_json_object['object'].get('summary'):
cw_text = post_json_object['object']['summary']

View File

@@ -3324,6 +3324,6 @@
"www.ipv6.rs",
"www.homediningkitchen.com",
"www.psichologyanswers.com",
-"www.umatechnology.org",
+"www.umatechnology.org"
]
}

View File

@@ -7628,6 +7628,26 @@ def _test_add_cw_lists(base_dir: str) -> None:
assert post_json_object['object']['summary'] == \
"Murdoch Press / Existing CW"
content = \
'<p>Wah wah-wah wah waaaah.</p><p><a ' + \
'href="https://www.ft.com/content/abcdef" target="_blank" ' + \
'rel="nofollow noopener" translate="no"><span class="invisible">' + \
'https://www.</span><span class="ellipsis">ft.com/content/abcdef' + \
'</span><span class="invisible">ghi</span></a></p>'
post_json_object = {
"object": {
"sensitive": True,
"summary": "Existing CW",
"content": content
}
}
add_cw_from_lists(post_json_object, cw_lists, translate,
'Paywalled or registration only',
system_language, languages_understood)
assert post_json_object['object']['sensitive'] is True
assert post_json_object['object']['summary'] == \
"Paywalled or Registration Only / Existing CW"
def _test_valid_emoji_content() -> None:
print('test_valid_emoji_content')