mirror of https://gitlab.com/bashrc2/epicyon
Handling of mixed direction post content
parent
d76f69d162
commit
b77389400d
28
content.py
28
content.py
|
@ -15,6 +15,8 @@ import email.parser
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
|
from utils import is_right_to_left_text
|
||||||
|
from utils import language_right_to_left
|
||||||
from utils import binary_is_image
|
from utils import binary_is_image
|
||||||
from utils import get_content_from_post
|
from utils import get_content_from_post
|
||||||
from utils import get_full_domain
|
from utils import get_full_domain
|
||||||
|
@ -2195,3 +2197,29 @@ def add_name_emojis_to_tags(base_dir: str, http_prefix: str,
|
||||||
if updated:
|
if updated:
|
||||||
new_tag['updated'] = updated
|
new_tag['updated'] = updated
|
||||||
actor_json['tag'].append(new_tag)
|
actor_json['tag'].append(new_tag)
|
||||||
|
|
||||||
|
|
||||||
|
def format_mixed_right_to_left(content: str,
                               language: str) -> str:
    """Adds RTL direction formatting for non-RTL language
    eg. where some paragraphs are English and others are Arabic

    content is the html of the post and language is the language code
    which is checked with language_right_to_left.
    Each <p>...</p> paragraph whose plain text is judged to be right to
    left by is_right_to_left_text is returned as
    <p><div dir="rtl">...</div></p>.
    The content is returned unaltered if the language is itself right
    to left, or if no paragraph needed to be wrapped.
    Any text outside of a complete paragraph (before the first <p>, or
    within a <p> which has no closing tag) is passed through verbatim
    rather than being dropped.
    """
    # the language is already RTL, so no per-paragraph direction
    # markup is added
    if language_right_to_left(language):
        return content
    sections = content.split('<p>')
    result = []
    changed = False
    for index, text_html in enumerate(sections):
        # the first section is whatever preceded the first <p>,
        # so it is kept exactly as it was
        if index > 0:
            # restore the opening tag which was consumed by split
            text_html = '<p>' + text_html
            if '</p>' in text_html:
                text_plain = remove_html(text_html)
                if is_right_to_left_text(text_plain):
                    # only the first tag of each kind belongs to
                    # this paragraph
                    text_html = \
                        text_html.replace('<p>', '<p><div dir="rtl">', 1)
                    text_html = \
                        text_html.replace('</p>', '</div></p>', 1)
                    changed = True
        result.append(text_html)
    if not changed:
        return content
    return ''.join(result)
|
||||||
|
|
|
@ -381,7 +381,7 @@ def get_reply_language(base_dir: str,
|
||||||
post_obj = post_json_object['object']
|
post_obj = post_json_object['object']
|
||||||
if not post_obj.get('contentMap'):
|
if not post_obj.get('contentMap'):
|
||||||
return None
|
return None
|
||||||
for lang, content in post_obj['contentMap'].items():
|
for lang, _ in post_obj['contentMap'].items():
|
||||||
lang_filename = base_dir + '/translations/' + lang + '.json'
|
lang_filename = base_dir + '/translations/' + lang + '.json'
|
||||||
if not os.path.isfile(lang_filename):
|
if not os.path.isfile(lang_filename):
|
||||||
continue
|
continue
|
||||||
|
|
61
tests.py
61
tests.py
|
@ -56,6 +56,7 @@ from follow import clear_followers
|
||||||
from follow import send_follow_request_via_server
|
from follow import send_follow_request_via_server
|
||||||
from follow import send_unfollow_request_via_server
|
from follow import send_unfollow_request_via_server
|
||||||
from siteactive import site_is_active
|
from siteactive import site_is_active
|
||||||
|
from utils import is_right_to_left_text
|
||||||
from utils import remove_markup_tag
|
from utils import remove_markup_tag
|
||||||
from utils import remove_style_within_html
|
from utils import remove_style_within_html
|
||||||
from utils import html_tag_has_closing
|
from utils import html_tag_has_closing
|
||||||
|
@ -142,6 +143,7 @@ from inbox import valid_inbox
|
||||||
from inbox import valid_inbox_filenames
|
from inbox import valid_inbox_filenames
|
||||||
from inbox import cache_svg_images
|
from inbox import cache_svg_images
|
||||||
from categories import guess_hashtag_category
|
from categories import guess_hashtag_category
|
||||||
|
from content import format_mixed_right_to_left
|
||||||
from content import replace_remote_hashtags
|
from content import replace_remote_hashtags
|
||||||
from content import add_name_emojis_to_tags
|
from content import add_name_emojis_to_tags
|
||||||
from content import combine_textarea_lines
|
from content import combine_textarea_lines
|
||||||
|
@ -8090,6 +8092,63 @@ def _test_remove_tag() -> None:
|
||||||
assert result == 'This is a test<br>something<br>again'
|
assert result == 'This is a test<br>something<br>again'
|
||||||
|
|
||||||
|
|
||||||
|
def _test_is_right_to_left() -> None:
    """Checks is_right_to_left_text against sample sentences
    in left to right and right to left languages
    """
    print('is_right_to_left')
    # pairs of (sample text, whether it should be detected as RTL)
    samples = (
        # english
        ('This is a test', False),
        # arabic
        ('هذا اختبار', True),
        # german
        ('Das ist ein Test', False),
        # persian
        ('این یک امتحان است', True),
        # chinese
        ('这是一个测试', False),
        # hebrew
        ('זה מבחן', True),
        # yiddish
        ('דאָס איז אַ פּראָבע', True)
    )
    for sample_text, expect_rtl in samples:
        if expect_rtl:
            assert is_right_to_left_text(sample_text)
        else:
            assert not is_right_to_left_text(sample_text)
|
||||||
|
|
||||||
|
|
||||||
|
def _test_format_mixed_rtl() -> None:
    """Checks that format_mixed_right_to_left only wraps the
    right to left paragraphs of a post
    """
    print('format_mixed_rtl')

    # english and arabic paragraphs with an english language setting
    mixed = ''.join(('<p>This is some English</p>',
                     '<p>هذه عربية</p>',
                     '<p>And more English</p>'))
    mixed_expected = ''.join(('<p>This is some English</p>',
                              '<p><div dir="rtl">هذه عربية</div></p>',
                              '<p>And more English</p>'))
    assert format_mixed_right_to_left(mixed, 'en') == mixed_expected

    # content with nothing right to left comes back untouched,
    # with or without paragraph markup
    for unchanged in ('<p>This is some only English</p>',
                      'This is some only English without markup'):
        assert format_mixed_right_to_left(unchanged, 'en') == unchanged

    # a single arabic paragraph with an english language setting
    arabic_only = '<p>هذا عربي فقط</p>'
    arabic_expected = '<p><div dir="rtl">هذا عربي فقط</div></p>'
    assert format_mixed_right_to_left(arabic_only, 'en') == arabic_expected

    # the same paragraph with an arabic language setting is not altered
    assert format_mixed_right_to_left(arabic_only, 'ar') == arabic_only
|
||||||
|
|
||||||
|
|
||||||
def run_all_tests():
|
def run_all_tests():
|
||||||
base_dir = os.getcwd()
|
base_dir = os.getcwd()
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
|
@ -8107,6 +8166,8 @@ def run_all_tests():
|
||||||
_test_checkbox_names()
|
_test_checkbox_names()
|
||||||
_test_thread_functions()
|
_test_thread_functions()
|
||||||
_test_functions()
|
_test_functions()
|
||||||
|
_test_is_right_to_left()
|
||||||
|
_test_format_mixed_rtl()
|
||||||
_test_remove_tag()
|
_test_remove_tag()
|
||||||
_test_featured_tags()
|
_test_featured_tags()
|
||||||
_test_xor_hashes()
|
_test_xor_hashes()
|
||||||
|
|
13
utils.py
13
utils.py
|
@ -4542,6 +4542,19 @@ def language_right_to_left(language: str) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_right_to_left_text(text: str) -> bool:
    """Is the given text right to left?
    Returns true if more than half of the non-whitespace characters
    are within one of these unicode ranges:
    Arabic/Persian U+0600 to U+06FF
    Hebrew/Yiddish U+0590 to U+05FF and U+FB2A to U+FB4E
    Empty or whitespace-only text is not right to left.
    """
    # a single character class covers every range. The narrower
    # Arabic letters range U+0627 to U+064A lies within U+0600 to U+06FF
    # so it does not need to be listed separately
    pattern = re.compile('[\u0590-\u05FF\u0600-\u06FF\uFB2A-\uFB4E]')
    rtl_chars = len(pattern.findall(text))

    # whitespace has no direction, so it is excluded from the total.
    # Otherwise trailing blank lines or padding could outvote the letters
    visible_chars = len(''.join(text.split()))

    return rtl_chars > (visible_chars / 2)
|
||||||
|
|
||||||
|
|
||||||
def binary_is_image(filename: str, media_binary) -> bool:
|
def binary_is_image(filename: str, media_binary) -> bool:
|
||||||
"""Returns true if the given file binary data contains an image
|
"""Returns true if the given file binary data contains an image
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -71,6 +71,7 @@ from utils import acct_dir
|
||||||
from utils import local_actor_url
|
from utils import local_actor_url
|
||||||
from utils import is_unlisted_post
|
from utils import is_unlisted_post
|
||||||
from utils import language_right_to_left
|
from utils import language_right_to_left
|
||||||
|
from content import format_mixed_right_to_left
|
||||||
from content import replace_remote_hashtags
|
from content import replace_remote_hashtags
|
||||||
from content import detect_dogwhistles
|
from content import detect_dogwhistles
|
||||||
from content import create_edits_html
|
from content import create_edits_html
|
||||||
|
@ -2720,6 +2721,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
|
||||||
switch_words(base_dir, nickname, domain, object_content)
|
switch_words(base_dir, nickname, domain, object_content)
|
||||||
object_content = html_replace_email_quote(object_content)
|
object_content = html_replace_email_quote(object_content)
|
||||||
object_content = html_replace_quote_marks(object_content)
|
object_content = html_replace_quote_marks(object_content)
|
||||||
|
object_content = \
|
||||||
|
format_mixed_right_to_left(object_content, system_language)
|
||||||
# append any edits
|
# append any edits
|
||||||
object_content += edits_str
|
object_content += edits_str
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue