mirror of https://gitlab.com/bashrc2/epicyon
Handling of mixed direction post content
parent
d76f69d162
commit
b77389400d
28
content.py
28
content.py
|
@ -15,6 +15,8 @@ import email.parser
|
|||
import urllib.parse
|
||||
from shutil import copyfile
|
||||
from dateutil.parser import parse
|
||||
from utils import is_right_to_left_text
|
||||
from utils import language_right_to_left
|
||||
from utils import binary_is_image
|
||||
from utils import get_content_from_post
|
||||
from utils import get_full_domain
|
||||
|
@ -2195,3 +2197,29 @@ def add_name_emojis_to_tags(base_dir: str, http_prefix: str,
|
|||
if updated:
|
||||
new_tag['updated'] = updated
|
||||
actor_json['tag'].append(new_tag)
|
||||
|
||||
|
||||
def format_mixed_right_to_left(content: str,
                               language: str) -> str:
    """Adds RTL direction formatting for non-RTL language
    eg. where some paragraphs are English and others are Arabic

    content is the html of the post, with paragraphs delimited by
    <p> and </p>
    language is the language code which the content is displayed with

    Returns the content with each paragraph detected as right to left
    wrapped in <div dir="rtl">. The content is returned unaltered if
    the language is itself right to left, or if no paragraph was
    detected as right to left.
    """
    # the language is already RTL, so no direction markup is needed
    if language_right_to_left(language):
        return content

    segments = content.split('<p>')

    # Whatever precedes the first <p> is kept verbatim, so that it
    # is not lost when the content gets rebuilt
    rebuilt = [segments[0]]
    changed = False
    for segment in segments[1:]:
        # restore the tag which was consumed by the split
        text_html = '<p>' + segment
        # Only complete paragraphs are candidates for RTL markup.
        # Unclosed ones are still appended unaltered rather than dropped
        if '</p>' in segment:
            text_plain = remove_html(text_html)
            if is_right_to_left_text(text_plain):
                text_html = \
                    text_html.replace('<p>', '<p><div dir="rtl">', 1)
                text_html = text_html.replace('</p>', '</div></p>', 1)
                changed = True
        rebuilt.append(text_html)

    if not changed:
        return content
    return ''.join(rebuilt)
|
||||
|
|
|
@ -381,7 +381,7 @@ def get_reply_language(base_dir: str,
|
|||
post_obj = post_json_object['object']
|
||||
if not post_obj.get('contentMap'):
|
||||
return None
|
||||
for lang, content in post_obj['contentMap'].items():
|
||||
for lang, _ in post_obj['contentMap'].items():
|
||||
lang_filename = base_dir + '/translations/' + lang + '.json'
|
||||
if not os.path.isfile(lang_filename):
|
||||
continue
|
||||
|
|
61
tests.py
61
tests.py
|
@ -56,6 +56,7 @@ from follow import clear_followers
|
|||
from follow import send_follow_request_via_server
|
||||
from follow import send_unfollow_request_via_server
|
||||
from siteactive import site_is_active
|
||||
from utils import is_right_to_left_text
|
||||
from utils import remove_markup_tag
|
||||
from utils import remove_style_within_html
|
||||
from utils import html_tag_has_closing
|
||||
|
@ -142,6 +143,7 @@ from inbox import valid_inbox
|
|||
from inbox import valid_inbox_filenames
|
||||
from inbox import cache_svg_images
|
||||
from categories import guess_hashtag_category
|
||||
from content import format_mixed_right_to_left
|
||||
from content import replace_remote_hashtags
|
||||
from content import add_name_emojis_to_tags
|
||||
from content import combine_textarea_lines
|
||||
|
@ -8090,6 +8092,63 @@ def _test_remove_tag() -> None:
|
|||
assert result == 'This is a test<br>something<br>again'
|
||||
|
||||
|
||||
def _test_is_right_to_left() -> None:
    """Checks that is_right_to_left_text gives the expected result
    for sample texts in a number of languages
    """
    print('is_right_to_left')

    # each entry is the sample text and whether it should be
    # detected as right to left
    samples = (
        ('This is a test', False),
        # arabic
        ('هذا اختبار', True),
        ('Das ist ein Test', False),
        # persian
        ('این یک امتحان است', True),
        # chinese
        ('这是一个测试', False),
        # hebrew
        ('זה מבחן', True),
        # yiddish
        ('דאָס איז אַ פּראָבע', True)
    )
    for text, expected_rtl in samples:
        if expected_rtl:
            assert is_right_to_left_text(text)
        else:
            assert not is_right_to_left_text(text)
|
||||
|
||||
|
||||
def _test_format_mixed_rtl() -> None:
    """Checks format_mixed_right_to_left against mixed direction,
    single direction and markup-free content
    """
    print('format_mixed_rtl')

    mixed = ('<p>This is some English</p>'
             '<p>هذه عربية</p>'
             '<p>And more English</p>')
    mixed_expected = ('<p>This is some English</p>'
                      '<p><div dir="rtl">هذه عربية</div></p>'
                      '<p>And more English</p>')
    english_only = '<p>This is some only English</p>'
    no_markup = 'This is some only English without markup'
    arabic_only = '<p>هذا عربي فقط</p>'
    arabic_only_expected = '<p><div dir="rtl">هذا عربي فقط</div></p>'

    # each entry is: content, system language, expected result
    cases = (
        (mixed, 'en', mixed_expected),
        (english_only, 'en', english_only),
        (no_markup, 'en', no_markup),
        (arabic_only, 'en', arabic_only_expected),
        # no markup is added when the language is itself RTL
        (arabic_only, 'ar', arabic_only)
    )
    for content, language, expected in cases:
        result = format_mixed_right_to_left(content, language)
        assert result == expected
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
base_dir = os.getcwd()
|
||||
print('Running tests...')
|
||||
|
@ -8107,6 +8166,8 @@ def run_all_tests():
|
|||
_test_checkbox_names()
|
||||
_test_thread_functions()
|
||||
_test_functions()
|
||||
_test_is_right_to_left()
|
||||
_test_format_mixed_rtl()
|
||||
_test_remove_tag()
|
||||
_test_featured_tags()
|
||||
_test_xor_hashes()
|
||||
|
|
13
utils.py
13
utils.py
|
@ -4542,6 +4542,19 @@ def language_right_to_left(language: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def is_right_to_left_text(text: str) -> bool:
    """Returns true if more than half of the characters within the
    given text fall within one of these unicode ranges:
    Persian U+0600 to U+06FF
    Arabic U+0627 to U+064A
    Hebrew/Yiddish U+0590 to U+05FF and U+FB2A to U+FB4E
    """
    rtl_chars = re.compile('[\u0627-\u064a]|[\u0600-\u06FF]|' +
                           '[\u0590-\u05FF\uFB2A-\uFB4E]')
    # every match is a single character, so this is the number of
    # right to left characters
    rtl_count = len(rtl_chars.findall(text))
    half_length = len(text) / 2
    return rtl_count > half_length
|
||||
|
||||
|
||||
def binary_is_image(filename: str, media_binary) -> bool:
|
||||
"""Returns true if the given file binary data contains an image
|
||||
"""
|
||||
|
|
|
@ -71,6 +71,7 @@ from utils import acct_dir
|
|||
from utils import local_actor_url
|
||||
from utils import is_unlisted_post
|
||||
from utils import language_right_to_left
|
||||
from content import format_mixed_right_to_left
|
||||
from content import replace_remote_hashtags
|
||||
from content import detect_dogwhistles
|
||||
from content import create_edits_html
|
||||
|
@ -2720,6 +2721,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
|
|||
switch_words(base_dir, nickname, domain, object_content)
|
||||
object_content = html_replace_email_quote(object_content)
|
||||
object_content = html_replace_quote_marks(object_content)
|
||||
object_content = \
|
||||
format_mixed_right_to_left(object_content, system_language)
|
||||
# append any edits
|
||||
object_content += edits_str
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue