diff --git a/content.py b/content.py
index 5d69dfff4..52eb33391 100644
--- a/content.py
+++ b/content.py
@@ -15,6 +15,8 @@ import email.parser
import urllib.parse
from shutil import copyfile
from dateutil.parser import parse
+from utils import is_right_to_left_text
+from utils import language_right_to_left
from utils import binary_is_image
from utils import get_content_from_post
from utils import get_full_domain
@@ -2195,3 +2197,29 @@ def add_name_emojis_to_tags(base_dir: str, http_prefix: str,
if updated:
new_tag['updated'] = updated
actor_json['tag'].append(new_tag)
+
+
+def format_mixed_right_to_left(content: str,
+ language: str) -> str:
+ """Adds RTL direction formatting for non-RTL language
+ eg. where some paragraphs are English and others are Arabic
+ """
+ # nothing to do if the language is already RTL
+ if language_right_to_left(language):
+ return content
+ paragraphs = content.split('
')
+ result = ''
+ changed = False
+ for text_html in paragraphs:
+ if '
' not in text_html:
+ continue
+ text_html = '' + text_html
+ text_plain = remove_html(text_html)
+ if is_right_to_left_text(text_plain):
+ text_html = text_html.replace('
', '
', 1)
+ text_html = text_html.replace('', '
', 1)
+ changed = True
+ result += text_html
+ if not changed:
+ return content
+ return result
diff --git a/languages.py b/languages.py
index 668cddba2..12ce8cc72 100644
--- a/languages.py
+++ b/languages.py
@@ -381,7 +381,7 @@ def get_reply_language(base_dir: str,
post_obj = post_json_object['object']
if not post_obj.get('contentMap'):
return None
- for lang, content in post_obj['contentMap'].items():
+ for lang, _ in post_obj['contentMap'].items():
lang_filename = base_dir + '/translations/' + lang + '.json'
if not os.path.isfile(lang_filename):
continue
diff --git a/tests.py b/tests.py
index 8a74c3814..df9fca23a 100644
--- a/tests.py
+++ b/tests.py
@@ -56,6 +56,7 @@ from follow import clear_followers
from follow import send_follow_request_via_server
from follow import send_unfollow_request_via_server
from siteactive import site_is_active
+from utils import is_right_to_left_text
from utils import remove_markup_tag
from utils import remove_style_within_html
from utils import html_tag_has_closing
@@ -142,6 +143,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames
from inbox import cache_svg_images
from categories import guess_hashtag_category
+from content import format_mixed_right_to_left
from content import replace_remote_hashtags
from content import add_name_emojis_to_tags
from content import combine_textarea_lines
@@ -8090,6 +8092,63 @@ def _test_remove_tag() -> None:
assert result == 'This is a test
something
again'
+def _test_is_right_to_left() -> None:
+ print('is_right_to_left')
+ text = 'This is a test'
+ assert not is_right_to_left_text(text)
+
+ # Arabic should be detected as right to left
+ text = 'هذا اختبار'
+ assert is_right_to_left_text(text)
+
+ text = 'Das ist ein Test'
+ assert not is_right_to_left_text(text)
+
+ # Persian should be detected as right to left
+ text = 'این یک امتحان است'
+ assert is_right_to_left_text(text)
+
+ # Chinese should not be detected as right to left
+ text = '这是一个测试'
+ assert not is_right_to_left_text(text)
+
+ # Hebrew should be detected as right to left
+ text = 'זה מבחן'
+ assert is_right_to_left_text(text)
+
+ # Yiddish should be detected as right to left
+ text = 'דאָס איז אַ פּראָבע'
+ assert is_right_to_left_text(text)
+
+
+def _test_format_mixed_rtl() -> None:
+ print('format_mixed_rtl')
+ # mixed English and Arabic content, formatted for language 'en'
+ content = 'This is some English
' + \
+ 'هذه عربية
' + \
+ 'And more English
'
+ result = format_mixed_right_to_left(content, 'en')
+ expected = 'This is some English
' + \
+ 'هذه عربية
' + \
+ 'And more English
'
+ assert result == expected
+
+ # English only content should be returned unchanged
+ content = 'This is some only English
'
+ result = format_mixed_right_to_left(content, 'en')
+ assert result == content
+
+ # content without markup should be returned unchanged
+ content = 'This is some only English without markup'
+ result = format_mixed_right_to_left(content, 'en')
+ assert result == content
+
+ # Arabic only content, formatted for language 'en'
+ content = 'هذا عربي فقط
'
+ result = format_mixed_right_to_left(content, 'en')
+ expected = 'هذا عربي فقط
'
+ assert result == expected
+
+ # with language 'ar' the same content should be returned unchanged
+ result = format_mixed_right_to_left(content, 'ar')
+ assert result == content
+
+
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@@ -8107,6 +8166,8 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
+ _test_is_right_to_left()
+ _test_format_mixed_rtl()
_test_remove_tag()
_test_featured_tags()
_test_xor_hashes()
diff --git a/utils.py b/utils.py
index ce083f64f..60c6b0557 100644
--- a/utils.py
+++ b/utils.py
@@ -4542,6 +4542,19 @@ def language_right_to_left(language: str) -> bool:
return False
+def is_right_to_left_text(text: str) -> bool:
+ """Returns true if over half of the given text is right to left
+ Persian \u0600-\u06FF
+ Arabic \u0627-\u064a
+ Hebrew/Yiddish \u0590-\u05FF\uFB2A-\uFB4E
+ """
+ unicode_str = '[\u0627-\u064a]|[\u0600-\u06FF]|' + \
+ '[\u0590-\u05FF\uFB2A-\uFB4E]'
+ pattern = re.compile(unicode_str)
+
+ return len(re.findall(pattern, text)) > (len(text)/2)
+
+
def binary_is_image(filename: str, media_binary) -> bool:
"""Returns true if the given file binary data contains an image
"""
diff --git a/webapp_post.py b/webapp_post.py
index e06d14950..87ed82543 100644
--- a/webapp_post.py
+++ b/webapp_post.py
@@ -71,6 +71,7 @@ from utils import acct_dir
from utils import local_actor_url
from utils import is_unlisted_post
from utils import language_right_to_left
+from content import format_mixed_right_to_left
from content import replace_remote_hashtags
from content import detect_dogwhistles
from content import create_edits_html
@@ -2720,6 +2721,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
switch_words(base_dir, nickname, domain, object_content)
object_content = html_replace_email_quote(object_content)
object_content = html_replace_quote_marks(object_content)
+ object_content = \
+ format_mixed_right_to_left(object_content, system_language)
# append any edits
object_content += edits_str
else: