Abandon Arabic language detection

merge-requests/30/head
Bob Mottram 2022-12-17 16:28:12 +00:00
parent e6437b50cc
commit 4106000065
4 changed files with 0 additions and 38 deletions

View File

@ -26,7 +26,6 @@ from utils import get_status_number
from utils import get_full_domain
from utils import text_in_file
from utils import remove_eol
from utils import is_arabic
from filters import is_filtered
from context import get_individual_post_context
from session import get_method
@ -289,9 +288,6 @@ def get_todays_events(base_dir: str, nickname: str, domain: str,
if content:
if not _event_text_match(content, text_match):
continue
if content_language != 'ar':
if is_arabic(content):
content_language = 'ar'
public_event = is_public_post(post_json_object)

View File

@ -54,7 +54,6 @@ from follow import clear_followers
from follow import send_follow_request_via_server
from follow import send_unfollow_request_via_server
from siteactive import site_is_active
from utils import is_arabic
from utils import remove_inverted_text
from utils import remove_square_capitals
from utils import standardize_text
@ -7639,16 +7638,6 @@ def _test_reply_language(base_dir: str) -> None:
assert not get_reply_language(base_dir, post_json_object)
def _test_is_arabic() -> None:
    """Unit test for is_arabic, using mixed, english-only and empty text
    """
    print('is_arabic')
    # (text to be checked, whether it should be detected as arabic)
    expectations = (
        ("Some English. هذا نص عربي", True),
        ("Some English", False),
        ("", False)
    )
    for sample_text, expect_arabic in expectations:
        assert bool(is_arabic(sample_text)) is expect_arabic
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@ -7666,7 +7655,6 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
_test_is_arabic()
_test_reply_language(base_dir)
_test_emoji_in_actor_name(base_dir)
_test_uninvert()

View File

@ -219,23 +219,6 @@ def get_content_from_post(post_json_object: {}, system_language: str,
return standardize_text(content)
def is_arabic(content: str) -> bool:
    """Returns true if more than a third of the given text is arabic

    Only characters within the Arabic, Arabic Supplement and Arabic
    Presentation Forms (A and B) unicode ranges are counted. ASCII digits
    and whitespace are not counted as arabic, so that text which is mostly
    numbers is not wrongly detected.
    An empty or missing content returns False.
    """
    if not content:
        return False
    # every individual character belonging to an arabic script range
    arabic_chars = re.findall(r'[\u0600-\u06ff\u0750-\u077f'
                              r'\ufb50-\ufbc1\ufbd3-\ufd3f'
                              r'\ufd50-\ufd8f\ufdf0-\ufdfd'
                              r'\ufe70-\ufefc]', content)
    # more than a third of the content
    return len(arabic_chars) > len(content) / 3
def get_language_from_post(post_json_object: {}, system_language: str,
languages_understood: [],
content_type: str = "content") -> str:

View File

@ -10,7 +10,6 @@ __module_group__ = "Calendar"
import os
from datetime import datetime
from datetime import date
from utils import is_arabic
from utils import get_display_name
from utils import get_config_param
from utils import get_nickname_from_actor
@ -231,7 +230,6 @@ def _html_calendar_day(person_cache: {}, translate: {},
event_place = event_map
# prepend a link to the sender of the calendar item
orig_event_description = event_description
if sender_name and event_description:
# if the sender is also mentioned within the event
# description then this is a reminder
@ -259,9 +257,6 @@ def _html_calendar_day(person_cache: {}, translate: {},
translate['Delete this event'] + '" src="/' + \
'icons/delete.png" /></a></td>\n'
if event_language != 'ar' and orig_event_description:
if is_arabic(orig_event_description):
event_language = 'ar'
is_rtl = language_right_to_left(event_language)
event_class = 'calendar__day__event'