From 890c8856a84c289b8670c2e93226a71e21c3a8db Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 9 Feb 2021 14:41:32 +0000 Subject: [PATCH] More thorough validation of hashtags and nicknames --- content.py | 4 ++- tests.py | 3 +++ utils.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 9 deletions(-) diff --git a/content.py b/content.py index 84fffeac9..1d8334412 100644 --- a/content.py +++ b/content.py @@ -10,6 +10,7 @@ import os import email.parser import urllib.parse from shutil import copyfile +from utils import isValidLanguage from utils import getImageExtensions from utils import loadJson from utils import fileLastModified @@ -377,7 +378,6 @@ def validHashTag(hashtag: str) -> bool: # long hashtags are not valid if len(hashtag) >= 32: return False - # TODO: this may need to be an international character set validChars = set('0123456789' + 'abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + @@ -389,6 +389,8 @@ def validHashTag(hashtag: str) -> bool: 'ŴŵÝýŸÿŶŷŹźŽžŻż') if set(hashtag).issubset(validChars): return True + if isValidLanguage(hashtag): + return True return False diff --git a/tests.py b/tests.py index f5549bfbd..284b13678 100644 --- a/tests.py +++ b/tests.py @@ -3094,10 +3094,13 @@ def testValidHashTag(): assert validHashTag('ThisIsValid') assert validHashTag('ThisIsValid12345') assert validHashTag('ThisIsVälid') + assert validHashTag('यहमान्यहै') assert not validHashTag('ThisIsNotValid!') assert not validHashTag('#ThisIsAlsoNotValid') + assert not validHashTag('#यहमान्यहै') assert not validHashTag('ThisIsAlso&NotValid') assert not validHashTag('ThisIsAlsoNotValid"') + assert not validHashTag('This Is Also Not Valid"') assert not validHashTag('This=IsAlsoNotValid"') diff --git a/utils.py b/utils.py index f024a2b98..328278e27 100644 --- a/utils.py +++ b/utils.py @@ -1163,14 +1163,58 @@ def deletePost(baseDir: str, httpPrefix: str, os.remove(postFilename) -def validNickname(domain: str, nickname: str) -> bool: - forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#') - for c in forbiddenChars: - if c in nickname: - return False - # this should only apply for the shared inbox - if nickname == domain: - return False +def isValidLanguage(text: str) -> bool: + """Returns true if the given text contains a valid + natural language string + """ + naturalLanguages = { + "Latin": [65, 866], + "Cyrillic": [1024, 1274], + "Greek": [880, 1280], + "isArmenian": [1328, 1424], + "isHebrew": [1424, 1536], + "Arabic": [1536, 1792], + "Syriac": [1792, 1872], + "Thaan": [1920, 1984], + "Devanagari": [2304, 2432], + "Bengali": [2432, 2560], + "Gurmukhi": [2560, 2688], + "Gujarati": [2688, 2816], + "Oriya": [2816, 2944], + "Tamil": [2944, 3072], + "Telugu": [3072, 3200], + "Kannada": [3200, 3328], + "Malayalam": [3328, 3456], + "Sinhala": [3456, 3584], + "Thai": [3584, 3712], + "Lao": [3712, 3840], + "Tibetan": [3840, 4096], + "Myanmar": [4096, 4256], + "Georgian": [4256, 4352], + "HangulJamo": [4352, 4608], + "Cherokee": [5024, 5120], + "UCAS": [5120, 5760], + "Ogham": [5760, 5792], + "Runic": [5792, 5888], + "Khmer": [6016, 6144], + "Mongolian": [6144, 6320] + } + for langName, langRange in naturalLanguages.items(): + okLang = True + for ch in text: + if ch.isdigit(): + continue + if ord(ch) not in range(langRange[0], langRange[1]): + okLang = False + break + if okLang: + return True + return False + + +def _isReservedName(nickname: str) -> bool: + """Is the given nickname reserved for some special function? + """ reservedNames = ('inbox', 'dm', 'outbox', 'following', 'public', 'followers', 'category', 'channel', 'calendar', @@ -1184,6 +1228,23 @@ def validNickname(domain: str, nickname: str) -> bool: 'updates', 'repeat', 'announce', 'shares', 'fonts', 'icons', 'avatars') if nickname in reservedNames: + return True + return False + + +def validNickname(domain: str, nickname: str) -> bool: + """Is the given nickname valid? + """ + if not isValidLanguage(nickname): + return False + forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#') + for c in forbiddenChars: + if c in nickname: + return False + # this should only apply for the shared inbox + if nickname == domain: + return False + if _isReservedName(nickname): return False return True