More thorough validation of hashtags and nicknames

main
Bob Mottram 2021-02-09 14:41:32 +00:00
parent 7de4e24f2e
commit 890c8856a8
3 changed files with 75 additions and 9 deletions

View File

@ -10,6 +10,7 @@ import os
import email.parser
import urllib.parse
from shutil import copyfile
from utils import isValidLanguage
from utils import getImageExtensions
from utils import loadJson
from utils import fileLastModified
@ -377,7 +378,6 @@ def validHashTag(hashtag: str) -> bool:
# long hashtags are not valid
if len(hashtag) >= 32:
return False
# TODO: this may need to be an international character set
validChars = set('0123456789' +
'abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
@ -389,6 +389,8 @@ def validHashTag(hashtag: str) -> bool:
'ŴŵÝýŸÿŶŷŹźŽžŻż')
if set(hashtag).issubset(validChars):
return True
if isValidLanguage(hashtag):
return True
return False

View File

@ -3094,10 +3094,13 @@ def testValidHashTag():
assert validHashTag('ThisIsValid')
assert validHashTag('ThisIsValid12345')
assert validHashTag('ThisIsVälid')
assert validHashTag('यहमान्यहै')
assert not validHashTag('ThisIsNotValid!')
assert not validHashTag('#ThisIsAlsoNotValid')
assert not validHashTag('#यहमान्यहै')
assert not validHashTag('ThisIsAlso&NotValid')
assert not validHashTag('ThisIsAlsoNotValid"')
assert not validHashTag('This Is Also Not Valid"')
assert not validHashTag('This=IsAlsoNotValid"')

View File

@ -1163,14 +1163,58 @@ def deletePost(baseDir: str, httpPrefix: str,
os.remove(postFilename)
def validNickname(domain: str, nickname: str) -> bool:
forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#')
for c in forbiddenChars:
if c in nickname:
return False
# this should only apply for the shared inbox
if nickname == domain:
return False
def isValidLanguage(text: str) -> bool:
    """Returns true if the given text is written in a single
    recognised script.

    Digit characters (as reported by str.isdigit) are ignored. Every
    remaining character must have a code point inside the same
    half-open range [start, end) from the table below for the text
    to be accepted.

    Text with no non-digit characters, including the empty string,
    is accepted because there is nothing left to reject.
    """
    # Script name -> [first code point, one-past-last code point]
    # NOTE(review): the "Latin" range also spans non-letter code points
    # such as '[', '_', '{' and U+0080-U+009F - confirm this is intended
    scriptRanges = {
        "Latin": [65, 866],
        "Cyrillic": [1024, 1274],
        "Greek": [880, 1280],
        "isArmenian": [1328, 1424],
        "isHebrew": [1424, 1536],
        "Arabic": [1536, 1792],
        "Syriac": [1792, 1872],
        "Thaan": [1920, 1984],
        "Devanagari": [2304, 2432],
        "Bengali": [2432, 2560],
        "Gurmukhi": [2560, 2688],
        "Gujarati": [2688, 2816],
        "Oriya": [2816, 2944],
        "Tamil": [2944, 3072],
        "Telugu": [3072, 3200],
        "Kannada": [3200, 3328],
        "Malayalam": [3328, 3456],
        "Sinhala": [3456, 3584],
        "Thai": [3584, 3712],
        "Lao": [3712, 3840],
        "Tibetan": [3840, 4096],
        "Myanmar": [4096, 4256],
        "Georgian": [4256, 4352],
        "HangulJamo": [4352, 4608],
        "Cherokee": [5024, 5120],
        "UCAS": [5120, 5760],
        "Ogham": [5760, 5792],
        "Runic": [5792, 5888],
        "Khmer": [6016, 6144],
        "Mongolian": [6144, 6320]
    }

    # digits are allowed alongside any script, so drop them once here
    # rather than skipping them again for every candidate range
    codePoints = [ord(char) for char in text if not char.isdigit()]

    for firstCode, endCode in scriptRanges.values():
        if all(firstCode <= code < endCode for code in codePoints):
            return True
    return False
def _isReservedName(nickname: str) -> bool:
"""Is the given nickname reserved for some special function?
"""
reservedNames = ('inbox', 'dm', 'outbox', 'following',
'public', 'followers', 'category',
'channel', 'calendar',
@ -1184,6 +1228,23 @@ def validNickname(domain: str, nickname: str) -> bool:
'updates', 'repeat', 'announce',
'shares', 'fonts', 'icons', 'avatars')
if nickname in reservedNames:
return True
return False
def validNickname(domain: str, nickname: str) -> bool:
    """Is the given nickname valid?

    A nickname is rejected when it is empty, when isValidLanguage
    does not accept it, when it contains any of the forbidden
    punctuation characters, when it equals the domain, or when
    _isReservedName reports it as reserved. Otherwise it is valid.
    """
    # isValidLanguage accepts the empty string, and none of the
    # later checks would catch it, so reject it explicitly
    if not nickname:
        return False
    if not isValidLanguage(nickname):
        return False
    forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#')
    if any(c in nickname for c in forbiddenChars):
        return False
    # this should only apply for the shared inbox
    if nickname == domain:
        return False
    if _isReservedName(nickname):
        return False
    return True