More thorough validation of hashtags and nicknames

merge-requests/30/head
Bob Mottram 2021-02-09 14:41:32 +00:00
parent 7de4e24f2e
commit 890c8856a8
3 changed files with 75 additions and 9 deletions

View File

@ -10,6 +10,7 @@ import os
import email.parser import email.parser
import urllib.parse import urllib.parse
from shutil import copyfile from shutil import copyfile
from utils import isValidLanguage
from utils import getImageExtensions from utils import getImageExtensions
from utils import loadJson from utils import loadJson
from utils import fileLastModified from utils import fileLastModified
@ -377,7 +378,6 @@ def validHashTag(hashtag: str) -> bool:
# long hashtags are not valid # long hashtags are not valid
if len(hashtag) >= 32: if len(hashtag) >= 32:
return False return False
# TODO: this may need to be an international character set
validChars = set('0123456789' + validChars = set('0123456789' +
'abcdefghijklmnopqrstuvwxyz' + 'abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
@ -389,6 +389,8 @@ def validHashTag(hashtag: str) -> bool:
'ŴŵÝýŸÿŶŷŹźŽžŻż') 'ŴŵÝýŸÿŶŷŹźŽžŻż')
if set(hashtag).issubset(validChars): if set(hashtag).issubset(validChars):
return True return True
if isValidLanguage(hashtag):
return True
return False return False

View File

@ -3094,10 +3094,13 @@ def testValidHashTag():
assert validHashTag('ThisIsValid') assert validHashTag('ThisIsValid')
assert validHashTag('ThisIsValid12345') assert validHashTag('ThisIsValid12345')
assert validHashTag('ThisIsVälid') assert validHashTag('ThisIsVälid')
assert validHashTag('यहमान्यहै')
assert not validHashTag('ThisIsNotValid!') assert not validHashTag('ThisIsNotValid!')
assert not validHashTag('#ThisIsAlsoNotValid') assert not validHashTag('#ThisIsAlsoNotValid')
assert not validHashTag('#यहमान्यहै')
assert not validHashTag('ThisIsAlso&NotValid') assert not validHashTag('ThisIsAlso&NotValid')
assert not validHashTag('ThisIsAlsoNotValid"') assert not validHashTag('ThisIsAlsoNotValid"')
assert not validHashTag('This Is Also Not Valid"')
assert not validHashTag('This=IsAlsoNotValid"') assert not validHashTag('This=IsAlsoNotValid"')

View File

@ -1163,14 +1163,58 @@ def deletePost(baseDir: str, httpPrefix: str,
os.remove(postFilename) os.remove(postFilename)
def isValidLanguage(text: str) -> bool:
    """Returns true if the given text contains a valid
    natural language string

    The text is accepted when all of its non-digit characters have
    code points inside one single range from the table below.
    Characters for which str.isdigit() is true are ignored, so text
    which is empty or made only of digits is accepted.
    Each range is [first, end) with the end value excluded.
    """
    naturalLanguages = {
        "Latin": [65, 866],
        "Cyrillic": [1024, 1274],
        "Greek": [880, 1280],
        "isArmenian": [1328, 1424],
        "isHebrew": [1424, 1536],
        "Arabic": [1536, 1792],
        "Syriac": [1792, 1872],
        "Thaan": [1920, 1984],
        "Devanagari": [2304, 2432],
        "Bengali": [2432, 2560],
        "Gurmukhi": [2560, 2688],
        "Gujarati": [2688, 2816],
        "Oriya": [2816, 2944],
        "Tamil": [2944, 3072],
        "Telugu": [3072, 3200],
        "Kannada": [3200, 3328],
        "Malayalam": [3328, 3456],
        "Sinhala": [3456, 3584],
        "Thai": [3584, 3712],
        "Lao": [3712, 3840],
        "Tibetan": [3840, 4096],
        "Myanmar": [4096, 4256],
        "Georgian": [4256, 4352],
        "HangulJamo": [4352, 4608],
        "Cherokee": [5024, 5120],
        "UCAS": [5120, 5760],
        "Ogham": [5760, 5792],
        "Runic": [5792, 5888],
        "Khmer": [6016, 6144],
        "Mongolian": [6144, 6320]
    }
    # digits are allowed alongside any script, so drop them up front
    codePoints = [ord(ch) for ch in text if not ch.isdigit()]
    for firstCode, endCode in naturalLanguages.values():
        # every remaining character must sit inside this one range
        if all(firstCode <= code < endCode for code in codePoints):
            return True
    return False
def _isReservedName(nickname: str) -> bool:
"""Is the given nickname reserved for some special function?
"""
reservedNames = ('inbox', 'dm', 'outbox', 'following', reservedNames = ('inbox', 'dm', 'outbox', 'following',
'public', 'followers', 'category', 'public', 'followers', 'category',
'channel', 'calendar', 'channel', 'calendar',
@ -1184,6 +1228,23 @@ def validNickname(domain: str, nickname: str) -> bool:
'updates', 'repeat', 'announce', 'updates', 'repeat', 'announce',
'shares', 'fonts', 'icons', 'avatars') 'shares', 'fonts', 'icons', 'avatars')
if nickname in reservedNames: if nickname in reservedNames:
return True
return False
def validNickname(domain: str, nickname: str) -> bool:
    """Is the given nickname valid?

    A nickname is rejected when it is empty, when isValidLanguage
    does not accept it, when it contains one of the forbidden
    characters, when it is identical to the domain, or when
    _isReservedName reports it as reserved.
    Returns True only if none of those conditions apply.
    """
    # isValidLanguage accepts empty text and an empty string contains
    # no forbidden characters, so reject it explicitly
    if not nickname:
        return False
    if not isValidLanguage(nickname):
        return False
    forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#')
    if any(c in nickname for c in forbiddenChars):
        return False
    # this should only apply for the shared inbox
    if nickname == domain:
        return False
    if _isReservedName(nickname):
        return False
    return True