mirror of https://gitlab.com/bashrc2/epicyon
More thorough validation of hashtags and nicknames
parent
7de4e24f2e
commit
890c8856a8
|
@ -10,6 +10,7 @@ import os
|
||||||
import email.parser
|
import email.parser
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
from utils import isValidLanguage
|
||||||
from utils import getImageExtensions
|
from utils import getImageExtensions
|
||||||
from utils import loadJson
|
from utils import loadJson
|
||||||
from utils import fileLastModified
|
from utils import fileLastModified
|
||||||
|
@ -377,7 +378,6 @@ def validHashTag(hashtag: str) -> bool:
|
||||||
# long hashtags are not valid
|
# long hashtags are not valid
|
||||||
if len(hashtag) >= 32:
|
if len(hashtag) >= 32:
|
||||||
return False
|
return False
|
||||||
# TODO: this may need to be an international character set
|
|
||||||
validChars = set('0123456789' +
|
validChars = set('0123456789' +
|
||||||
'abcdefghijklmnopqrstuvwxyz' +
|
'abcdefghijklmnopqrstuvwxyz' +
|
||||||
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
|
||||||
|
@ -389,6 +389,8 @@ def validHashTag(hashtag: str) -> bool:
|
||||||
'ŴŵÝýŸÿŶŷŹźŽžŻż')
|
'ŴŵÝýŸÿŶŷŹźŽžŻż')
|
||||||
if set(hashtag).issubset(validChars):
|
if set(hashtag).issubset(validChars):
|
||||||
return True
|
return True
|
||||||
|
if isValidLanguage(hashtag):
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
3
tests.py
3
tests.py
|
@ -3094,10 +3094,13 @@ def testValidHashTag():
|
||||||
assert validHashTag('ThisIsValid')
|
assert validHashTag('ThisIsValid')
|
||||||
assert validHashTag('ThisIsValid12345')
|
assert validHashTag('ThisIsValid12345')
|
||||||
assert validHashTag('ThisIsVälid')
|
assert validHashTag('ThisIsVälid')
|
||||||
|
assert validHashTag('यहमान्यहै')
|
||||||
assert not validHashTag('ThisIsNotValid!')
|
assert not validHashTag('ThisIsNotValid!')
|
||||||
assert not validHashTag('#ThisIsAlsoNotValid')
|
assert not validHashTag('#ThisIsAlsoNotValid')
|
||||||
|
assert not validHashTag('#यहमान्यहै')
|
||||||
assert not validHashTag('ThisIsAlso&NotValid')
|
assert not validHashTag('ThisIsAlso&NotValid')
|
||||||
assert not validHashTag('ThisIsAlsoNotValid"')
|
assert not validHashTag('ThisIsAlsoNotValid"')
|
||||||
|
assert not validHashTag('This Is Also Not Valid"')
|
||||||
assert not validHashTag('This=IsAlsoNotValid"')
|
assert not validHashTag('This=IsAlsoNotValid"')
|
||||||
|
|
||||||
|
|
||||||
|
|
77
utils.py
77
utils.py
|
@ -1163,14 +1163,58 @@ def deletePost(baseDir: str, httpPrefix: str,
|
||||||
os.remove(postFilename)
|
os.remove(postFilename)
|
||||||
|
|
||||||
|
|
||||||
def validNickname(domain: str, nickname: str) -> bool:
|
def isValidLanguage(text: str) -> bool:
    """Returns true if the given text contains a valid
    natural language string

    Every character which is not a digit must fall within the unicode
    range of one single script. Digits are permitted within any script.
    An empty string is not considered to be valid.
    """
    # an empty string contains no natural language at all
    if not text:
        return False

    # half-open [start, end) ranges of unicode code points.
    # The ranges must not overlap, otherwise text mixing two scripts
    # would be accepted as if it were written in a single one.
    # NOTE: the Latin range also spans some ascii punctuation, such
    # as underscore. That is left unchanged here since callers may
    # rely upon it.
    naturalLanguages = {
        "Latin": [65, 866],
        "Greek": [880, 1024],
        "Cyrillic": [1024, 1280],
        "Armenian": [1328, 1424],
        "Hebrew": [1424, 1536],
        "Arabic": [1536, 1792],
        "Syriac": [1792, 1872],
        "Thaana": [1920, 1984],
        "Devanagari": [2304, 2432],
        "Bengali": [2432, 2560],
        "Gurmukhi": [2560, 2688],
        "Gujarati": [2688, 2816],
        "Oriya": [2816, 2944],
        "Tamil": [2944, 3072],
        "Telugu": [3072, 3200],
        "Kannada": [3200, 3328],
        "Malayalam": [3328, 3456],
        "Sinhala": [3456, 3584],
        "Thai": [3584, 3712],
        "Lao": [3712, 3840],
        "Tibetan": [3840, 4096],
        "Myanmar": [4096, 4256],
        "Georgian": [4256, 4352],
        "HangulJamo": [4352, 4608],
        "Cherokee": [5024, 5120],
        "UCAS": [5120, 5760],
        "Ogham": [5760, 5792],
        "Runic": [5792, 5888],
        "Khmer": [6016, 6144],
        "Mongolian": [6144, 6320]
    }

    # digits are skipped, so only the remaining characters need
    # to be checked against each script range
    codePoints = [ord(ch) for ch in text if not ch.isdigit()]
    for langRange in naturalLanguages.values():
        if all(langRange[0] <= cp < langRange[1] for cp in codePoints):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _isReservedName(nickname: str) -> bool:
|
||||||
|
"""Is the given nickname reserved for some special function?
|
||||||
|
"""
|
||||||
reservedNames = ('inbox', 'dm', 'outbox', 'following',
|
reservedNames = ('inbox', 'dm', 'outbox', 'following',
|
||||||
'public', 'followers', 'category',
|
'public', 'followers', 'category',
|
||||||
'channel', 'calendar',
|
'channel', 'calendar',
|
||||||
|
@ -1184,6 +1228,23 @@ def validNickname(domain: str, nickname: str) -> bool:
|
||||||
'updates', 'repeat', 'announce',
|
'updates', 'repeat', 'announce',
|
||||||
'shares', 'fonts', 'icons', 'avatars')
|
'shares', 'fonts', 'icons', 'avatars')
|
||||||
if nickname in reservedNames:
|
if nickname in reservedNames:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def validNickname(domain: str, nickname: str) -> bool:
    """Checks whether the given nickname is acceptable

    Returns false if the nickname is not a valid natural language
    string, contains a forbidden character, is the same as the
    domain or is a reserved name. Otherwise returns true.
    """
    if not isValidLanguage(nickname):
        return False

    # characters which may not appear anywhere within a nickname
    disallowed = ('.', ' ', '/', '?', ':', ';', '@', '#')
    if any(ch in nickname for ch in disallowed):
        return False

    # this should only apply for the shared inbox
    if nickname == domain or _isReservedName(nickname):
        return False
    return True
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue