mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			More thorough validation of hashtags and nicknames
							parent
							
								
									7de4e24f2e
								
							
						
					
					
						commit
						890c8856a8
					
				|  | @ -10,6 +10,7 @@ import os | ||||||
| import email.parser | import email.parser | ||||||
| import urllib.parse | import urllib.parse | ||||||
| from shutil import copyfile | from shutil import copyfile | ||||||
|  | from utils import isValidLanguage | ||||||
| from utils import getImageExtensions | from utils import getImageExtensions | ||||||
| from utils import loadJson | from utils import loadJson | ||||||
| from utils import fileLastModified | from utils import fileLastModified | ||||||
|  | @ -377,7 +378,6 @@ def validHashTag(hashtag: str) -> bool: | ||||||
|     # long hashtags are not valid |     # long hashtags are not valid | ||||||
|     if len(hashtag) >= 32: |     if len(hashtag) >= 32: | ||||||
|         return False |         return False | ||||||
|     # TODO: this may need to be an international character set |  | ||||||
|     validChars = set('0123456789' + |     validChars = set('0123456789' + | ||||||
|                      'abcdefghijklmnopqrstuvwxyz' + |                      'abcdefghijklmnopqrstuvwxyz' + | ||||||
|                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + |                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + | ||||||
|  | @ -389,6 +389,8 @@ def validHashTag(hashtag: str) -> bool: | ||||||
|                      'ŴŵÝýŸÿŶŷŹźŽžŻż') |                      'ŴŵÝýŸÿŶŷŹźŽžŻż') | ||||||
|     if set(hashtag).issubset(validChars): |     if set(hashtag).issubset(validChars): | ||||||
|         return True |         return True | ||||||
|  |     if isValidLanguage(hashtag): | ||||||
|  |         return True | ||||||
|     return False |     return False | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										3
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										3
									
								
								tests.py
								
								
								
								
							|  | @ -3094,10 +3094,13 @@ def testValidHashTag(): | ||||||
|     assert validHashTag('ThisIsValid') |     assert validHashTag('ThisIsValid') | ||||||
|     assert validHashTag('ThisIsValid12345') |     assert validHashTag('ThisIsValid12345') | ||||||
|     assert validHashTag('ThisIsVälid') |     assert validHashTag('ThisIsVälid') | ||||||
|  |     assert validHashTag('यहमान्यहै') | ||||||
|     assert not validHashTag('ThisIsNotValid!') |     assert not validHashTag('ThisIsNotValid!') | ||||||
|     assert not validHashTag('#ThisIsAlsoNotValid') |     assert not validHashTag('#ThisIsAlsoNotValid') | ||||||
|  |     assert not validHashTag('#यहमान्यहै') | ||||||
|     assert not validHashTag('ThisIsAlso&NotValid') |     assert not validHashTag('ThisIsAlso&NotValid') | ||||||
|     assert not validHashTag('ThisIsAlsoNotValid"') |     assert not validHashTag('ThisIsAlsoNotValid"') | ||||||
|  |     assert not validHashTag('This Is Also Not Valid"') | ||||||
|     assert not validHashTag('This=IsAlsoNotValid"') |     assert not validHashTag('This=IsAlsoNotValid"') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										77
									
								
								utils.py
								
								
								
								
							
							
						
						
									
										77
									
								
								utils.py
								
								
								
								
							|  | @ -1163,14 +1163,58 @@ def deletePost(baseDir: str, httpPrefix: str, | ||||||
|     os.remove(postFilename) |     os.remove(postFilename) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def validNickname(domain: str, nickname: str) -> bool: | def isValidLanguage(text: str) -> bool: | ||||||
|     forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#') |     """Returns true if the given text contains a valid | ||||||
|     for c in forbiddenChars: |     natural language string | ||||||
|         if c in nickname: |     """ | ||||||
|             return False |     naturalLanguages = { | ||||||
|     # this should only apply for the shared inbox |         "Latin": [65, 866], | ||||||
|     if nickname == domain: |         "Cyrillic": [1024, 1274], | ||||||
|         return False |         "Greek": [880, 1280], | ||||||
|  |         "isArmenian": [1328, 1424], | ||||||
|  |         "isHebrew": [1424, 1536], | ||||||
|  |         "Arabic": [1536, 1792], | ||||||
|  |         "Syriac": [1792, 1872], | ||||||
|  |         "Thaan": [1920, 1984], | ||||||
|  |         "Devanagari": [2304, 2432], | ||||||
|  |         "Bengali": [2432, 2560], | ||||||
|  |         "Gurmukhi": [2560, 2688], | ||||||
|  |         "Gujarati": [2688, 2816], | ||||||
|  |         "Oriya": [2816, 2944], | ||||||
|  |         "Tamil": [2944, 3072], | ||||||
|  |         "Telugu": [3072, 3200], | ||||||
|  |         "Kannada": [3200, 3328], | ||||||
|  |         "Malayalam": [3328, 3456], | ||||||
|  |         "Sinhala": [3456, 3584], | ||||||
|  |         "Thai": [3584, 3712], | ||||||
|  |         "Lao": [3712, 3840], | ||||||
|  |         "Tibetan": [3840, 4096], | ||||||
|  |         "Myanmar": [4096, 4256], | ||||||
|  |         "Georgian": [4256, 4352], | ||||||
|  |         "HangulJamo": [4352, 4608], | ||||||
|  |         "Cherokee": [5024, 5120], | ||||||
|  |         "UCAS": [5120, 5760], | ||||||
|  |         "Ogham": [5760, 5792], | ||||||
|  |         "Runic": [5792, 5888], | ||||||
|  |         "Khmer": [6016, 6144], | ||||||
|  |         "Mongolian": [6144, 6320] | ||||||
|  |     } | ||||||
|  |     for langName, langRange in naturalLanguages.items(): | ||||||
|  |         okLang = True | ||||||
|  |         for ch in text: | ||||||
|  |             if ch.isdigit(): | ||||||
|  |                 continue | ||||||
|  |             if ord(ch) not in range(langRange[0], langRange[1]): | ||||||
|  |                 okLang = False | ||||||
|  |                 break | ||||||
|  |         if okLang: | ||||||
|  |             return True | ||||||
|  |     return False | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _isReservedName(nickname: str) -> bool: | ||||||
|  |     """Is the given nickname reserved for some special function? | ||||||
|  |     """ | ||||||
|     reservedNames = ('inbox', 'dm', 'outbox', 'following', |     reservedNames = ('inbox', 'dm', 'outbox', 'following', | ||||||
|                      'public', 'followers', 'category', |                      'public', 'followers', 'category', | ||||||
|                      'channel', 'calendar', |                      'channel', 'calendar', | ||||||
|  | @ -1184,6 +1228,23 @@ def validNickname(domain: str, nickname: str) -> bool: | ||||||
|                      'updates', 'repeat', 'announce', |                      'updates', 'repeat', 'announce', | ||||||
|                      'shares', 'fonts', 'icons', 'avatars') |                      'shares', 'fonts', 'icons', 'avatars') | ||||||
|     if nickname in reservedNames: |     if nickname in reservedNames: | ||||||
|  |         return True | ||||||
|  |     return False | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def validNickname(domain: str, nickname: str) -> bool: | ||||||
|  |     """Is the given nickname valid? | ||||||
|  |     """ | ||||||
|  |     if not isValidLanguage(nickname): | ||||||
|  |         return False | ||||||
|  |     forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@', '#') | ||||||
|  |     for c in forbiddenChars: | ||||||
|  |         if c in nickname: | ||||||
|  |             return False | ||||||
|  |     # this should only apply for the shared inbox | ||||||
|  |     if nickname == domain: | ||||||
|  |         return False | ||||||
|  |     if _isReservedName(nickname): | ||||||
|         return False |         return False | ||||||
|     return True |     return True | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue