| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | __filename__ = "content.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							| 
									
										
										
										
											2021-01-26 10:07:42 +00:00
										 |  |  | __version__ = "1.2.0" | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							| 
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 |  |  | __email__ = "bob@libreserver.org" | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | __status__ = "Production" | 
					
						
							| 
									
										
										
										
											2021-06-25 16:10:09 +00:00
										 |  |  | __module_group__ = "Core" | 
					
						
							| 
									
										
										
										
											2019-07-15 14:11:31 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | import email.parser | 
					
						
							| 
									
										
										
										
											2020-12-03 14:59:07 +00:00
										 |  |  | import urllib.parse | 
					
						
							| 
									
										
										
										
											2019-08-11 16:55:22 +00:00
										 |  |  | from shutil import copyfile | 
					
						
							| 
									
										
										
										
											2021-09-13 17:51:33 +00:00
										 |  |  | from utils import dangerousSVG | 
					
						
							| 
									
										
										
										
											2021-06-26 14:21:24 +00:00
										 |  |  | from utils import removeDomainPort | 
					
						
							| 
									
										
										
										
											2021-02-09 14:41:32 +00:00
										 |  |  | from utils import isValidLanguage | 
					
						
							| 
									
										
										
										
											2020-11-21 11:54:29 +00:00
										 |  |  | from utils import getImageExtensions | 
					
						
							| 
									
										
										
										
											2019-11-23 10:08:00 +00:00
										 |  |  | from utils import loadJson | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  | from utils import saveJson | 
					
						
							| 
									
										
										
										
											2020-02-21 10:19:02 +00:00
										 |  |  | from utils import fileLastModified | 
					
						
							| 
									
										
										
										
											2020-06-11 12:26:15 +00:00
										 |  |  | from utils import getLinkPrefixes | 
					
						
							| 
									
										
										
										
											2021-01-31 11:05:17 +00:00
										 |  |  | from utils import dangerousMarkup | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  | from utils import isPGPEncrypted | 
					
						
							|  |  |  | from utils import containsPGPPublicKey | 
					
						
							| 
									
										
										
										
											2021-07-13 21:59:53 +00:00
										 |  |  | from utils import acctDir | 
					
						
							| 
									
										
										
										
											2021-08-07 17:03:41 +00:00
										 |  |  | from utils import isfloat | 
					
						
							| 
									
										
										
										
											2021-08-07 17:44:25 +00:00
										 |  |  | from utils import getCurrencies | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  | from utils import removeHtml | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  | from petnames import getPetName | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  | from session import downloadImage | 
					
						
							| 
									
										
										
										
											2019-07-15 14:11:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-30 22:55:53 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  | def removeHtmlTag(htmlStr: str, tag: str) -> str: | 
					
						
							|  |  |  |     """Removes a given tag from a html string
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     tagFound = True | 
					
						
							|  |  |  |     while tagFound: | 
					
						
							|  |  |  |         matchStr = ' ' + tag + '="' | 
					
						
							|  |  |  |         if matchStr not in htmlStr: | 
					
						
							|  |  |  |             tagFound = False | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         sections = htmlStr.split(matchStr, 1) | 
					
						
							|  |  |  |         if '"' not in sections[1]: | 
					
						
							|  |  |  |             tagFound = False | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         htmlStr = sections[0] + sections[1].split('"', 1)[1] | 
					
						
							|  |  |  |     return htmlStr | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _removeQuotesWithinQuotes(content: str) -> str: | 
					
						
							| 
									
										
										
										
											2020-09-30 22:52:39 +00:00
										 |  |  |     """Removes any blockquote inside blockquote
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if '<blockquote>' not in content: | 
					
						
							|  |  |  |         return content | 
					
						
							|  |  |  |     if '</blockquote>' not in content: | 
					
						
							|  |  |  |         return content | 
					
						
							|  |  |  |     ctr = 1 | 
					
						
							|  |  |  |     found = True | 
					
						
							|  |  |  |     while found: | 
					
						
							|  |  |  |         prefix = content.split('<blockquote>', ctr)[0] + '<blockquote>' | 
					
						
							|  |  |  |         quotedStr = content.split('<blockquote>', ctr)[1] | 
					
						
							|  |  |  |         if '</blockquote>' not in quotedStr: | 
					
						
							|  |  |  |             found = False | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             endStr = quotedStr.split('</blockquote>')[1] | 
					
						
							|  |  |  |             quotedStr = quotedStr.split('</blockquote>')[0] | 
					
						
							|  |  |  |             if '<blockquote>' not in endStr: | 
					
						
							|  |  |  |                 found = False | 
					
						
							|  |  |  |             if '<blockquote>' in quotedStr: | 
					
						
							|  |  |  |                 quotedStr = quotedStr.replace('<blockquote>', '') | 
					
						
							|  |  |  |                 content = prefix + quotedStr + '</blockquote>' + endStr | 
					
						
							|  |  |  |         ctr += 1 | 
					
						
							|  |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  | def htmlReplaceEmailQuote(content: str) -> str: | 
					
						
							|  |  |  |     """Replaces an email style quote "> Some quote" with html blockquote
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-09-14 11:30:56 +00:00
										 |  |  |     # replace quote paragraph | 
					
						
							|  |  |  |     if '<p>"' in content: | 
					
						
							|  |  |  |         if '"</p>' in content: | 
					
						
							| 
									
										
										
										
											2020-10-30 12:10:57 +00:00
										 |  |  |             if content.count('<p>"') == content.count('"</p>'): | 
					
						
							|  |  |  |                 content = content.replace('<p>"', '<p><blockquote>') | 
					
						
							|  |  |  |                 content = content.replace('"</p>', '</blockquote></p>') | 
					
						
							| 
									
										
										
										
											2020-09-14 12:17:11 +00:00
										 |  |  |     if '>\u201c' in content: | 
					
						
							|  |  |  |         if '\u201d<' in content: | 
					
						
							| 
									
										
										
										
											2020-10-30 12:10:57 +00:00
										 |  |  |             if content.count('>\u201c') == content.count('\u201d<'): | 
					
						
							| 
									
										
										
										
											2020-10-30 12:12:09 +00:00
										 |  |  |                 content = content.replace('>\u201c', '><blockquote>') | 
					
						
							|  |  |  |                 content = content.replace('\u201d<', '</blockquote><') | 
					
						
							| 
									
										
										
										
											2020-09-14 11:30:56 +00:00
										 |  |  |     # replace email style quote | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  |     if '>> ' not in content: | 
					
						
							|  |  |  |         return content | 
					
						
							|  |  |  |     contentStr = content.replace('<p>', '') | 
					
						
							|  |  |  |     contentLines = contentStr.split('</p>') | 
					
						
							|  |  |  |     newContent = '' | 
					
						
							|  |  |  |     for lineStr in contentLines: | 
					
						
							|  |  |  |         if not lineStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '>> ' not in lineStr: | 
					
						
							| 
									
										
										
										
											2020-09-14 10:25:12 +00:00
										 |  |  |             if lineStr.startswith('> '): | 
					
						
							|  |  |  |                 lineStr = lineStr.replace('> ', '<blockquote>') | 
					
						
							|  |  |  |                 lineStr = lineStr.replace('>', '<br>') | 
					
						
							|  |  |  |                 newContent += '<p>' + lineStr + '</blockquote></p>' | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 newContent += '<p>' + lineStr + '</p>' | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             lineStr = lineStr.replace('>> ', '><blockquote>') | 
					
						
							| 
									
										
										
										
											2020-09-30 22:52:39 +00:00
										 |  |  |             if lineStr.startswith('>'): | 
					
						
							|  |  |  |                 lineStr = lineStr.replace('>', '<blockquote>', 1) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 lineStr = lineStr.replace('>', '<br>') | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  |             newContent += '<p>' + lineStr + '</blockquote></p>' | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |     return _removeQuotesWithinQuotes(newContent) | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-02 17:01:12 +00:00
										 |  |  | def htmlReplaceQuoteMarks(content: str) -> str: | 
					
						
							|  |  |  |     """Replaces quotes with html formatting
 | 
					
						
							|  |  |  |     "hello" becomes <q>hello</q> | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-08-02 17:01:12 +00:00
										 |  |  |     if '"' not in content: | 
					
						
							| 
									
										
										
										
											2020-08-03 17:03:30 +00:00
										 |  |  |         if '"' not in content: | 
					
						
							|  |  |  |             return content | 
					
						
							| 
									
										
										
										
											2020-10-30 12:03:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # only if there are a few quote marks | 
					
						
							|  |  |  |     if content.count('"') > 4: | 
					
						
							|  |  |  |         return content | 
					
						
							|  |  |  |     if content.count('"') > 4: | 
					
						
							|  |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-08-02 17:01:12 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-03 17:03:30 +00:00
										 |  |  |     newContent = content | 
					
						
							|  |  |  |     if '"' in content: | 
					
						
							|  |  |  |         sections = content.split('"') | 
					
						
							|  |  |  |         if len(sections) > 1: | 
					
						
							|  |  |  |             newContent = '' | 
					
						
							|  |  |  |             openQuote = True | 
					
						
							| 
									
										
										
										
											2020-08-02 17:17:51 +00:00
										 |  |  |             markup = False | 
					
						
							| 
									
										
										
										
											2020-08-03 17:03:30 +00:00
										 |  |  |             for ch in content: | 
					
						
							|  |  |  |                 currChar = ch | 
					
						
							|  |  |  |                 if ch == '<': | 
					
						
							|  |  |  |                     markup = True | 
					
						
							|  |  |  |                 elif ch == '>': | 
					
						
							|  |  |  |                     markup = False | 
					
						
							|  |  |  |                 elif ch == '"' and not markup: | 
					
						
							|  |  |  |                     if openQuote: | 
					
						
							|  |  |  |                         currChar = '“' | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         currChar = '”' | 
					
						
							|  |  |  |                     openQuote = not openQuote | 
					
						
							|  |  |  |                 newContent += currChar | 
					
						
							| 
									
										
										
										
											2020-08-02 19:16:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if '"' in newContent: | 
					
						
							|  |  |  |         openQuote = True | 
					
						
							|  |  |  |         content = newContent | 
					
						
							|  |  |  |         newContent = '' | 
					
						
							|  |  |  |         ctr = 0 | 
					
						
							|  |  |  |         sections = content.split('"') | 
					
						
							|  |  |  |         noOfSections = len(sections) | 
					
						
							|  |  |  |         for s in sections: | 
					
						
							|  |  |  |             newContent += s | 
					
						
							|  |  |  |             if ctr < noOfSections - 1: | 
					
						
							|  |  |  |                 if openQuote: | 
					
						
							|  |  |  |                     newContent += '“' | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     newContent += '”' | 
					
						
							|  |  |  |                 openQuote = not openQuote | 
					
						
							|  |  |  |             ctr += 1 | 
					
						
							| 
									
										
										
										
											2020-08-02 17:01:12 +00:00
										 |  |  |     return newContent | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-20 10:58:49 +00:00
										 |  |  | def dangerousCSS(filename: str, allowLocalNetworkAccess: bool) -> bool: | 
					
						
							| 
									
										
										
										
											2020-11-15 11:01:05 +00:00
										 |  |  |     """Returns true is the css file contains code which
 | 
					
						
							|  |  |  |     can create security problems | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not os.path.isfile(filename): | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |     content = None | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         with open(filename, 'r') as fp: | 
					
						
							|  |  |  |             content = fp.read().lower() | 
					
						
							|  |  |  |     except OSError: | 
					
						
							|  |  |  |         print('EX: unable to read css file ' + filename) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if content: | 
					
						
							| 
									
										
										
										
											2020-11-15 11:26:23 +00:00
										 |  |  |         cssMatches = ('behavior:', ':expression', '?php', '.php', | 
					
						
							| 
									
										
										
										
											2020-12-12 20:59:52 +00:00
										 |  |  |                       'google', 'regexp', 'localhost', | 
					
						
							| 
									
										
										
										
											2020-12-12 21:42:10 +00:00
										 |  |  |                       '127.0.', '192.168', '10.0.', '@import') | 
					
						
							| 
									
										
										
										
											2021-10-07 19:03:01 +00:00
										 |  |  |         for cssmatch in cssMatches: | 
					
						
							|  |  |  |             if cssmatch in content: | 
					
						
							| 
									
										
										
										
											2020-11-15 11:01:05 +00:00
										 |  |  |                 return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-12 21:21:06 +00:00
										 |  |  |         # search for non-local web links | 
					
						
							|  |  |  |         if 'url(' in content: | 
					
						
							|  |  |  |             urlList = content.split('url(') | 
					
						
							|  |  |  |             ctr = 0 | 
					
						
							|  |  |  |             for urlStr in urlList: | 
					
						
							|  |  |  |                 if ctr > 0: | 
					
						
							|  |  |  |                     if ')' in urlStr: | 
					
						
							|  |  |  |                         urlStr = urlStr.split(')')[0] | 
					
						
							|  |  |  |                         if 'http' in urlStr: | 
					
						
							|  |  |  |                             print('ERROR: non-local web link in CSS ' + | 
					
						
							|  |  |  |                                   filename) | 
					
						
							|  |  |  |                             return True | 
					
						
							|  |  |  |                 ctr += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-15 11:01:05 +00:00
										 |  |  |         # an attacker can include html inside of the css | 
					
						
							|  |  |  |         # file as a comment and this may then be run from the html | 
					
						
							| 
									
										
										
										
											2020-11-20 10:58:49 +00:00
										 |  |  |         if dangerousMarkup(content, allowLocalNetworkAccess): | 
					
						
							| 
									
										
										
										
											2020-11-15 11:01:05 +00:00
										 |  |  |             return True | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-06 16:29:03 +00:00
										 |  |  | def switchWords(baseDir: str, nickname: str, domain: str, content: str, | 
					
						
							|  |  |  |                 rules: [] = []) -> str: | 
					
						
							| 
									
										
										
										
											2020-02-19 18:51:08 +00:00
										 |  |  |     """Performs word replacements. eg. Trump -> The Orange Menace
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2021-07-06 16:29:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if not rules: | 
					
						
							| 
									
										
										
										
											2021-07-13 21:59:53 +00:00
										 |  |  |         switchWordsFilename = \ | 
					
						
							|  |  |  |             acctDir(baseDir, nickname, domain) + '/replacewords.txt' | 
					
						
							| 
									
										
										
										
											2021-07-06 16:29:03 +00:00
										 |  |  |         if not os.path.isfile(switchWordsFilename): | 
					
						
							|  |  |  |             return content | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             with open(switchWordsFilename, 'r') as fp: | 
					
						
							|  |  |  |                 rules = fp.readlines() | 
					
						
							|  |  |  |         except OSError: | 
					
						
							|  |  |  |             print('EX: unable to read switches ' + switchWordsFilename) | 
					
						
							| 
									
										
										
										
											2021-07-06 16:29:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for line in rules: | 
					
						
							|  |  |  |         replaceStr = line.replace('\n', '').replace('\r', '') | 
					
						
							|  |  |  |         splitters = ('->', ':', ',', ';', '-') | 
					
						
							|  |  |  |         wordTransform = None | 
					
						
							|  |  |  |         for splitStr in splitters: | 
					
						
							|  |  |  |             if splitStr in replaceStr: | 
					
						
							|  |  |  |                 wordTransform = replaceStr.split(splitStr) | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         if not wordTransform: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if len(wordTransform) == 2: | 
					
						
							|  |  |  |             replaceStr1 = wordTransform[0].strip().replace('"', '') | 
					
						
							|  |  |  |             replaceStr2 = wordTransform[1].strip().replace('"', '') | 
					
						
							|  |  |  |             content = content.replace(replaceStr1, replaceStr2) | 
					
						
							| 
									
										
										
										
											2020-02-19 18:51:08 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  | def _saveCustomEmoji(session, baseDir: str, emojiName: str, url: str, | 
					
						
							|  |  |  |                      debug: bool) -> None: | 
					
						
							|  |  |  |     """Saves custom emoji to file
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not session: | 
					
						
							| 
									
										
										
										
											2021-11-01 17:50:38 +00:00
										 |  |  |         if debug: | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |             print('EX: _saveCustomEmoji no session') | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |         return | 
					
						
							|  |  |  |     if '.' not in url: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |     ext = url.split('.')[-1] | 
					
						
							|  |  |  |     if ext != 'png': | 
					
						
							| 
									
										
										
										
											2021-11-01 17:50:38 +00:00
										 |  |  |         if debug: | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |             print('EX: Custom emoji is wrong format ' + url) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |         return | 
					
						
							| 
									
										
										
										
											2021-11-01 20:12:04 +00:00
										 |  |  |     emojiName = emojiName.replace(':', '').strip().lower() | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |     customEmojiDir = baseDir + '/emojicustom' | 
					
						
							|  |  |  |     if not os.path.isdir(customEmojiDir): | 
					
						
							|  |  |  |         os.mkdir(customEmojiDir) | 
					
						
							|  |  |  |     emojiImageFilename = customEmojiDir + '/' + emojiName + '.' + ext | 
					
						
							|  |  |  |     if not downloadImage(session, baseDir, url, | 
					
						
							|  |  |  |                          emojiImageFilename, debug, False): | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |         if debug: | 
					
						
							|  |  |  |             print('EX: custom emoji not downloaded ' + url) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |         return | 
					
						
							|  |  |  |     emojiJsonFilename = customEmojiDir + '/emoji.json' | 
					
						
							|  |  |  |     emojiJson = {} | 
					
						
							|  |  |  |     if os.path.isfile(emojiJsonFilename): | 
					
						
							|  |  |  |         emojiJson = loadJson(emojiJsonFilename, 0, 1) | 
					
						
							|  |  |  |         if not emojiJson: | 
					
						
							|  |  |  |             emojiJson = {} | 
					
						
							|  |  |  |     if not emojiJson.get(emojiName): | 
					
						
							|  |  |  |         emojiJson[emojiName] = emojiName | 
					
						
							|  |  |  |         saveJson(emojiJson, emojiJsonFilename) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:50:38 +00:00
										 |  |  |         if debug: | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |             print('EX: Saved custom emoji ' + emojiJsonFilename) | 
					
						
							|  |  |  |     elif debug: | 
					
						
							|  |  |  |         print('EX: cusom emoji already saved') | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def replaceEmojiFromTags(session, baseDir: str, | 
					
						
							|  |  |  |                          content: str, tag: [], messageType: str, | 
					
						
							|  |  |  |                          debug: bool) -> str: | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |     """Uses the tags to replace :emoji: with html image markup
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2019-09-29 17:20:10 +00:00
										 |  |  |     for tagItem in tag: | 
					
						
							|  |  |  |         if not tagItem.get('type'): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         if tagItem['type'] != 'Emoji': | 
					
						
							| 
									
										
										
										
											2019-09-29 17:20:10 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         if not tagItem.get('name'): | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         if not tagItem.get('icon'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not tagItem['icon'].get('url'): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-02-21 15:09:31 +00:00
										 |  |  |         if '/' not in tagItem['icon']['url']: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |         if tagItem['name'] not in content: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         iconName = tagItem['icon']['url'].split('/')[-1] | 
					
						
							| 
									
										
										
										
											2020-02-21 15:09:31 +00:00
										 |  |  |         if iconName: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             if len(iconName) > 1: | 
					
						
							| 
									
										
										
										
											2020-02-21 17:45:20 +00:00
										 |  |  |                 if iconName[0].isdigit(): | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                     if '.' in iconName: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                         iconName = iconName.split('.')[0] | 
					
						
							|  |  |  |                         # see https://unicode.org/ | 
					
						
							|  |  |  |                         # emoji/charts/full-emoji-list.html | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                         if '-' not in iconName: | 
					
						
							|  |  |  |                             # a single code | 
					
						
							| 
									
										
										
										
											2021-11-01 17:23:39 +00:00
										 |  |  |                             replaced = False | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                             try: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                                 replaceChar = chr(int("0x" + iconName, 16)) | 
					
						
							|  |  |  |                                 content = content.replace(tagItem['name'], | 
					
						
							|  |  |  |                                                           replaceChar) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |                                 replaced = True | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                             except BaseException: | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |                                 print('EX: replaceEmojiFromTags 1 ' + | 
					
						
							| 
									
										
										
										
											2021-11-01 11:35:15 +00:00
										 |  |  |                                       'no conversion of ' + | 
					
						
							|  |  |  |                                       str(iconName) + ' to chr ' + | 
					
						
							| 
									
										
										
										
											2021-11-01 10:36:59 +00:00
										 |  |  |                                       tagItem['name'] + ' ' + | 
					
						
							|  |  |  |                                       tagItem['icon']['url']) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |                             if not replaced: | 
					
						
							|  |  |  |                                 _saveCustomEmoji(session, baseDir, | 
					
						
							|  |  |  |                                                  tagItem['name'], | 
					
						
							|  |  |  |                                                  tagItem['icon']['url'], | 
					
						
							|  |  |  |                                                  debug) | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                         else: | 
					
						
							|  |  |  |                             # sequence of codes | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                             iconCodes = iconName.split('-') | 
					
						
							|  |  |  |                             iconCodeSequence = '' | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                             for icode in iconCodes: | 
					
						
							| 
									
										
										
										
											2021-11-01 17:23:39 +00:00
										 |  |  |                                 replaced = False | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                                 try: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                                     iconCodeSequence += chr(int("0x" + | 
					
						
							|  |  |  |                                                                 icode, 16)) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |                                     replaced = True | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                                 except BaseException: | 
					
						
							|  |  |  |                                     iconCodeSequence = '' | 
					
						
							| 
									
										
										
										
											2021-11-01 18:33:32 +00:00
										 |  |  |                                     print('EX: replaceEmojiFromTags 2 ' + | 
					
						
							| 
									
										
										
										
											2021-11-01 11:35:15 +00:00
										 |  |  |                                           'no conversion of ' + | 
					
						
							|  |  |  |                                           str(icode) + ' to chr ' + | 
					
						
							| 
									
										
										
										
											2021-11-01 10:36:59 +00:00
										 |  |  |                                           tagItem['name'] + ' ' + | 
					
						
							|  |  |  |                                           tagItem['icon']['url']) | 
					
						
							| 
									
										
										
										
											2021-11-01 17:12:17 +00:00
										 |  |  |                                 if not replaced: | 
					
						
							|  |  |  |                                     _saveCustomEmoji(session, baseDir, | 
					
						
							|  |  |  |                                                      tagItem['name'], | 
					
						
							|  |  |  |                                                      tagItem['icon']['url'], | 
					
						
							|  |  |  |                                                      debug) | 
					
						
							| 
									
										
										
										
											2020-02-21 21:08:24 +00:00
										 |  |  |                             if iconCodeSequence: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                                 content = content.replace(tagItem['name'], | 
					
						
							|  |  |  |                                                           iconCodeSequence) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         htmlClass = 'emoji' | 
					
						
							|  |  |  |         if messageType == 'post header': | 
					
						
							|  |  |  |             htmlClass = 'emojiheader' | 
					
						
							|  |  |  |         if messageType == 'profile': | 
					
						
							|  |  |  |             htmlClass = 'emojiprofile' | 
					
						
							|  |  |  |         emojiHtml = "<img src=\"" + tagItem['icon']['url'] + "\" alt=\"" + \ | 
					
						
							|  |  |  |             tagItem['name'].replace(':', '') + \ | 
					
						
							|  |  |  |             "\" align=\"middle\" class=\"" + htmlClass + "\"/>" | 
					
						
							|  |  |  |         content = content.replace(tagItem['name'], emojiHtml) | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-21 15:09:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _addMusicTag(content: str, tag: str) -> str: | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |     """If a music link is found then ensure that the post is
 | 
					
						
							|  |  |  |     tagged appropriately | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-11 09:50:17 +00:00
										 |  |  |     if '#podcast' in content or '#documentary' in content: | 
					
						
							|  |  |  |         return content | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  |     if '#' not in tag: | 
					
						
							| 
									
										
										
										
											2020-10-11 09:50:17 +00:00
										 |  |  |         tag = '#' + tag | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  |     if tag in content: | 
					
						
							|  |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  |     musicSites = ('soundcloud.com', 'bandcamp.com') | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     musicSiteFound = False | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  |     for site in musicSites: | 
					
						
							| 
									
										
										
										
											2021-06-22 12:42:52 +00:00
										 |  |  |         if site + '/' in content: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             musicSiteFound = True | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  |             break | 
					
						
							|  |  |  |     if not musicSiteFound: | 
					
						
							|  |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     return ':music: ' + content + ' ' + tag + ' ' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-05 09:54:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-21 12:07:30 +00:00
										 |  |  | def addWebLinks(content: str) -> str: | 
					
						
							|  |  |  |     """Adds markup for web links
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-06-11 09:43:48 +00:00
										 |  |  |     if ':' not in content: | 
					
						
							|  |  |  |         return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-11 12:26:15 +00:00
										 |  |  |     prefixes = getLinkPrefixes() | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # do any of these prefixes exist within the content? | 
					
						
							|  |  |  |     prefixFound = False | 
					
						
							|  |  |  |     for prefix in prefixes: | 
					
						
							|  |  |  |         if prefix in content: | 
					
						
							|  |  |  |             prefixFound = True | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # if there are no prefixes then just keep the content we have | 
					
						
							|  |  |  |     if not prefixFound: | 
					
						
							| 
									
										
										
										
											2019-08-21 12:07:30 +00:00
										 |  |  |         return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     maxLinkLength = 40 | 
					
						
							| 
									
										
										
										
											2020-05-22 11:32:38 +00:00
										 |  |  |     content = content.replace('\r', '') | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     words = content.replace('\n', ' --linebreak-- ').split(' ') | 
					
						
							|  |  |  |     replaceDict = {} | 
					
						
							| 
									
										
										
										
											2019-08-21 12:07:30 +00:00
										 |  |  |     for w in words: | 
					
						
							| 
									
										
										
										
											2020-06-11 09:43:48 +00:00
										 |  |  |         if ':' not in w: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  |         # does the word begin with a prefix? | 
					
						
							|  |  |  |         prefixFound = False | 
					
						
							|  |  |  |         for prefix in prefixes: | 
					
						
							|  |  |  |             if w.startswith(prefix): | 
					
						
							|  |  |  |                 prefixFound = True | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         if not prefixFound: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         # the word contains a prefix | 
					
						
							|  |  |  |         if w.endswith('.') or w.endswith(';'): | 
					
						
							|  |  |  |             w = w[:-1] | 
					
						
							|  |  |  |         markup = '<a href="' + w + \ | 
					
						
							| 
									
										
										
										
											2020-12-11 10:14:58 +00:00
										 |  |  |             '" rel="nofollow noopener noreferrer" target="_blank">' | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  |         for prefix in prefixes: | 
					
						
							|  |  |  |             if w.startswith(prefix): | 
					
						
							|  |  |  |                 markup += '<span class="invisible">' + prefix + '</span>' | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         linkText = w | 
					
						
							|  |  |  |         for prefix in prefixes: | 
					
						
							|  |  |  |             linkText = linkText.replace(prefix, '') | 
					
						
							|  |  |  |         # prevent links from becoming too long | 
					
						
							|  |  |  |         if len(linkText) > maxLinkLength: | 
					
						
							|  |  |  |             markup += '<span class="ellipsis">' + \ | 
					
						
							|  |  |  |                 linkText[:maxLinkLength] + '</span>' | 
					
						
							|  |  |  |             markup += '<span class="invisible">' + \ | 
					
						
							|  |  |  |                 linkText[maxLinkLength:] + '</span></a>' | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             markup += '<span class="ellipsis">' + linkText + '</span></a>' | 
					
						
							|  |  |  |         replaceDict[w] = markup | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # do the replacements | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     for url, markup in replaceDict.items(): | 
					
						
							|  |  |  |         content = content.replace(url, markup) | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # replace any line breaks | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     content = content.replace(' --linebreak-- ', '<br>') | 
					
						
							| 
									
										
										
										
											2020-06-11 11:56:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-21 12:07:30 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  | def validHashTag(hashtag: str) -> bool: | 
					
						
							|  |  |  |     """Returns true if the give hashtag contains valid characters
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-08-07 20:43:54 +00:00
										 |  |  |     # long hashtags are not valid | 
					
						
							|  |  |  |     if len(hashtag) >= 32: | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     validChars = set('0123456789' + | 
					
						
							|  |  |  |                      'abcdefghijklmnopqrstuvwxyz' + | 
					
						
							| 
									
										
										
										
											2021-02-09 13:28:42 +00:00
										 |  |  |                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + | 
					
						
							|  |  |  |                      '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + | 
					
						
							|  |  |  |                      'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + | 
					
						
							|  |  |  |                      'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + | 
					
						
							|  |  |  |                      'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + | 
					
						
							|  |  |  |                      'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + | 
					
						
							|  |  |  |                      'ŴŵÝýŸÿŶŷŹźŽžŻż') | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |     if set(hashtag).issubset(validChars): | 
					
						
							| 
									
										
										
										
											2021-02-09 13:28:42 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2021-02-09 14:41:32 +00:00
										 |  |  |     if isValidLanguage(hashtag): | 
					
						
							|  |  |  |         return True | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _addHashTags(wordStr: str, httpPrefix: str, domain: str, | 
					
						
							|  |  |  |                  replaceHashTags: {}, postHashtags: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |     """Detects hashtags and adds them to the replacements dict
 | 
					
						
							|  |  |  |     Also updates the hashtags list to be added to the post | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if replaceHashTags.get(wordStr): | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         return True | 
					
						
							|  |  |  |     hashtag = wordStr[1:] | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |     if not validHashTag(hashtag): | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag | 
					
						
							|  |  |  |     postHashtags[hashtag] = { | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |         'href': hashtagUrl, | 
					
						
							| 
									
										
										
										
											2020-10-16 20:13:23 +00:00
										 |  |  |         'name': '#' + hashtag, | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |         'type': 'Hashtag' | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     replaceHashTags[wordStr] = "<a href=\"" + hashtagUrl + \ | 
					
						
							|  |  |  |         "\" class=\"mention hashtag\" rel=\"tag\">#<span>" + \ | 
					
						
							|  |  |  |         hashtag + "</span></a>" | 
					
						
							| 
									
										
										
										
											2019-08-09 11:12:08 +00:00
										 |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _addEmoji(baseDir: str, wordStr: str, | 
					
						
							|  |  |  |               httpPrefix: str, domain: str, | 
					
						
							|  |  |  |               replaceEmoji: {}, postTags: {}, | 
					
						
							|  |  |  |               emojiDict: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2019-08-09 16:18:00 +00:00
										 |  |  |     """Detects Emoji and adds them to the replacements dict
 | 
					
						
							|  |  |  |     Also updates the tags list to be added to the post | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not wordStr.startswith(':'): | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     if not wordStr.endswith(':'): | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     if len(wordStr) < 3: | 
					
						
							| 
									
										
										
										
											2019-08-09 16:18:00 +00:00
										 |  |  |         return False | 
					
						
							|  |  |  |     if replaceEmoji.get(wordStr): | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2019-09-23 11:11:13 +00:00
										 |  |  |     # remove leading and trailing : characters | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     emoji = wordStr[1:] | 
					
						
							|  |  |  |     emoji = emoji[:-1] | 
					
						
							| 
									
										
										
										
											2019-09-23 11:11:13 +00:00
										 |  |  |     # is the text of the emoji valid? | 
					
						
							| 
									
										
										
										
											2019-08-09 16:18:00 +00:00
										 |  |  |     if not validHashTag(emoji): | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     if not emojiDict.get(emoji): | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     emojiFilename = baseDir + '/emoji/' + emojiDict[emoji] + '.png' | 
					
						
							| 
									
										
										
										
											2019-08-09 16:18:00 +00:00
										 |  |  |     if not os.path.isfile(emojiFilename): | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     emojiUrl = httpPrefix + "://" + domain + \ | 
					
						
							|  |  |  |         "/emoji/" + emojiDict[emoji] + '.png' | 
					
						
							|  |  |  |     postTags[emoji] = { | 
					
						
							| 
									
										
										
										
											2019-08-19 13:35:55 +00:00
										 |  |  |         'icon': { | 
					
						
							|  |  |  |             'mediaType': 'image/png', | 
					
						
							|  |  |  |             'type': 'Image', | 
					
						
							|  |  |  |             'url': emojiUrl | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2021-06-22 12:42:52 +00:00
										 |  |  |         'name': ':' + emoji + ':', | 
					
						
							| 
									
										
										
										
											2020-02-21 10:19:02 +00:00
										 |  |  |         "updated": fileLastModified(emojiFilename), | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         "id": emojiUrl.replace('.png', ''), | 
					
						
							| 
									
										
										
										
											2019-08-09 16:18:00 +00:00
										 |  |  |         'type': 'Emoji' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-13 20:07:45 +00:00
										 |  |  | def tagExists(tagType: str, tagName: str, tags: {}) -> bool: | 
					
						
							|  |  |  |     """Returns true if a tag exists in the given dict
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     for tag in tags: | 
					
						
							|  |  |  |         if tag['name'] == tagName and tag['type'] == tagType: | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  | def _addMention(wordStr: str, httpPrefix: str, following: str, petnames: str, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                 replaceMentions: {}, recipients: [], tags: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |     """Detects mentions and adds them to the replacements dict and
 | 
					
						
							|  |  |  |     recipients list | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     possibleHandle = wordStr[1:] | 
					
						
							| 
									
										
										
										
											2019-08-19 10:05:50 +00:00
										 |  |  |     # @nick | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |     if following and '@' not in possibleHandle: | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |         # fall back to a best effort match against the following list | 
					
						
							|  |  |  |         # if no domain was specified. eg. @nick | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         possibleNickname = possibleHandle | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |         for follow in following: | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |             if '@' not in follow: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             followNick = follow.split('@')[0] | 
					
						
							|  |  |  |             if possibleNickname == followNick: | 
					
						
							|  |  |  |                 followStr = follow.replace('\n', '').replace('\r', '') | 
					
						
							|  |  |  |                 replaceDomain = followStr.split('@')[1] | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                 recipientActor = httpPrefix + "://" + \ | 
					
						
							| 
									
										
										
										
											2021-08-14 08:44:58 +00:00
										 |  |  |                     replaceDomain + "/@" + possibleNickname | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |                 if recipientActor not in recipients: | 
					
						
							|  |  |  |                     recipients.append(recipientActor) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                 tags[wordStr] = { | 
					
						
							| 
									
										
										
										
											2019-08-19 12:13:18 +00:00
										 |  |  |                     'href': recipientActor, | 
					
						
							|  |  |  |                     'name': wordStr, | 
					
						
							|  |  |  |                     'type': 'Mention' | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                 replaceMentions[wordStr] = \ | 
					
						
							|  |  |  |                     "<span class=\"h-card\"><a href=\"" + httpPrefix + \ | 
					
						
							|  |  |  |                     "://" + replaceDomain + "/@" + possibleNickname + \ | 
					
						
							|  |  |  |                     "\" class=\"u-url mention\">@<span>" + possibleNickname + \ | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |                     "</span></a></span>" | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |                 return True | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |         # try replacing petnames with mentions | 
					
						
							|  |  |  |         followCtr = 0 | 
					
						
							|  |  |  |         for follow in following: | 
					
						
							|  |  |  |             if '@' not in follow: | 
					
						
							| 
									
										
										
										
											2021-01-29 21:34:08 +00:00
										 |  |  |                 followCtr += 1 | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |                 continue | 
					
						
							|  |  |  |             pet = petnames[followCtr].replace('\n', '') | 
					
						
							|  |  |  |             if pet: | 
					
						
							|  |  |  |                 if possibleNickname == pet: | 
					
						
							|  |  |  |                     followStr = follow.replace('\n', '').replace('\r', '') | 
					
						
							|  |  |  |                     replaceNickname = followStr.split('@')[0] | 
					
						
							|  |  |  |                     replaceDomain = followStr.split('@')[1] | 
					
						
							|  |  |  |                     recipientActor = httpPrefix + "://" + \ | 
					
						
							| 
									
										
										
										
											2021-08-14 08:44:58 +00:00
										 |  |  |                         replaceDomain + "/@" + replaceNickname | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |                     if recipientActor not in recipients: | 
					
						
							|  |  |  |                         recipients.append(recipientActor) | 
					
						
							|  |  |  |                     tags[wordStr] = { | 
					
						
							|  |  |  |                         'href': recipientActor, | 
					
						
							|  |  |  |                         'name': wordStr, | 
					
						
							|  |  |  |                         'type': 'Mention' | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     replaceMentions[wordStr] = \ | 
					
						
							|  |  |  |                         "<span class=\"h-card\"><a href=\"" + httpPrefix + \ | 
					
						
							|  |  |  |                         "://" + replaceDomain + "/@" + replaceNickname + \ | 
					
						
							|  |  |  |                         "\" class=\"u-url mention\">@<span>" + \ | 
					
						
							|  |  |  |                         replaceNickname + "</span></a></span>" | 
					
						
							|  |  |  |                     return True | 
					
						
							|  |  |  |             followCtr += 1 | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     possibleNickname = None | 
					
						
							|  |  |  |     possibleDomain = None | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |     if '@' not in possibleHandle: | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     possibleNickname = possibleHandle.split('@')[0] | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |     if not possibleNickname: | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-05-22 11:32:38 +00:00
										 |  |  |     possibleDomain = \ | 
					
						
							|  |  |  |         possibleHandle.split('@')[1].strip('\n').strip('\r') | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |     if not possibleDomain: | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |     if following: | 
					
						
							|  |  |  |         for follow in following: | 
					
						
							| 
									
										
										
										
											2020-05-22 11:32:38 +00:00
										 |  |  |             if follow.replace('\n', '').replace('\r', '') != possibleHandle: | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             recipientActor = httpPrefix + "://" + \ | 
					
						
							| 
									
										
										
										
											2021-08-14 08:44:58 +00:00
										 |  |  |                 possibleDomain + "/@" + possibleNickname | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |             if recipientActor not in recipients: | 
					
						
							|  |  |  |                 recipients.append(recipientActor) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             tags[wordStr] = { | 
					
						
							| 
									
										
										
										
											2019-08-19 12:13:18 +00:00
										 |  |  |                 'href': recipientActor, | 
					
						
							|  |  |  |                 'name': wordStr, | 
					
						
							|  |  |  |                 'type': 'Mention' | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             replaceMentions[wordStr] = \ | 
					
						
							|  |  |  |                 "<span class=\"h-card\"><a href=\"" + httpPrefix + \ | 
					
						
							|  |  |  |                 "://" + possibleDomain + "/@" + possibleNickname + \ | 
					
						
							|  |  |  |                 "\" class=\"u-url mention\">@<span>" + possibleNickname + \ | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |                 "</span></a></span>" | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |             return True | 
					
						
							| 
									
										
										
										
											2019-08-19 10:05:50 +00:00
										 |  |  |     # @nick@domain | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     if not (possibleDomain == 'localhost' or '.' in possibleDomain): | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  |         return False | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     recipientActor = httpPrefix + "://" + \ | 
					
						
							| 
									
										
										
										
											2021-08-14 08:44:58 +00:00
										 |  |  |         possibleDomain + "/@" + possibleNickname | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |     if recipientActor not in recipients: | 
					
						
							|  |  |  |         recipients.append(recipientActor) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     tags[wordStr] = { | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |         'href': recipientActor, | 
					
						
							|  |  |  |         'name': wordStr, | 
					
						
							|  |  |  |         'type': 'Mention' | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     replaceMentions[wordStr] = \ | 
					
						
							|  |  |  |         "<span class=\"h-card\"><a href=\"" + httpPrefix + \ | 
					
						
							|  |  |  |         "://" + possibleDomain + "/@" + possibleNickname + \ | 
					
						
							|  |  |  |         "\" class=\"u-url mention\">@<span>" + possibleNickname + \ | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |         "</span></a></span>" | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |     return True | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-12 09:34:58 +00:00
										 |  |  | def replaceContentDuplicates(content: str) -> str: | 
					
						
							|  |  |  |     """Replaces invalid duplicates within content
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-05-12 09:34:58 +00:00
										 |  |  |     while '<<' in content: | 
					
						
							|  |  |  |         content = content.replace('<<', '<') | 
					
						
							|  |  |  |     while '>>' in content: | 
					
						
							|  |  |  |         content = content.replace('>>', '>') | 
					
						
							| 
									
										
										
										
											2020-05-12 09:42:24 +00:00
										 |  |  |     content = content.replace('<\\p>', '') | 
					
						
							| 
									
										
										
										
											2020-05-12 09:34:58 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-14 13:25:38 +00:00
										 |  |  | def removeTextFormatting(content: str) -> str: | 
					
						
							|  |  |  |     """Removes markup for bold, italics, etc
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-06-14 13:25:38 +00:00
										 |  |  |     if '<' not in content: | 
					
						
							|  |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-06-14 13:39:03 +00:00
										 |  |  |     removeMarkup = ('b', 'i', 'ul', 'ol', 'li', 'em', 'strong', | 
					
						
							|  |  |  |                     'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5') | 
					
						
							|  |  |  |     for markup in removeMarkup: | 
					
						
							|  |  |  |         content = content.replace('<' + markup + '>', '') | 
					
						
							|  |  |  |         content = content.replace('</' + markup + '>', '') | 
					
						
							|  |  |  |         content = content.replace('<' + markup.upper() + '>', '') | 
					
						
							|  |  |  |         content = content.replace('</' + markup.upper() + '>', '') | 
					
						
							| 
									
										
										
										
											2020-06-14 13:25:38 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | def removeLongWords(content: str, maxWordLength: int, | 
					
						
							|  |  |  |                     longWordsList: []) -> str: | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |     """Breaks up long words so that on mobile screens this doesn't
 | 
					
						
							|  |  |  |     disrupt the layout | 
					
						
							| 
									
										
										
										
											2019-10-09 12:19:17 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-12 12:04:34 +00:00
										 |  |  |     if isPGPEncrypted(content) or containsPGPPublicKey(content): | 
					
						
							| 
									
										
										
										
											2021-03-11 17:15:32 +00:00
										 |  |  |         return content | 
					
						
							| 
									
										
										
										
											2020-05-12 09:34:58 +00:00
										 |  |  |     content = replaceContentDuplicates(content) | 
					
						
							| 
									
										
										
										
											2019-12-13 12:41:26 +00:00
										 |  |  |     if ' ' not in content: | 
					
						
							|  |  |  |         # handle a single very long string with no spaces | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         contentStr = content.replace('<p>', '').replace(r'<\p>', '') | 
					
						
							| 
									
										
										
										
											2019-12-13 12:41:26 +00:00
										 |  |  |         if '://' not in contentStr: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             if len(contentStr) > maxWordLength: | 
					
						
							| 
									
										
										
										
											2019-12-13 12:41:26 +00:00
										 |  |  |                 if '<p>' in content: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                     content = '<p>' + contentStr[:maxWordLength] + r'<\p>' | 
					
						
							| 
									
										
										
										
											2019-12-13 12:41:26 +00:00
										 |  |  |                 else: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                     content = content[:maxWordLength] | 
					
						
							| 
									
										
										
										
											2019-12-13 12:41:26 +00:00
										 |  |  |                 return content | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     words = content.split(' ') | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |     if not longWordsList: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         longWordsList = [] | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |         for wordStr in words: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             if len(wordStr) > maxWordLength: | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |                 if wordStr not in longWordsList: | 
					
						
							|  |  |  |                     longWordsList.append(wordStr) | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |     for wordStr in longWordsList: | 
					
						
							| 
									
										
										
										
											2021-03-17 21:17:27 +00:00
										 |  |  |         if wordStr.startswith('<p>'): | 
					
						
							|  |  |  |             wordStr = wordStr.replace('<p>', '') | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |         if wordStr.startswith('<'): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         if len(wordStr) == 76: | 
					
						
							|  |  |  |             if wordStr.upper() == wordStr: | 
					
						
							| 
									
										
										
										
											2020-03-22 14:29:34 +00:00
										 |  |  |                 # tox address | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2019-11-04 21:08:43 +00:00
										 |  |  |         if '=\"' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '@' in wordStr: | 
					
						
							| 
									
										
										
										
											2019-11-04 21:11:09 +00:00
										 |  |  |             if '@@' not in wordStr: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2020-01-25 10:49:59 +00:00
										 |  |  |         if '=.ed25519' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '.onion' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '.i2p' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |         if 'https:' in wordStr: | 
					
						
							| 
									
										
										
										
											2019-10-25 18:27:32 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |         elif 'http:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-02-17 17:18:21 +00:00
										 |  |  |         elif 'i2p:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-06-09 11:51:51 +00:00
										 |  |  |         elif 'gnunet:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |         elif 'dat:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-12-06 10:18:41 +00:00
										 |  |  |         elif 'rad:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-05-17 09:37:59 +00:00
										 |  |  |         elif 'hyper:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         elif 'briar:' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2019-11-04 20:39:14 +00:00
										 |  |  |         if '<' in wordStr: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             replaceWord = wordStr.split('<', 1)[0] | 
					
						
							| 
									
										
										
										
											2021-03-17 21:17:27 +00:00
										 |  |  |             # if len(replaceWord) > maxWordLength: | 
					
						
							|  |  |  |             #     replaceWord = replaceWord[:maxWordLength] | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             content = content.replace(wordStr, replaceWord) | 
					
						
							|  |  |  |             wordStr = replaceWord | 
					
						
							| 
									
										
										
										
											2019-10-25 18:27:32 +00:00
										 |  |  |         if '/' in wordStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         if len(wordStr[maxWordLength:]) < maxWordLength: | 
					
						
							|  |  |  |             content = content.replace(wordStr, | 
					
						
							|  |  |  |                                       wordStr[:maxWordLength] + '\n' + | 
					
						
							|  |  |  |                                       wordStr[maxWordLength:]) | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             content = content.replace(wordStr, | 
					
						
							|  |  |  |                                       wordStr[:maxWordLength]) | 
					
						
							| 
									
										
										
										
											2020-01-24 11:27:12 +00:00
										 |  |  |     if content.startswith('<p>'): | 
					
						
							|  |  |  |         if not content.endswith('</p>'): | 
					
						
							| 
									
										
										
										
											2020-10-31 23:10:38 +00:00
										 |  |  |             content = content.strip() + '</p>' | 
					
						
							| 
									
										
										
										
											2019-10-09 12:19:17 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _loadAutoTags(baseDir: str, nickname: str, domain: str) -> []: | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     """Loads automatic tags file and returns a list containing
 | 
					
						
							|  |  |  |     the lines of the file | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-07-13 21:59:53 +00:00
										 |  |  |     filename = acctDir(baseDir, nickname, domain) + '/autotags.txt' | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     if not os.path.isfile(filename): | 
					
						
							|  |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         with open(filename, 'r') as f: | 
					
						
							|  |  |  |             return f.readlines() | 
					
						
							|  |  |  |     except OSError: | 
					
						
							|  |  |  |         print('EX: unable to read auto tags ' + filename) | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _autoTag(baseDir: str, nickname: str, domain: str, | 
					
						
							|  |  |  |              wordStr: str, autoTagList: [], | 
					
						
							|  |  |  |              appendTags: []): | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     """Generates a list of tags to be automatically appended to the content
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     for tagRule in autoTagList: | 
					
						
							|  |  |  |         if wordStr not in tagRule: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '->' not in tagRule: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-10-07 19:03:01 +00:00
										 |  |  |         rulematch = tagRule.split('->')[0].strip() | 
					
						
							|  |  |  |         if rulematch != wordStr: | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         tagName = tagRule.split('->')[1].strip() | 
					
						
							|  |  |  |         if tagName.startswith('#'): | 
					
						
							|  |  |  |             if tagName not in appendTags: | 
					
						
							|  |  |  |                 appendTags.append(tagName) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             if '#' + tagName not in appendTags: | 
					
						
							|  |  |  |                 appendTags.append('#' + tagName) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | def addHtmlTags(baseDir: str, httpPrefix: str, | 
					
						
							|  |  |  |                 nickname: str, domain: str, content: str, | 
					
						
							| 
									
										
										
										
											2021-06-20 11:28:35 +00:00
										 |  |  |                 recipients: [], hashtags: {}, | 
					
						
							|  |  |  |                 isJsonContent: bool = False) -> str: | 
					
						
							| 
									
										
										
										
											2019-07-15 14:11:31 +00:00
										 |  |  |     """ Replaces plaintext mentions such as @nick@domain into html
 | 
					
						
							|  |  |  |     by matching against known following accounts | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if content.startswith('<p>'): | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  |         content = htmlReplaceEmailQuote(content) | 
					
						
							| 
									
										
										
										
											2020-08-02 18:30:35 +00:00
										 |  |  |         return htmlReplaceQuoteMarks(content) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     maxWordLength = 40 | 
					
						
							| 
									
										
										
										
											2020-05-22 11:32:38 +00:00
										 |  |  |     content = content.replace('\r', '') | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     content = content.replace('\n', ' --linebreak-- ') | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |     content = _addMusicTag(content, 'nowplaying') | 
					
						
							| 
									
										
										
										
											2020-10-16 19:49:34 +00:00
										 |  |  |     contentSimplified = \ | 
					
						
							|  |  |  |         content.replace(',', ' ').replace(';', ' ').replace('- ', ' ') | 
					
						
							|  |  |  |     contentSimplified = contentSimplified.replace('. ', ' ').strip() | 
					
						
							|  |  |  |     if contentSimplified.endswith('.'): | 
					
						
							|  |  |  |         contentSimplified = contentSimplified[:len(contentSimplified)-1] | 
					
						
							|  |  |  |     words = contentSimplified.split(' ') | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-19 11:07:04 +00:00
										 |  |  |     # remove . for words which are not mentions | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     newWords = [] | 
					
						
							|  |  |  |     for wordIndex in range(0, len(words)): | 
					
						
							|  |  |  |         wordStr = words[wordIndex] | 
					
						
							| 
									
										
										
										
											2019-08-19 11:07:04 +00:00
										 |  |  |         if wordStr.endswith('.'): | 
					
						
							|  |  |  |             if not wordStr.startswith('@'): | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                 wordStr = wordStr[:-1] | 
					
						
							| 
									
										
										
										
											2019-08-19 11:08:47 +00:00
										 |  |  |         if wordStr.startswith('.'): | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             wordStr = wordStr[1:] | 
					
						
							| 
									
										
										
										
											2019-08-19 11:14:38 +00:00
										 |  |  |         newWords.append(wordStr) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     words = newWords | 
					
						
							| 
									
										
										
										
											2019-08-19 11:14:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     replaceMentions = {} | 
					
						
							|  |  |  |     replaceHashTags = {} | 
					
						
							|  |  |  |     replaceEmoji = {} | 
					
						
							|  |  |  |     emojiDict = {} | 
					
						
							|  |  |  |     originalDomain = domain | 
					
						
							| 
									
										
										
										
											2021-06-23 21:31:50 +00:00
										 |  |  |     domain = removeDomainPort(domain) | 
					
						
							| 
									
										
										
										
											2021-07-13 21:59:53 +00:00
										 |  |  |     followingFilename = acctDir(baseDir, nickname, domain) + '/following.txt' | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # read the following list so that we can detect just @nick | 
					
						
							|  |  |  |     # in addition to @nick@domain | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     following = None | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |     petnames = None | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |     if '@' in words: | 
					
						
							|  |  |  |         if os.path.isfile(followingFilename): | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |             following = [] | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 with open(followingFilename, 'r') as f: | 
					
						
							|  |  |  |                     following = f.readlines() | 
					
						
							|  |  |  |             except OSError: | 
					
						
							|  |  |  |                 print('EX: unable to read ' + followingFilename) | 
					
						
							|  |  |  |             for handle in following: | 
					
						
							|  |  |  |                 pet = getPetName(baseDir, nickname, domain, handle) | 
					
						
							|  |  |  |                 if pet: | 
					
						
							|  |  |  |                     petnames.append(pet + '\n') | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # extract mentions and tags from words | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     longWordsList = [] | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     prevWordStr = '' | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |     autoTagsList = _loadAutoTags(baseDir, nickname, domain) | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |     appendTags = [] | 
					
						
							| 
									
										
										
										
											2019-07-15 14:11:31 +00:00
										 |  |  |     for wordStr in words: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         wordLen = len(wordStr) | 
					
						
							|  |  |  |         if wordLen > 2: | 
					
						
							|  |  |  |             if wordLen > maxWordLength: | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |                 longWordsList.append(wordStr) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             firstChar = wordStr[0] | 
					
						
							|  |  |  |             if firstChar == '@': | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |                 if _addMention(wordStr, httpPrefix, following, petnames, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                replaceMentions, recipients, hashtags): | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |                     prevWordStr = '' | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             elif firstChar == '#': | 
					
						
							| 
									
										
										
										
											2021-02-13 12:12:06 +00:00
										 |  |  |                 # remove any endings from the hashtag | 
					
						
							|  |  |  |                 hashTagEndings = ('.', ':', ';', '-', '\n') | 
					
						
							|  |  |  |                 for ending in hashTagEndings: | 
					
						
							|  |  |  |                     if wordStr.endswith(ending): | 
					
						
							| 
									
										
										
										
											2021-02-13 12:17:03 +00:00
										 |  |  |                         wordStr = wordStr[:len(wordStr) - 1] | 
					
						
							| 
									
										
										
										
											2021-02-13 12:26:25 +00:00
										 |  |  |                         break | 
					
						
							| 
									
										
										
										
											2021-02-13 12:12:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                 if _addHashTags(wordStr, httpPrefix, originalDomain, | 
					
						
							|  |  |  |                                 replaceHashTags, hashtags): | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |                     prevWordStr = '' | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |                     continue | 
					
						
							|  |  |  |             elif ':' in wordStr: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                 wordStr2 = wordStr.split(':')[1] | 
					
						
							| 
									
										
										
										
											2021-06-22 12:42:52 +00:00
										 |  |  | #                print('TAG: emoji located - ' + wordStr) | 
					
						
							| 
									
										
										
										
											2019-10-09 18:32:53 +00:00
										 |  |  |                 if not emojiDict: | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |                     # emoji.json is generated so that it can be customized and | 
					
						
							|  |  |  |                     # the changes will be retained even if default_emoji.json | 
					
						
							|  |  |  |                     # is subsequently updated | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |                     if not os.path.isfile(baseDir + '/emoji/emoji.json'): | 
					
						
							|  |  |  |                         copyfile(baseDir + '/emoji/default_emoji.json', | 
					
						
							|  |  |  |                                  baseDir + '/emoji/emoji.json') | 
					
						
							|  |  |  |                 emojiDict = loadJson(baseDir + '/emoji/emoji.json') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-01 22:45:57 +00:00
										 |  |  |                 # append custom emoji to the dict | 
					
						
							|  |  |  |                 if os.path.isfile(baseDir + '/emojicustom/emoji.json'): | 
					
						
							|  |  |  |                     customEmojiDict = \ | 
					
						
							|  |  |  |                         loadJson(baseDir + '/emojicustom/emoji.json') | 
					
						
							| 
									
										
										
										
											2021-11-01 22:50:26 +00:00
										 |  |  |                     if customEmojiDict: | 
					
						
							| 
									
										
										
										
											2021-11-01 23:39:20 +00:00
										 |  |  |                         emojiDict = dict(emojiDict, **customEmojiDict) | 
					
						
							| 
									
										
										
										
											2021-11-01 22:45:57 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-22 12:42:52 +00:00
										 |  |  | #                print('TAG: looking up emoji for :' + wordStr2 + ':') | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                 _addEmoji(baseDir, ':' + wordStr2 + ':', httpPrefix, | 
					
						
							|  |  |  |                           originalDomain, replaceEmoji, hashtags, | 
					
						
							|  |  |  |                           emojiDict) | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                 if _autoTag(baseDir, nickname, domain, wordStr, | 
					
						
							|  |  |  |                             autoTagsList, appendTags): | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |                     prevWordStr = '' | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 if prevWordStr: | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                     if _autoTag(baseDir, nickname, domain, | 
					
						
							|  |  |  |                                 prevWordStr + ' ' + wordStr, | 
					
						
							|  |  |  |                                 autoTagsList, appendTags): | 
					
						
							| 
									
										
										
										
											2020-09-13 14:42:17 +00:00
										 |  |  |                         prevWordStr = '' | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  |             prevWordStr = wordStr | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # add any auto generated tags | 
					
						
							|  |  |  |     for appended in appendTags: | 
					
						
							|  |  |  |         content = content + ' ' + appended | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |         _addHashTags(appended, httpPrefix, originalDomain, | 
					
						
							|  |  |  |                      replaceHashTags, hashtags) | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # replace words with their html versions | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     for wordStr, replaceStr in replaceMentions.items(): | 
					
						
							|  |  |  |         content = content.replace(wordStr, replaceStr) | 
					
						
							|  |  |  |     for wordStr, replaceStr in replaceHashTags.items(): | 
					
						
							|  |  |  |         content = content.replace(wordStr, replaceStr) | 
					
						
							| 
									
										
										
										
											2019-10-29 13:04:38 +00:00
										 |  |  |     if not isJsonContent: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         for wordStr, replaceStr in replaceEmoji.items(): | 
					
						
							|  |  |  |             content = content.replace(wordStr, replaceStr) | 
					
						
							| 
									
										
										
										
											2019-10-29 13:04:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     content = addWebLinks(content) | 
					
						
							| 
									
										
										
										
											2019-10-18 12:24:31 +00:00
										 |  |  |     if longWordsList: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         content = removeLongWords(content, maxWordLength, longWordsList) | 
					
						
							| 
									
										
										
										
											2021-07-10 09:38:59 +00:00
										 |  |  |     content = limitRepeatedWords(content, 6) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     content = content.replace(' --linebreak-- ', '</p><p>') | 
					
						
							| 
									
										
										
										
											2020-09-14 09:33:42 +00:00
										 |  |  |     content = htmlReplaceEmailQuote(content) | 
					
						
							| 
									
										
										
										
											2020-08-02 18:09:50 +00:00
										 |  |  |     return '<p>' + htmlReplaceQuoteMarks(content) + '</p>' | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def getMentionsFromHtml(htmlText: str, | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |                         matchStr="<span class=\"h-card\"><a href=\"") -> []: | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |     """Extracts mentioned actors from the given html content string
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     mentions = [] | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |     if matchStr not in htmlText: | 
					
						
							|  |  |  |         return mentions | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     mentionsList = htmlText.split(matchStr) | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |     for mentionStr in mentionsList: | 
					
						
							|  |  |  |         if '"' not in mentionStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         actorStr = mentionStr.split('"')[0] | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |         if actorStr.startswith('http') or \ | 
					
						
							| 
									
										
										
										
											2020-06-09 11:51:51 +00:00
										 |  |  |            actorStr.startswith('gnunet') or \ | 
					
						
							| 
									
										
										
										
											2020-02-17 17:18:21 +00:00
										 |  |  |            actorStr.startswith('i2p') or \ | 
					
						
							| 
									
										
										
										
											2020-05-17 09:37:59 +00:00
										 |  |  |            actorStr.startswith('hyper') or \ | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |            actorStr.startswith('dat:'): | 
					
						
							| 
									
										
										
										
											2019-09-22 17:54:33 +00:00
										 |  |  |             if actorStr not in mentions: | 
					
						
							|  |  |  |                 mentions.append(actorStr) | 
					
						
							| 
									
										
										
										
											2019-08-05 19:13:15 +00:00
										 |  |  |     return mentions | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def extractMediaInFormPOST(postBytes, boundary, name: str): | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |     """Extracts the binary encoding for image/video/audio within a http
 | 
					
						
							|  |  |  |     form POST | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     Returns the media bytes and the remaining bytes | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     imageStartBoundary = b'Content-Disposition: form-data; name="' + \ | 
					
						
							|  |  |  |         name.encode('utf8', 'ignore') + b'";' | 
					
						
							|  |  |  |     imageStartLocation = postBytes.find(imageStartBoundary) | 
					
						
							|  |  |  |     if imageStartLocation == -1: | 
					
						
							|  |  |  |         return None, postBytes | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # bytes after the start boundary appears | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     mediaBytes = postBytes[imageStartLocation:] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # look for the next boundary | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     imageEndBoundary = boundary.encode('utf8', 'ignore') | 
					
						
							|  |  |  |     imageEndLocation = mediaBytes.find(imageEndBoundary) | 
					
						
							|  |  |  |     if imageEndLocation == -1: | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         # no ending boundary | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         return mediaBytes, postBytes[:imageStartLocation] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # remaining bytes after the end of the image | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     remainder = mediaBytes[imageEndLocation:] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # remove bytes after the end boundary | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     mediaBytes = mediaBytes[:imageEndLocation] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return the media and the before+after bytes | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     return mediaBytes, postBytes[:imageStartLocation] + remainder | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def saveMediaInFormPOST(mediaBytes, debug: bool, | 
					
						
							| 
									
										
										
										
											2021-06-20 11:28:35 +00:00
										 |  |  |                         filenameBase: str = None) -> (str, str): | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     """Saves the given media bytes extracted from http form POST
 | 
					
						
							|  |  |  |     Returns the filename and attachment type | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not mediaBytes: | 
					
						
							| 
									
										
										
										
											2021-08-09 21:27:13 +00:00
										 |  |  |         if filenameBase: | 
					
						
							|  |  |  |             # remove any existing files | 
					
						
							|  |  |  |             extensionTypes = getImageExtensions() | 
					
						
							|  |  |  |             for ex in extensionTypes: | 
					
						
							|  |  |  |                 possibleOtherFormat = filenameBase + '.' + ex | 
					
						
							|  |  |  |                 if os.path.isfile(possibleOtherFormat): | 
					
						
							| 
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 |  |  |                     try: | 
					
						
							|  |  |  |                         os.remove(possibleOtherFormat) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |                     except OSError: | 
					
						
							| 
									
										
										
										
											2021-10-29 16:31:20 +00:00
										 |  |  |                         if debug: | 
					
						
							|  |  |  |                             print('EX: saveMediaInFormPOST ' + | 
					
						
							|  |  |  |                                   'unable to delete other ' + | 
					
						
							|  |  |  |                                   str(possibleOtherFormat)) | 
					
						
							| 
									
										
										
										
											2021-08-09 21:27:13 +00:00
										 |  |  |             if os.path.isfile(filenameBase): | 
					
						
							| 
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 |  |  |                 try: | 
					
						
							|  |  |  |                     os.remove(filenameBase) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |                 except OSError: | 
					
						
							| 
									
										
										
										
											2021-10-29 16:31:20 +00:00
										 |  |  |                     if debug: | 
					
						
							|  |  |  |                         print('EX: saveMediaInFormPOST ' + | 
					
						
							|  |  |  |                               'unable to delete ' + | 
					
						
							|  |  |  |                               str(filenameBase)) | 
					
						
							| 
									
										
										
										
											2021-08-09 21:27:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         if debug: | 
					
						
							|  |  |  |             print('DEBUG: No media found within POST') | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         return None, None | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     mediaLocation = -1 | 
					
						
							|  |  |  |     searchStr = '' | 
					
						
							|  |  |  |     filename = None | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     # directly search the binary array for the beginning | 
					
						
							|  |  |  |     # of an image | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     extensionList = { | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         'png': 'image/png', | 
					
						
							|  |  |  |         'jpeg': 'image/jpeg', | 
					
						
							|  |  |  |         'gif': 'image/gif', | 
					
						
							| 
									
										
										
										
											2021-01-11 22:27:57 +00:00
										 |  |  |         'svg': 'image/svg+xml', | 
					
						
							| 
									
										
										
										
											2019-11-14 13:30:54 +00:00
										 |  |  |         'webp': 'image/webp', | 
					
						
							| 
									
										
										
										
											2020-09-09 15:09:38 +00:00
										 |  |  |         'avif': 'image/avif', | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         'mp4': 'video/mp4', | 
					
						
							|  |  |  |         'ogv': 'video/ogv', | 
					
						
							|  |  |  |         'mp3': 'audio/mpeg', | 
					
						
							| 
									
										
										
										
											2021-05-29 11:04:03 +00:00
										 |  |  |         'ogg': 'audio/ogg', | 
					
						
							| 
									
										
										
										
											2021-08-03 09:09:04 +00:00
										 |  |  |         'flac': 'audio/flac', | 
					
						
							| 
									
										
										
										
											2021-05-29 11:04:03 +00:00
										 |  |  |         'zip': 'application/zip' | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     detectedExtension = None | 
					
						
							|  |  |  |     for extension, contentType in extensionList.items(): | 
					
						
							|  |  |  |         searchStr = b'Content-Type: ' + contentType.encode('utf8', 'ignore') | 
					
						
							|  |  |  |         mediaLocation = mediaBytes.find(searchStr) | 
					
						
							|  |  |  |         if mediaLocation > -1: | 
					
						
							| 
									
										
										
										
											2020-05-26 19:05:03 +00:00
										 |  |  |             # image/video/audio binaries | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             if extension == 'jpeg': | 
					
						
							|  |  |  |                 extension = 'jpg' | 
					
						
							|  |  |  |             elif extension == 'mpeg': | 
					
						
							|  |  |  |                 extension = 'mp3' | 
					
						
							| 
									
										
										
										
											2021-05-29 11:04:03 +00:00
										 |  |  |             if filenameBase: | 
					
						
							|  |  |  |                 filename = filenameBase + '.' + extension | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |             attachmentMediaType = \ | 
					
						
							|  |  |  |                 searchStr.decode().split('/')[0].replace('Content-Type: ', '') | 
					
						
							|  |  |  |             detectedExtension = extension | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |             break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not filename: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         return None, None | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-26 19:29:15 +00:00
										 |  |  |     # locate the beginning of the image, after any | 
					
						
							|  |  |  |     # carriage returns | 
					
						
							|  |  |  |     startPos = mediaLocation + len(searchStr) | 
					
						
							|  |  |  |     for offset in range(1, 8): | 
					
						
							|  |  |  |         if mediaBytes[startPos+offset] != 10: | 
					
						
							|  |  |  |             if mediaBytes[startPos+offset] != 13: | 
					
						
							|  |  |  |                 startPos += offset | 
					
						
							|  |  |  |                 break | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-14 13:30:54 +00:00
										 |  |  |     # remove any existing image files with a different format | 
					
						
							| 
									
										
										
										
											2021-05-29 11:04:03 +00:00
										 |  |  |     if detectedExtension != 'zip': | 
					
						
							|  |  |  |         extensionTypes = getImageExtensions() | 
					
						
							|  |  |  |         for ex in extensionTypes: | 
					
						
							|  |  |  |             if ex == detectedExtension: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             possibleOtherFormat = \ | 
					
						
							|  |  |  |                 filename.replace('.temp', '').replace('.' + | 
					
						
							|  |  |  |                                                       detectedExtension, '.' + | 
					
						
							|  |  |  |                                                       ex) | 
					
						
							|  |  |  |             if os.path.isfile(possibleOtherFormat): | 
					
						
							| 
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 |  |  |                 try: | 
					
						
							|  |  |  |                     os.remove(possibleOtherFormat) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |                 except OSError: | 
					
						
							| 
									
										
										
										
											2021-10-29 16:31:20 +00:00
										 |  |  |                     if debug: | 
					
						
							|  |  |  |                         print('EX: saveMediaInFormPOST ' + | 
					
						
							|  |  |  |                               'unable to delete other 2 ' + | 
					
						
							|  |  |  |                               str(possibleOtherFormat)) | 
					
						
							| 
									
										
										
										
											2019-11-14 13:30:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-13 17:51:33 +00:00
										 |  |  |     # don't allow scripts within svg files | 
					
						
							|  |  |  |     if detectedExtension == 'svg': | 
					
						
							|  |  |  |         svgStr = mediaBytes[startPos:] | 
					
						
							|  |  |  |         svgStr = svgStr.decode() | 
					
						
							|  |  |  |         if dangerousSVG(svgStr, False): | 
					
						
							|  |  |  |             return None, None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         with open(filename, 'wb') as fp: | 
					
						
							|  |  |  |             fp.write(mediaBytes[startPos:]) | 
					
						
							|  |  |  |     except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |         print('EX: unable to write media') | 
					
						
							| 
									
										
										
										
											2019-12-04 18:52:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-06 23:16:54 +00:00
										 |  |  |     if not os.path.isfile(filename): | 
					
						
							|  |  |  |         print('WARN: Media file could not be written to file: ' + filename) | 
					
						
							|  |  |  |         return None, None | 
					
						
							| 
									
										
										
										
											2021-03-06 23:19:03 +00:00
										 |  |  |     print('Uploaded media file written: ' + filename) | 
					
						
							| 
									
										
										
										
											2021-03-06 23:16:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     return filename, attachmentMediaType | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-20 11:28:35 +00:00
										 |  |  | def extractTextFieldsInPOST(postBytes, boundary: str, debug: bool, | 
					
						
							|  |  |  |                             unitTestData: str = None) -> {}: | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     """Returns a dictionary containing the text fields of a http form POST
 | 
					
						
							|  |  |  |     The boundary argument comes from the http header | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-03-01 10:02:55 +00:00
										 |  |  |     if not unitTestData: | 
					
						
							|  |  |  |         msgBytes = email.parser.BytesParser().parsebytes(postBytes) | 
					
						
							| 
									
										
										
										
											2021-03-01 10:06:13 +00:00
										 |  |  |         messageFields = msgBytes.get_payload(decode=True).decode('utf-8') | 
					
						
							| 
									
										
										
										
											2021-03-01 10:02:55 +00:00
										 |  |  |     else: | 
					
						
							|  |  |  |         messageFields = unitTestData | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 11:54:45 +00:00
										 |  |  |     if debug: | 
					
						
							| 
									
										
										
										
											2021-03-01 10:02:55 +00:00
										 |  |  |         print('DEBUG: POST arriving ' + messageFields) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     messageFields = messageFields.split(boundary) | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |     fields = {} | 
					
						
							| 
									
										
										
										
											2021-03-01 12:15:06 +00:00
										 |  |  |     fieldsWithSemicolonAllowed = ( | 
					
						
							| 
									
										
										
										
											2021-03-01 12:19:49 +00:00
										 |  |  |         'message', 'bio', 'autoCW', 'password', 'passwordconfirm', | 
					
						
							|  |  |  |         'instanceDescription', 'instanceDescriptionShort', | 
					
						
							|  |  |  |         'subject', 'location', 'imageDescription' | 
					
						
							| 
									
										
										
										
											2021-03-01 12:15:06 +00:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     # examine each section of the POST, separated by the boundary | 
					
						
							|  |  |  |     for f in messageFields: | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         if f == '--': | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         if ' name="' not in f: | 
					
						
							| 
									
										
										
										
											2020-03-22 21:16:02 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         postStr = f.split(' name="', 1)[1] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         if '"' not in postStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         postKey = postStr.split('"', 1)[0] | 
					
						
							|  |  |  |         postValueStr = postStr.split('"', 1)[1] | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         if ';' in postValueStr: | 
					
						
							| 
									
										
										
										
											2021-03-01 12:15:06 +00:00
										 |  |  |             if postKey not in fieldsWithSemicolonAllowed and \ | 
					
						
							|  |  |  |                not postKey.startswith('edited'): | 
					
						
							| 
									
										
										
										
											2021-03-01 10:02:55 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |         if '\r\n' not in postValueStr: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-04-02 09:56:17 +00:00
										 |  |  |         postLines = postValueStr.split('\r\n') | 
					
						
							|  |  |  |         postValue = '' | 
					
						
							|  |  |  |         if len(postLines) > 2: | 
					
						
							|  |  |  |             for line in range(2, len(postLines)-1): | 
					
						
							|  |  |  |                 if line > 2: | 
					
						
							|  |  |  |                     postValue += '\n' | 
					
						
							|  |  |  |                 postValue += postLines[line] | 
					
						
							| 
									
										
										
										
											2021-05-20 09:41:36 +00:00
										 |  |  |         fields[postKey] = urllib.parse.unquote(postValue) | 
					
						
							| 
									
										
										
										
											2019-11-10 11:37:24 +00:00
										 |  |  |     return fields | 
					
						
							| 
									
										
										
										
											2021-07-10 09:38:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def limitRepeatedWords(text: str, maxRepeats: int) -> str: | 
					
						
							|  |  |  |     """Removes words which are repeated many times
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     words = text.replace('\n', ' ').split(' ') | 
					
						
							|  |  |  |     repeatCtr = 0 | 
					
						
							|  |  |  |     repeatedText = '' | 
					
						
							|  |  |  |     replacements = {} | 
					
						
							|  |  |  |     prevWord = '' | 
					
						
							|  |  |  |     for word in words: | 
					
						
							|  |  |  |         if word == prevWord: | 
					
						
							|  |  |  |             repeatCtr += 1 | 
					
						
							|  |  |  |             if repeatedText: | 
					
						
							|  |  |  |                 repeatedText += ' ' + word | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 repeatedText = word + ' ' + word | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             if repeatCtr > maxRepeats: | 
					
						
							|  |  |  |                 newText = ((prevWord + ' ') * maxRepeats).strip() | 
					
						
							|  |  |  |                 replacements[prevWord] = [repeatedText, newText] | 
					
						
							|  |  |  |             repeatCtr = 0 | 
					
						
							|  |  |  |             repeatedText = '' | 
					
						
							|  |  |  |         prevWord = word | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if repeatCtr > maxRepeats: | 
					
						
							|  |  |  |         newText = ((prevWord + ' ') * maxRepeats).strip() | 
					
						
							|  |  |  |         replacements[prevWord] = [repeatedText, newText] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for word, item in replacements.items(): | 
					
						
							|  |  |  |         text = text.replace(item[0], item[1]) | 
					
						
							|  |  |  |     return text | 
					
						
							| 
									
										
										
										
											2021-08-07 17:03:41 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def getPriceFromString(priceStr: str) -> (str, str): | 
					
						
							|  |  |  |     """Returns the item price and currency
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-08-07 17:44:25 +00:00
										 |  |  |     currencies = getCurrencies() | 
					
						
							| 
									
										
										
										
											2021-08-07 17:03:41 +00:00
										 |  |  |     for symbol, name in currencies.items(): | 
					
						
							|  |  |  |         if symbol in priceStr: | 
					
						
							|  |  |  |             price = priceStr.replace(symbol, '') | 
					
						
							|  |  |  |             if isfloat(price): | 
					
						
							|  |  |  |                 return price, name | 
					
						
							|  |  |  |         elif name in priceStr: | 
					
						
							|  |  |  |             price = priceStr.replace(name, '') | 
					
						
							|  |  |  |             if isfloat(price): | 
					
						
							|  |  |  |                 return price, name | 
					
						
							|  |  |  |     if isfloat(priceStr): | 
					
						
							|  |  |  |         return priceStr, "EUR" | 
					
						
							|  |  |  |     return "0.00", "EUR" | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:40:19 +00:00
										 |  |  | def _wordsSimilarityHistogram(words: []) -> {}: | 
					
						
							|  |  |  |     """Returns a histogram for word combinations
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     histogram = {} | 
					
						
							|  |  |  |     for index in range(1, len(words)): | 
					
						
							|  |  |  |         combinedWords = words[index - 1] + words[index] | 
					
						
							|  |  |  |         if histogram.get(combinedWords): | 
					
						
							|  |  |  |             histogram[combinedWords] += 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             histogram[combinedWords] = 1 | 
					
						
							|  |  |  |     return histogram | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:53:04 +00:00
										 |  |  | def _wordsSimilarityWordsList(content: str) -> []: | 
					
						
							|  |  |  |     """Returns a list of words for the given content
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-10-14 17:26:24 +00:00
										 |  |  |     removePunctuation = ('.', ',', ';', '-', ':', '"') | 
					
						
							| 
									
										
										
										
											2021-10-14 15:53:04 +00:00
										 |  |  |     content = removeHtml(content).lower() | 
					
						
							|  |  |  |     for p in removePunctuation: | 
					
						
							|  |  |  |         content = content.replace(p, ' ') | 
					
						
							|  |  |  |         content = content.replace('  ', ' ') | 
					
						
							|  |  |  |     return content.split(' ') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  | def wordsSimilarity(content1: str, content2: str, minWords: int) -> int: | 
					
						
							|  |  |  |     """Returns percentage similarity
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if content1 == content2: | 
					
						
							|  |  |  |         return 100 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:40:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:53:04 +00:00
										 |  |  |     words1 = _wordsSimilarityWordsList(content1) | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  |     if len(words1) < minWords: | 
					
						
							|  |  |  |         return 0 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:40:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:53:04 +00:00
										 |  |  |     words2 = _wordsSimilarityWordsList(content2) | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  |     if len(words2) < minWords: | 
					
						
							|  |  |  |         return 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-14 15:40:19 +00:00
										 |  |  |     histogram1 = _wordsSimilarityHistogram(words1) | 
					
						
							|  |  |  |     histogram2 = _wordsSimilarityHistogram(words2) | 
					
						
							| 
									
										
										
										
											2021-10-14 15:12:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     diff = 0 | 
					
						
							|  |  |  |     for combinedWords, hits in histogram1.items(): | 
					
						
							|  |  |  |         if not histogram2.get(combinedWords): | 
					
						
							|  |  |  |             diff += 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             diff += abs(histogram2[combinedWords] - histogram1[combinedWords]) | 
					
						
							|  |  |  |     return 100 - int(diff * 100 / len(histogram1.items())) | 
					
						
							| 
									
										
										
										
											2021-10-26 16:06:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def containsInvalidLocalLinks(content: str) -> bool: | 
					
						
							|  |  |  |     """Returns true if the given content has invalid links
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     invalidStrings = ( | 
					
						
							|  |  |  |         'mute', 'unmute', 'editeventpost', 'notifypost', | 
					
						
							|  |  |  |         'delete', 'options', 'page', 'repeat', | 
					
						
							| 
									
										
										
										
											2021-11-15 10:51:03 +00:00
										 |  |  |         'bm', 'tl', 'actor', 'unrepeat', 'eventid', | 
					
						
							| 
									
										
										
										
											2021-10-26 16:06:22 +00:00
										 |  |  |         'unannounce', 'like', 'unlike', 'bookmark', | 
					
						
							| 
									
										
										
										
											2021-11-15 10:40:39 +00:00
										 |  |  |         'unbookmark', 'likedBy', 'time', | 
					
						
							| 
									
										
										
										
											2021-10-26 16:06:22 +00:00
										 |  |  |         'year', 'month', 'day', 'editnewpost', | 
					
						
							|  |  |  |         'graph', 'showshare', 'category', 'showwanted', | 
					
						
							|  |  |  |         'rmshare', 'rmwanted', 'repeatprivate', | 
					
						
							|  |  |  |         'unrepeatprivate', 'replyto', | 
					
						
							|  |  |  |         'replyfollowers', 'replydm', 'editblogpost', | 
					
						
							|  |  |  |         'handle', 'blockdomain' | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     for invStr in invalidStrings: | 
					
						
							| 
									
										
										
										
											2021-10-26 16:52:17 +00:00
										 |  |  |         if '?' + invStr + '=' in content: | 
					
						
							| 
									
										
										
										
											2021-10-26 16:06:22 +00:00
										 |  |  |             return True | 
					
						
							|  |  |  |     return False |