| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | __filename__ = "newsdaemon.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							|  |  |  | __version__ = "1.1.0" | 
					
						
							|  |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							|  |  |  | __email__ = "bob@freedombone.net" | 
					
						
							|  |  |  | __status__ = "Production" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:53:08 +00:00
										 |  |  | # Example hashtag logic: | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # if moderated and not #imcoxford then block | 
					
						
							|  |  |  | # if #pol and contains "westminster" then add #britpol | 
					
						
							| 
									
										
										
										
											2020-10-17 19:06:56 +00:00
										 |  |  | # if #unwantedtag then block | 
					
						
							| 
									
										
										
										
											2020-10-17 18:53:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | import time | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  | import datetime | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  | import html | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  | from shutil import rmtree | 
					
						
							|  |  |  | from subprocess import Popen | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  | from collections import OrderedDict | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | from newswire import getDictFromNewswire | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  | # from posts import sendSignedJson | 
					
						
							| 
									
										
										
										
											2020-10-07 21:26:03 +00:00
										 |  |  | from posts import createNewsPost | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  | from posts import archivePostsForPerson | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  | from content import removeHtmlTag | 
					
						
							|  |  |  | from content import dangerousMarkup | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | from content import validHashTag | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | from utils import loadJson | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | from utils import saveJson | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  | from utils import getStatusNumber | 
					
						
							| 
									
										
										
										
											2020-10-18 16:19:28 +00:00
										 |  |  | from utils import clearFromPostCaches | 
					
						
							| 
									
										
										
										
											2020-10-17 13:39:04 +00:00
										 |  |  | from inbox import storeHashTags | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  | from session import createSession | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-08 12:29:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:43:34 +00:00
										 |  |  | def updateFeedsOutboxIndex(baseDir: str, domain: str, postId: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     """Updates the index used for imported RSS feeds
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |     basePath = baseDir + '/accounts/news@' + domain | 
					
						
							|  |  |  |     indexFilename = basePath + '/outbox.index' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if os.path.isfile(indexFilename): | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  |         if postId not in open(indexFilename).read(): | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 with open(indexFilename, 'r+') as feedsFile: | 
					
						
							|  |  |  |                     content = feedsFile.read() | 
					
						
							|  |  |  |                     feedsFile.seek(0, 0) | 
					
						
							|  |  |  |                     feedsFile.write(postId + '\n' + content) | 
					
						
							|  |  |  |                     print('DEBUG: feeds post added to index') | 
					
						
							|  |  |  |             except Exception as e: | 
					
						
							|  |  |  |                 print('WARN: Failed to write entry to feeds posts index ' + | 
					
						
							|  |  |  |                       indexFilename + ' ' + str(e)) | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     else: | 
					
						
							|  |  |  |         feedsFile = open(indexFilename, 'w+') | 
					
						
							|  |  |  |         if feedsFile: | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |             feedsFile.write(postId + '\n') | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |             feedsFile.close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  | def saveArrivedTime(baseDir: str, postFilename: str, arrived: str) -> None: | 
					
						
							|  |  |  |     """Saves the time when an rss post arrived to a file
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     arrivedFile = open(postFilename + '.arrived', 'w+') | 
					
						
							|  |  |  |     if arrivedFile: | 
					
						
							|  |  |  |         arrivedFile.write(arrived) | 
					
						
							|  |  |  |         arrivedFile.close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-10 09:36:23 +00:00
										 |  |  | def removeControlCharacters(content: str) -> str: | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  |     """Remove escaped html
 | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  |     if '&' in content: | 
					
						
							|  |  |  |         return html.unescape(content) | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |     return content | 
					
						
							| 
									
										
										
										
											2020-10-10 09:36:23 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-10 08:54:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  | def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                        content: str, url: str) -> bool: | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     """Returns whether the tree for a hashtag rule evaluates to true or false
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not tree: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if tree[0] == 'not': | 
					
						
							|  |  |  |         if len(tree) == 2: | 
					
						
							|  |  |  |             if isinstance(tree[1], str): | 
					
						
							|  |  |  |                 return tree[1] not in hashtags | 
					
						
							|  |  |  |             elif isinstance(tree[1], list): | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                 return not hashtagRuleResolve(tree[1], hashtags, moderated, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                                               content, url) | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     elif tree[0] == 'contains': | 
					
						
							|  |  |  |         if len(tree) == 2: | 
					
						
							| 
									
										
										
										
											2020-10-25 12:17:59 +00:00
										 |  |  |             matchStr = None | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |             if isinstance(tree[1], str): | 
					
						
							|  |  |  |                 matchStr = tree[1] | 
					
						
							|  |  |  |             elif isinstance(tree[1], list): | 
					
						
							|  |  |  |                 matchStr = tree[1][0] | 
					
						
							| 
									
										
										
										
											2020-10-25 12:17:59 +00:00
										 |  |  |             if matchStr: | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                 if matchStr.startswith('"') and matchStr.endswith('"'): | 
					
						
							|  |  |  |                     matchStr = matchStr[1:] | 
					
						
							|  |  |  |                     matchStr = matchStr[:len(matchStr) - 1] | 
					
						
							| 
									
										
										
										
											2020-10-25 12:15:41 +00:00
										 |  |  |                 matchStrLower = matchStr.lower() | 
					
						
							|  |  |  |                 contentWithoutTags = content.replace('#' + matchStrLower, '') | 
					
						
							|  |  |  |                 return matchStrLower in contentWithoutTags | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |     elif tree[0] == 'from': | 
					
						
							|  |  |  |         if len(tree) == 2: | 
					
						
							| 
									
										
										
										
											2020-10-25 12:22:09 +00:00
										 |  |  |             matchStr = None | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |             if isinstance(tree[1], str): | 
					
						
							|  |  |  |                 matchStr = tree[1] | 
					
						
							|  |  |  |             elif isinstance(tree[1], list): | 
					
						
							|  |  |  |                 matchStr = tree[1][0] | 
					
						
							| 
									
										
										
										
											2020-10-25 12:22:09 +00:00
										 |  |  |             if matchStr: | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                 if matchStr.startswith('"') and matchStr.endswith('"'): | 
					
						
							|  |  |  |                     matchStr = matchStr[1:] | 
					
						
							|  |  |  |                     matchStr = matchStr[:len(matchStr) - 1] | 
					
						
							|  |  |  |                 return matchStr.lower() in url | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0] == 'and': | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |         if len(tree) >= 3: | 
					
						
							|  |  |  |             for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |                 argValue = False | 
					
						
							|  |  |  |                 if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |                     argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |                 elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |                     argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                                   hashtags, moderated, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                                                   content, url) | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |                 if not argValue: | 
					
						
							|  |  |  |                     return False | 
					
						
							|  |  |  |             return True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0] == 'or': | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |         if len(tree) >= 3: | 
					
						
							|  |  |  |             for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |                 argValue = False | 
					
						
							|  |  |  |                 if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |                     argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |                 elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |                     argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                                   hashtags, moderated, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                                                   content, url) | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |                 if argValue: | 
					
						
							|  |  |  |                     return True | 
					
						
							|  |  |  |             return False | 
					
						
							| 
									
										
										
										
											2020-10-20 09:43:30 +00:00
										 |  |  |     elif tree[0] == 'xor': | 
					
						
							|  |  |  |         if len(tree) >= 3: | 
					
						
							|  |  |  |             trueCtr = 0 | 
					
						
							|  |  |  |             for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |                 argValue = False | 
					
						
							|  |  |  |                 if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |                     argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |                 elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |                     argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                                   hashtags, moderated, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                                                   content, url) | 
					
						
							| 
									
										
										
										
											2020-10-20 09:43:30 +00:00
										 |  |  |                 if argValue: | 
					
						
							|  |  |  |                     trueCtr += 1 | 
					
						
							|  |  |  |             if trueCtr == 1: | 
					
						
							|  |  |  |                 return True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0].startswith('#') and len(tree) == 1: | 
					
						
							|  |  |  |         return tree[0] in hashtags | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |     elif tree[0].startswith('moderated'): | 
					
						
							|  |  |  |         return moderated | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     elif tree[0].startswith('"') and tree[0].endswith('"'): | 
					
						
							|  |  |  |         return True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def hashtagRuleTree(operators: [], | 
					
						
							|  |  |  |                     conditionsStr: str, | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |                     tagsInConditions: [], | 
					
						
							|  |  |  |                     moderated: bool) -> []: | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     """Walks the tree
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not operators and conditionsStr: | 
					
						
							|  |  |  |         conditionsStr = conditionsStr.strip() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |         isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') | 
					
						
							|  |  |  |         if conditionsStr.startswith('#') or isStr or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |            conditionsStr in operators or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |            conditionsStr == 'moderated' or \ | 
					
						
							|  |  |  |            conditionsStr == 'contains': | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             if conditionsStr.startswith('#'): | 
					
						
							|  |  |  |                 if conditionsStr not in tagsInConditions: | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                     if ' ' not in conditionsStr or \ | 
					
						
							|  |  |  |                        conditionsStr.startswith('"'): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                         tagsInConditions.append(conditionsStr) | 
					
						
							|  |  |  |             return [conditionsStr.strip()] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  |     if not operators or not conditionsStr: | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  |     tree = None | 
					
						
							|  |  |  |     conditionsStr = conditionsStr.strip() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') | 
					
						
							|  |  |  |     if conditionsStr.startswith('#') or isStr or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |        conditionsStr in operators or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |        conditionsStr == 'moderated' or \ | 
					
						
							|  |  |  |        conditionsStr == 'contains': | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |         if conditionsStr.startswith('#'): | 
					
						
							|  |  |  |             if conditionsStr not in tagsInConditions: | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                 if ' ' not in conditionsStr or \ | 
					
						
							|  |  |  |                    conditionsStr.startswith('"'): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                     tagsInConditions.append(conditionsStr) | 
					
						
							|  |  |  |         tree = [conditionsStr.strip()] | 
					
						
							|  |  |  |     ctr = 0 | 
					
						
							|  |  |  |     while ctr < len(operators): | 
					
						
							|  |  |  |         op = operators[ctr] | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |         opMatch = ' ' + op + ' ' | 
					
						
							|  |  |  |         if opMatch not in conditionsStr and \ | 
					
						
							|  |  |  |            not conditionsStr.startswith(op + ' '): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             ctr += 1 | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             tree = [op] | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |             if opMatch in conditionsStr: | 
					
						
							|  |  |  |                 sections = conditionsStr.split(opMatch) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 sections = conditionsStr.split(op + ' ', 1) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             for subConditionStr in sections: | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |                 result = hashtagRuleTree(operators[ctr + 1:], | 
					
						
							|  |  |  |                                          subConditionStr, | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |                                          tagsInConditions, moderated) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                 if result: | 
					
						
							|  |  |  |                     tree.append(result) | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     return tree | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  | def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, | 
					
						
							| 
									
										
										
										
											2020-10-17 16:46:21 +00:00
										 |  |  |                               hashtags: [], httpPrefix: str, | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |                               domain: str, port: int, | 
					
						
							|  |  |  |                               personCache: {}, | 
					
						
							|  |  |  |                               cachedWebfingers: {}, | 
					
						
							|  |  |  |                               federationList: [], | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |                               sendThreads: [], postLog: [], | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                               moderated: bool, url: str) -> bool: | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |     """Applies hashtag rules to a news post.
 | 
					
						
							|  |  |  |     Returns true if the post should be saved to the news timeline | 
					
						
							|  |  |  |     of this instance | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     rulesFilename = baseDir + '/accounts/hashtagrules.txt' | 
					
						
							|  |  |  |     if not os.path.isfile(rulesFilename): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     rules = [] | 
					
						
							|  |  |  |     with open(rulesFilename, "r") as f: | 
					
						
							|  |  |  |         rules = f.readlines() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     domainFull = domain | 
					
						
							|  |  |  |     if port: | 
					
						
							|  |  |  |         if port != 80 and port != 443: | 
					
						
							|  |  |  |             domainFull = domain + ':' + str(port) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     # get the full text content of the post | 
					
						
							|  |  |  |     content = '' | 
					
						
							|  |  |  |     if postJsonObject['object'].get('content'): | 
					
						
							|  |  |  |         content += postJsonObject['object']['content'] | 
					
						
							|  |  |  |     if postJsonObject['object'].get('summary'): | 
					
						
							|  |  |  |         content += ' ' + postJsonObject['object']['summary'] | 
					
						
							| 
									
										
										
										
											2020-10-17 19:04:39 +00:00
										 |  |  |     content = content.lower() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 13:41:20 +00:00
										 |  |  |     # actionOccurred = False | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |     operators = ('not', 'and', 'or', 'xor', 'from', 'contains') | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     for ruleStr in rules: | 
					
						
							|  |  |  |         if not ruleStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not ruleStr.startswith('if '): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if ' then ' not in ruleStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         conditionsStr = ruleStr.split('if ', 1)[1] | 
					
						
							|  |  |  |         conditionsStr = conditionsStr.split(' then ')[0] | 
					
						
							|  |  |  |         tagsInConditions = [] | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |         tree = hashtagRuleTree(operators, conditionsStr, | 
					
						
							|  |  |  |                                tagsInConditions, moderated) | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |         if not hashtagRuleResolve(tree, hashtags, moderated, content, url): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         # the condition matches, so do something | 
					
						
							|  |  |  |         actionStr = ruleStr.split(' then ')[1].strip() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # add a hashtag | 
					
						
							|  |  |  |         if actionStr.startswith('add '): | 
					
						
							|  |  |  |             addHashtag = actionStr.split('add ', 1)[1].strip() | 
					
						
							|  |  |  |             if addHashtag.startswith('#'): | 
					
						
							|  |  |  |                 if addHashtag not in hashtags: | 
					
						
							|  |  |  |                     hashtags.append(addHashtag) | 
					
						
							| 
									
										
										
										
											2020-10-18 12:53:23 +00:00
										 |  |  |                 htId = addHashtag.replace('#', '') | 
					
						
							|  |  |  |                 if validHashTag(htId): | 
					
						
							|  |  |  |                     hashtagUrl = \ | 
					
						
							|  |  |  |                         httpPrefix + "://" + domainFull + "/tags/" + htId | 
					
						
							|  |  |  |                     newTag = { | 
					
						
							|  |  |  |                         'href': hashtagUrl, | 
					
						
							|  |  |  |                         'name': addHashtag, | 
					
						
							|  |  |  |                         'type': 'Hashtag' | 
					
						
							|  |  |  |                     } | 
					
						
							| 
									
										
										
										
											2020-10-18 13:31:50 +00:00
										 |  |  |                     # does the tag already exist? | 
					
						
							|  |  |  |                     addTagObject = None | 
					
						
							|  |  |  |                     for t in postJsonObject['object']['tag']: | 
					
						
							|  |  |  |                         if t.get('type') and t.get('name'): | 
					
						
							|  |  |  |                             if t['type'] == 'Hashtag' and \ | 
					
						
							|  |  |  |                                t['name'] == addHashtag: | 
					
						
							|  |  |  |                                 addTagObject = t | 
					
						
							|  |  |  |                                 break | 
					
						
							|  |  |  |                     # append the tag if it wasn't found | 
					
						
							|  |  |  |                     if not addTagObject: | 
					
						
							|  |  |  |                         postJsonObject['object']['tag'].append(newTag) | 
					
						
							|  |  |  |                     # add corresponding html to the post content | 
					
						
							|  |  |  |                     hashtagHtml = \ | 
					
						
							|  |  |  |                         " <a href=\"" + hashtagUrl + \ | 
					
						
							| 
									
										
										
										
											2020-10-23 17:13:02 +00:00
										 |  |  |                         "\" class=\"addedHashtag\" " + \ | 
					
						
							| 
									
										
										
										
											2020-10-18 13:31:50 +00:00
										 |  |  |                         "rel=\"tag\">#<span>" + \ | 
					
						
							|  |  |  |                         htId + "</span></a>" | 
					
						
							|  |  |  |                     content = postJsonObject['object']['content'] | 
					
						
							|  |  |  |                     if hashtagHtml not in content: | 
					
						
							|  |  |  |                         if content.endswith('</p>'): | 
					
						
							|  |  |  |                             content = \ | 
					
						
							|  |  |  |                                 content[:len(content) - len('</p>')] + \ | 
					
						
							|  |  |  |                                 hashtagHtml + '</p>' | 
					
						
							|  |  |  |                         else: | 
					
						
							|  |  |  |                             content += hashtagHtml | 
					
						
							|  |  |  |                         postJsonObject['object']['content'] = content | 
					
						
							|  |  |  |                         storeHashTags(baseDir, 'news', postJsonObject) | 
					
						
							|  |  |  |                         # actionOccurred = True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # remove a hashtag | 
					
						
							|  |  |  |         if actionStr.startswith('remove '): | 
					
						
							|  |  |  |             rmHashtag = actionStr.split('remove ', 1)[1].strip() | 
					
						
							|  |  |  |             if rmHashtag.startswith('#'): | 
					
						
							|  |  |  |                 if rmHashtag in hashtags: | 
					
						
							|  |  |  |                     hashtags.remove(rmHashtag) | 
					
						
							| 
									
										
										
										
											2020-10-18 12:53:23 +00:00
										 |  |  |                 htId = rmHashtag.replace('#', '') | 
					
						
							|  |  |  |                 hashtagUrl = \ | 
					
						
							|  |  |  |                     httpPrefix + "://" + domainFull + "/tags/" + htId | 
					
						
							|  |  |  |                 # remove tag html from the post content | 
					
						
							|  |  |  |                 hashtagHtml = \ | 
					
						
							|  |  |  |                     "<a href=\"" + hashtagUrl + \ | 
					
						
							| 
									
										
										
										
											2020-10-23 17:13:02 +00:00
										 |  |  |                     "\" class=\"addedHashtag\" " + \ | 
					
						
							| 
									
										
										
										
											2020-10-18 12:53:23 +00:00
										 |  |  |                     "rel=\"tag\">#<span>" + \ | 
					
						
							|  |  |  |                     htId + "</span></a>" | 
					
						
							|  |  |  |                 content = postJsonObject['object']['content'] | 
					
						
							|  |  |  |                 if hashtagHtml in content: | 
					
						
							|  |  |  |                     content = \ | 
					
						
							|  |  |  |                         content.replace(hashtagHtml, '').replace('  ', ' ') | 
					
						
							|  |  |  |                     postJsonObject['object']['content'] = content | 
					
						
							|  |  |  |                 rmTagObject = None | 
					
						
							|  |  |  |                 for t in postJsonObject['object']['tag']: | 
					
						
							|  |  |  |                     if t.get('type') and t.get('name'): | 
					
						
							|  |  |  |                         if t['type'] == 'Hashtag' and \ | 
					
						
							|  |  |  |                            t['name'] == rmHashtag: | 
					
						
							|  |  |  |                             rmTagObject = t | 
					
						
							|  |  |  |                             break | 
					
						
							|  |  |  |                 if rmTagObject: | 
					
						
							|  |  |  |                     postJsonObject['object']['tag'].remove(rmTagObject) | 
					
						
							|  |  |  |                     # actionOccurred = True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 16:24:47 +00:00
										 |  |  |         # Block this item | 
					
						
							|  |  |  |         if actionStr.startswith('block') or actionStr.startswith('drop'): | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |     # TODO | 
					
						
							|  |  |  |     # If routing to another instance | 
					
						
							|  |  |  |     # sendSignedJson(postJsonObject: {}, session, baseDir: str, | 
					
						
							|  |  |  |     #                nickname: str, domain: str, port: int, | 
					
						
							|  |  |  |     #                toNickname: str, toDomain: str, toPort: int, cc: str, | 
					
						
							|  |  |  |     #                httpPrefix: str, False, False, | 
					
						
							|  |  |  |     #                federationList: [], | 
					
						
							|  |  |  |     #                sendThreads: [], postLog: [], cachedWebfingers: {}, | 
					
						
							|  |  |  |     #                personCache: {}, False, __version__) -> int: | 
					
						
							| 
									
										
										
										
											2020-10-17 13:40:36 +00:00
										 |  |  |     # if actionOccurred: | 
					
						
							|  |  |  |     #     return True | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  | def createNewsMirror(baseDir: str, domain: str, | 
					
						
							|  |  |  |                      postIdNumber: str, url: str, | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |                      maxMirroredArticles: int) -> bool: | 
					
						
							|  |  |  |     """Creates a local mirror of a news article
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     if '|' in url or '>' in url: | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |     mirrorDir = baseDir + '/accounts/newsmirror' | 
					
						
							|  |  |  |     if not os.path.isdir(mirrorDir): | 
					
						
							|  |  |  |         os.mkdir(mirrorDir) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     # count the directories | 
					
						
							|  |  |  |     noOfDirs = 0 | 
					
						
							|  |  |  |     for subdir, dirs, files in os.walk(mirrorDir): | 
					
						
							|  |  |  |         noOfDirs = len(dirs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mirrorIndexFilename = baseDir + '/accounts/newsmirror.txt' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if maxMirroredArticles > 0 and noOfDirs > maxMirroredArticles: | 
					
						
							|  |  |  |         if not os.path.isfile(mirrorIndexFilename): | 
					
						
							|  |  |  |             # no index for mirrors found | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |         removals = [] | 
					
						
							|  |  |  |         with open(mirrorIndexFilename, 'r') as indexFile: | 
					
						
							|  |  |  |             # remove the oldest directories | 
					
						
							|  |  |  |             ctr = 0 | 
					
						
							|  |  |  |             while noOfDirs > maxMirroredArticles: | 
					
						
							|  |  |  |                 ctr += 1 | 
					
						
							|  |  |  |                 if ctr > 5000: | 
					
						
							|  |  |  |                     # escape valve | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 postId = indexFile.readline() | 
					
						
							|  |  |  |                 if not postId: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 postId = postId.strip() | 
					
						
							|  |  |  |                 mirrorArticleDir = mirrorDir + '/' + postId | 
					
						
							|  |  |  |                 if os.path.isdir(mirrorArticleDir): | 
					
						
							|  |  |  |                     rmtree(mirrorArticleDir) | 
					
						
							|  |  |  |                     removals.append(postId) | 
					
						
							|  |  |  |                     noOfDirs -= 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # remove the corresponding index entries | 
					
						
							|  |  |  |         if removals: | 
					
						
							|  |  |  |             indexContent = '' | 
					
						
							|  |  |  |             with open(mirrorIndexFilename, 'r') as indexFile: | 
					
						
							|  |  |  |                 indexContent = indexFile.read() | 
					
						
							|  |  |  |                 for removePostId in removals: | 
					
						
							|  |  |  |                     indexContent = \ | 
					
						
							|  |  |  |                         indexContent.replace(removePostId + '\n', '') | 
					
						
							|  |  |  |             with open(mirrorIndexFilename, "w+") as indexFile: | 
					
						
							|  |  |  |                 indexFile.write(indexContent) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mirrorArticleDir = mirrorDir + '/' + postIdNumber | 
					
						
							|  |  |  |     if os.path.isdir(mirrorArticleDir): | 
					
						
							|  |  |  |         # already mirrored | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |     # for onion instances mirror via tor | 
					
						
							|  |  |  |     prefixStr = '' | 
					
						
							|  |  |  |     if domain.endswith('.onion'): | 
					
						
							|  |  |  |         prefixStr = '/usr/bin/torsocks ' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     # download the files | 
					
						
							|  |  |  |     commandStr = \ | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |         prefixStr + '/usr/bin/wget -mkEpnp -e robots=off ' + url + \ | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |         ' -P ' + mirrorArticleDir | 
					
						
							|  |  |  |     p = Popen(commandStr, shell=True) | 
					
						
							|  |  |  |     os.waitpid(p.pid, 0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not os.path.isdir(mirrorArticleDir): | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |         print('WARN: failed to mirror ' + url) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # append the post Id number to the index file | 
					
						
							|  |  |  |     if os.path.isfile(mirrorIndexFilename): | 
					
						
							|  |  |  |         indexFile = open(mirrorIndexFilename, "a+") | 
					
						
							|  |  |  |         if indexFile: | 
					
						
							|  |  |  |             indexFile.write(postIdNumber + '\n') | 
					
						
							|  |  |  |             indexFile.close() | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         indexFile = open(mirrorIndexFilename, "w+") | 
					
						
							|  |  |  |         if indexFile: | 
					
						
							|  |  |  |             indexFile.write(postIdNumber + '\n') | 
					
						
							|  |  |  |             indexFile.close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | def convertRSStoActivityPub(baseDir: str, httpPrefix: str, | 
					
						
							|  |  |  |                             domain: str, port: int, | 
					
						
							|  |  |  |                             newswire: {}, | 
					
						
							| 
									
										
										
										
											2020-10-08 12:13:42 +00:00
										 |  |  |                             translate: {}, | 
					
						
							|  |  |  |                             recentPostsCache: {}, maxRecentPosts: int, | 
					
						
							|  |  |  |                             session, cachedWebfingers: {}, | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |                             personCache: {}, | 
					
						
							|  |  |  |                             federationList: [], | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |                             sendThreads: [], postLog: [], | 
					
						
							|  |  |  |                             maxMirroredArticles: int) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     """Converts rss items in a newswire into posts
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-11-03 14:41:28 +00:00
										 |  |  |     if not newswire: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |     basePath = baseDir + '/accounts/news@' + domain + '/outbox' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     if not os.path.isdir(basePath): | 
					
						
							|  |  |  |         os.mkdir(basePath) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  |     # oldest items first | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  |     newswireReverse = \ | 
					
						
							| 
									
										
										
										
											2020-10-07 19:41:55 +00:00
										 |  |  |         OrderedDict(sorted(newswire.items(), reverse=False)) | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for dateStr, item in newswireReverse.items(): | 
					
						
							| 
									
										
										
										
											2020-10-07 20:03:39 +00:00
										 |  |  |         originalDateStr = dateStr | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # convert the date to the format used by ActivityPub | 
					
						
							| 
									
										
										
										
											2020-10-20 12:37:32 +00:00
										 |  |  |         if '+00:00' in dateStr: | 
					
						
							|  |  |  |             dateStr = dateStr.replace(' ', 'T') | 
					
						
							|  |  |  |             dateStr = dateStr.replace('+00:00', 'Z') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             dateStrWithOffset = \ | 
					
						
							|  |  |  |                 datetime.datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z") | 
					
						
							|  |  |  |             dateStr = dateStrWithOffset.strftime("%Y-%m-%dT%H:%M:%SZ") | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         statusNumber, published = getStatusNumber(dateStr) | 
					
						
							|  |  |  |         newPostId = \ | 
					
						
							|  |  |  |             httpPrefix + '://' + domain + \ | 
					
						
							|  |  |  |             '/users/news/statuses/' + statusNumber | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # file where the post is stored | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         filename = basePath + '/' + newPostId.replace('/', '#') + '.json' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |         if os.path.isfile(filename): | 
					
						
							| 
									
										
										
										
											2020-10-08 12:52:15 +00:00
										 |  |  |             # don't create the post if it already exists | 
					
						
							| 
									
										
										
										
											2020-10-08 14:35:26 +00:00
										 |  |  |             # set the url | 
					
						
							| 
									
										
										
										
											2020-10-08 12:37:14 +00:00
										 |  |  |             newswire[originalDateStr][1] = \ | 
					
						
							|  |  |  |                 '/users/news/statuses/' + statusNumber | 
					
						
							| 
									
										
										
										
											2020-10-08 14:35:26 +00:00
										 |  |  |             # set the filename | 
					
						
							|  |  |  |             newswire[originalDateStr][3] = filename | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-11 11:00:28 +00:00
										 |  |  |         rssTitle = removeControlCharacters(item[0]) | 
					
						
							| 
									
										
										
										
											2020-10-10 09:53:56 +00:00
										 |  |  |         url = item[1] | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |         if dangerousMarkup(url) or dangerousMarkup(rssTitle): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-10-07 13:55:27 +00:00
										 |  |  |         rssDescription = '' | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # get the rss description if it exists | 
					
						
							| 
									
										
										
										
											2020-10-11 11:00:28 +00:00
										 |  |  |         rssDescription = removeControlCharacters(item[4]) | 
					
						
							| 
									
										
										
										
											2020-10-10 10:16:06 +00:00
										 |  |  |         if rssDescription.startswith('<![CDATA['): | 
					
						
							|  |  |  |             rssDescription = rssDescription.replace('<![CDATA[', '') | 
					
						
							|  |  |  |             rssDescription = rssDescription.replace(']]>', '') | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  |         if '&' in rssDescription: | 
					
						
							|  |  |  |             rssDescription = html.unescape(rssDescription) | 
					
						
							| 
									
										
										
										
											2020-10-10 10:24:40 +00:00
										 |  |  |         rssDescription = '<p>' + rssDescription + '<p>' | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  |         mirrored = item[7] | 
					
						
							|  |  |  |         postUrl = url | 
					
						
							|  |  |  |         if mirrored and '://' in url: | 
					
						
							| 
									
										
										
										
											2020-10-19 22:17:06 +00:00
										 |  |  |             postUrl = '/newsmirror/' + statusNumber + '/' + \ | 
					
						
							| 
									
										
										
										
											2020-10-19 22:21:30 +00:00
										 |  |  |                 url.split('://')[1] | 
					
						
							|  |  |  |             if postUrl.endswith('/'): | 
					
						
							|  |  |  |                 postUrl += 'index.html' | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 postUrl += '/index.html' | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # add the off-site link to the description | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |         if rssDescription and not dangerousMarkup(rssDescription): | 
					
						
							| 
									
										
										
										
											2020-10-10 10:10:20 +00:00
										 |  |  |             rssDescription += \ | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  |                 '<br><a href="' + postUrl + '">' + \ | 
					
						
							| 
									
										
										
										
											2020-10-10 10:10:20 +00:00
										 |  |  |                 translate['Read more...'] + '</a>' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2020-10-10 10:10:20 +00:00
										 |  |  |             rssDescription = \ | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  |                 '<a href="' + postUrl + '">' + \ | 
					
						
							| 
									
										
										
										
											2020-10-10 10:10:20 +00:00
										 |  |  |                 translate['Read more...'] + '</a>' | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |         # remove image dimensions | 
					
						
							| 
									
										
										
										
											2020-10-11 10:05:06 +00:00
										 |  |  |         if '<img' in rssDescription: | 
					
						
							| 
									
										
										
										
											2020-10-11 10:31:26 +00:00
										 |  |  |             rssDescription = removeHtmlTag(rssDescription, 'width') | 
					
						
							| 
									
										
										
										
											2020-10-11 10:05:06 +00:00
										 |  |  |             rssDescription = removeHtmlTag(rssDescription, 'height') | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         followersOnly = False | 
					
						
							|  |  |  |         useBlurhash = False | 
					
						
							| 
									
										
										
										
											2020-10-09 10:08:01 +00:00
										 |  |  |         # NOTE: the id when the post is created will not be | 
					
						
							|  |  |  |         # consistent (it's based on the current time, not the | 
					
						
							|  |  |  |         # published time), so we change that later | 
					
						
							| 
									
										
										
										
											2020-10-07 21:26:03 +00:00
										 |  |  |         blog = createNewsPost(baseDir, | 
					
						
							| 
									
										
										
										
											2020-10-07 22:25:30 +00:00
										 |  |  |                               domain, port, httpPrefix, | 
					
						
							| 
									
										
										
										
											2020-10-11 11:00:28 +00:00
										 |  |  |                               rssDescription, | 
					
						
							| 
									
										
										
										
											2020-10-11 10:57:18 +00:00
										 |  |  |                               followersOnly, False, | 
					
						
							| 
									
										
										
										
											2020-10-07 21:26:03 +00:00
										 |  |  |                               None, None, None, useBlurhash, | 
					
						
							| 
									
										
										
										
											2020-10-11 11:00:28 +00:00
										 |  |  |                               rssTitle) | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         if not blog: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |         if mirrored: | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |             if not createNewsMirror(baseDir, domain, statusNumber, | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |                                     url, maxMirroredArticles): | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         idStr = \ | 
					
						
							|  |  |  |             httpPrefix + '://' + domain + '/users/news' + \ | 
					
						
							|  |  |  |             '/statuses/' + statusNumber + '/replies' | 
					
						
							| 
									
										
										
										
											2020-10-08 09:07:45 +00:00
										 |  |  |         blog['news'] = True | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # note the time of arrival | 
					
						
							|  |  |  |         currTime = datetime.datetime.utcnow() | 
					
						
							|  |  |  |         blog['object']['arrived'] = currTime.strftime("%Y-%m-%dT%H:%M:%SZ") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 10:08:01 +00:00
										 |  |  |         # change the id, based upon the published time | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         blog['object']['replies']['id'] = idStr | 
					
						
							|  |  |  |         blog['object']['replies']['first']['partOf'] = idStr | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         blog['id'] = newPostId + '/activity' | 
					
						
							|  |  |  |         blog['object']['id'] = newPostId | 
					
						
							|  |  |  |         blog['object']['atomUri'] = newPostId | 
					
						
							|  |  |  |         blog['object']['url'] = \ | 
					
						
							|  |  |  |             httpPrefix + '://' + domain + '/@news/' + statusNumber | 
					
						
							|  |  |  |         blog['object']['published'] = dateStr | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-20 12:49:12 +00:00
										 |  |  |         blog['object']['content'] = rssDescription | 
					
						
							|  |  |  |         blog['object']['contentMap']['en'] = rssDescription | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 13:59:47 +00:00
										 |  |  |         domainFull = domain | 
					
						
							|  |  |  |         if port: | 
					
						
							|  |  |  |             if port != 80 and port != 443: | 
					
						
							|  |  |  |                 domainFull = domain + ':' + str(port) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         hashtags = item[6] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         postId = newPostId.replace('/', '#') | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  |         moderated = item[5] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |         savePost = newswireHashtagProcessing(session, baseDir, blog, hashtags, | 
					
						
							|  |  |  |                                              httpPrefix, domain, port, | 
					
						
							|  |  |  |                                              personCache, cachedWebfingers, | 
					
						
							|  |  |  |                                              federationList, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                                              sendThreads, postLog, | 
					
						
							|  |  |  |                                              moderated, url) | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |         # save the post and update the index | 
					
						
							|  |  |  |         if savePost: | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |             # ensure that all hashtags are stored in the json | 
					
						
							|  |  |  |             # and appended to the content | 
					
						
							|  |  |  |             blog['object']['tag'] = [] | 
					
						
							| 
									
										
										
										
											2020-10-25 11:22:52 +00:00
										 |  |  |             for tagName in hashtags: | 
					
						
							|  |  |  |                 htId = tagName.replace('#', '') | 
					
						
							|  |  |  |                 hashtagUrl = \ | 
					
						
							|  |  |  |                     httpPrefix + "://" + domainFull + "/tags/" + htId | 
					
						
							|  |  |  |                 newTag = { | 
					
						
							|  |  |  |                     'href': hashtagUrl, | 
					
						
							|  |  |  |                     'name': tagName, | 
					
						
							|  |  |  |                     'type': 'Hashtag' | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 blog['object']['tag'].append(newTag) | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |                 hashtagHtml = \ | 
					
						
							| 
									
										
										
										
											2020-10-25 12:57:14 +00:00
										 |  |  |                     " <a href=\"" + hashtagUrl + \ | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |                     "\" class=\"addedHashtag\" " + \ | 
					
						
							|  |  |  |                     "rel=\"tag\">#<span>" + \ | 
					
						
							|  |  |  |                     htId + "</span></a>" | 
					
						
							| 
									
										
										
										
											2020-10-25 14:37:51 +00:00
										 |  |  |                 content = blog['object']['content'] | 
					
						
							|  |  |  |                 if hashtagHtml not in content: | 
					
						
							|  |  |  |                     if content.endswith('</p>'): | 
					
						
							|  |  |  |                         content = \ | 
					
						
							|  |  |  |                             content[:len(content) - len('</p>')] + \ | 
					
						
							|  |  |  |                             hashtagHtml + '</p>' | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         content += hashtagHtml | 
					
						
							|  |  |  |                     blog['object']['content'] = content | 
					
						
							| 
									
										
										
										
											2020-10-25 11:22:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-25 14:21:29 +00:00
										 |  |  |             # update the newswire tags if new ones have been found by | 
					
						
							|  |  |  |             # newswireHashtagProcessing | 
					
						
							|  |  |  |             for tag in hashtags: | 
					
						
							|  |  |  |                 if tag not in newswire[originalDateStr][6]: | 
					
						
							|  |  |  |                     newswire[originalDateStr][6].append(tag) | 
					
						
							| 
									
										
										
										
											2020-10-17 13:39:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |             storeHashTags(baseDir, 'news', blog) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-18 20:06:52 +00:00
										 |  |  |             clearFromPostCaches(baseDir, recentPostsCache, postId) | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |             if saveJson(blog, filename): | 
					
						
							|  |  |  |                 updateFeedsOutboxIndex(baseDir, domain, postId + '.json') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # Save a file containing the time when the post arrived | 
					
						
							|  |  |  |                 # this can then later be used to construct the news timeline | 
					
						
							|  |  |  |                 # excluding items during the voting period | 
					
						
							|  |  |  |                 if moderated: | 
					
						
							|  |  |  |                     saveArrivedTime(baseDir, filename, | 
					
						
							|  |  |  |                                     blog['object']['arrived']) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     if os.path.isfile(filename + '.arrived'): | 
					
						
							|  |  |  |                         os.remove(filename + '.arrived') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # set the url | 
					
						
							|  |  |  |                 newswire[originalDateStr][1] = \ | 
					
						
							|  |  |  |                     '/users/news/statuses/' + statusNumber | 
					
						
							|  |  |  |                 # set the filename | 
					
						
							|  |  |  |                 newswire[originalDateStr][3] = filename | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | def mergeWithPreviousNewswire(oldNewswire: {}, newNewswire: {}) -> None: | 
					
						
							|  |  |  |     """Preserve any votes or generated activitypub post filename
 | 
					
						
							|  |  |  |     as rss feeds are updated | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-11-03 14:41:28 +00:00
										 |  |  |     if not oldNewswire: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  |     for published, fields in oldNewswire.items(): | 
					
						
							|  |  |  |         if not newNewswire.get(published): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-10-13 08:53:59 +00:00
										 |  |  |         for i in range(1, 5): | 
					
						
							|  |  |  |             newNewswire[published][i] = fields[i] | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | def runNewswireDaemon(baseDir: str, httpd, | 
					
						
							|  |  |  |                       httpPrefix: str, domain: str, port: int, | 
					
						
							|  |  |  |                       translate: {}) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |     """Periodically updates RSS feeds
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:13:16 +00:00
										 |  |  |     newswireStateFilename = baseDir + '/accounts/.newswirestate.json' | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |     # initial sleep to allow the system to start up | 
					
						
							|  |  |  |     time.sleep(50) | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         # has the session been created yet? | 
					
						
							|  |  |  |         if not httpd.session: | 
					
						
							| 
									
										
										
										
											2020-11-03 16:10:54 +00:00
										 |  |  |             print('Newswire daemon waiting for session') | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  |             httpd.session = createSession(httpd.proxyType) | 
					
						
							|  |  |  |             if not httpd.session: | 
					
						
							| 
									
										
										
										
											2020-11-03 16:10:54 +00:00
										 |  |  |                 print('Newswire daemon has no session') | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  |                 time.sleep(60) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 print('Newswire daemon session established') | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # try to update the feeds | 
					
						
							|  |  |  |         newNewswire = None | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2020-10-16 10:26:06 +00:00
										 |  |  |             newNewswire = \ | 
					
						
							| 
									
										
										
										
											2020-10-17 16:08:07 +00:00
										 |  |  |                 getDictFromNewswire(httpd.session, baseDir, domain, | 
					
						
							| 
									
										
										
										
											2020-10-16 11:40:01 +00:00
										 |  |  |                                     httpd.maxNewswirePostsPerSource, | 
					
						
							| 
									
										
										
										
											2020-10-23 14:41:29 +00:00
										 |  |  |                                     httpd.maxNewswireFeedSizeKb, | 
					
						
							| 
									
										
										
										
											2020-11-03 16:04:25 +00:00
										 |  |  |                                     httpd.maxTags, | 
					
						
							|  |  |  |                                     httpd.maxFeedItemSizeKb) | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |         except Exception as e: | 
					
						
							|  |  |  |             print('WARN: unable to update newswire ' + str(e)) | 
					
						
							|  |  |  |             time.sleep(120) | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  |         if not httpd.newswire: | 
					
						
							| 
									
										
										
										
											2020-10-09 09:13:16 +00:00
										 |  |  |             if os.path.isfile(newswireStateFilename): | 
					
						
							|  |  |  |                 httpd.newswire = loadJson(newswireStateFilename) | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         mergeWithPreviousNewswire(httpd.newswire, newNewswire) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |         httpd.newswire = newNewswire | 
					
						
							| 
									
										
										
										
											2020-11-03 21:53:29 +00:00
										 |  |  |         if newNewswire: | 
					
						
							|  |  |  |             saveJson(httpd.newswire, newswireStateFilename) | 
					
						
							|  |  |  |             print('Newswire updated') | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         convertRSStoActivityPub(baseDir, | 
					
						
							|  |  |  |                                 httpPrefix, domain, port, | 
					
						
							| 
									
										
										
										
											2020-10-08 12:13:42 +00:00
										 |  |  |                                 newNewswire, translate, | 
					
						
							|  |  |  |                                 httpd.recentPostsCache, | 
					
						
							|  |  |  |                                 httpd.maxRecentPosts, | 
					
						
							|  |  |  |                                 httpd.session, | 
					
						
							|  |  |  |                                 httpd.cachedWebfingers, | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |                                 httpd.personCache, | 
					
						
							|  |  |  |                                 httpd.federationList, | 
					
						
							|  |  |  |                                 httpd.sendThreads, | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |                                 httpd.postLog, | 
					
						
							|  |  |  |                                 httpd.maxMirroredArticles) | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |         print('Newswire feed converted to ActivityPub') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  |         if httpd.maxNewsPosts > 0: | 
					
						
							|  |  |  |             archiveDir = baseDir + '/archive' | 
					
						
							|  |  |  |             archiveSubdir = \ | 
					
						
							|  |  |  |                 archiveDir + '/accounts/news@' + domain + '/outbox' | 
					
						
							|  |  |  |             archivePostsForPerson(httpPrefix, 'news', | 
					
						
							|  |  |  |                                   domain, baseDir, 'outbox', | 
					
						
							|  |  |  |                                   archiveSubdir, | 
					
						
							|  |  |  |                                   httpd.recentPostsCache, | 
					
						
							|  |  |  |                                   httpd.maxNewsPosts) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |         # wait a while before the next feeds update | 
					
						
							|  |  |  |         time.sleep(1200) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def runNewswireWatchdog(projectVersion: str, httpd) -> None: | 
					
						
							|  |  |  |     """This tries to keep the newswire update thread running even if it dies
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     print('Starting newswire watchdog') | 
					
						
							|  |  |  |     newswireOriginal = \ | 
					
						
							|  |  |  |         httpd.thrPostSchedule.clone(runNewswireDaemon) | 
					
						
							|  |  |  |     httpd.thrNewswireDaemon.start() | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         time.sleep(50) | 
					
						
							|  |  |  |         if not httpd.thrNewswireDaemon.isAlive(): | 
					
						
							|  |  |  |             httpd.thrNewswireDaemon.kill() | 
					
						
							|  |  |  |             httpd.thrNewswireDaemon = \ | 
					
						
							|  |  |  |                 newswireOriginal.clone(runNewswireDaemon) | 
					
						
							|  |  |  |             httpd.thrNewswireDaemon.start() | 
					
						
							|  |  |  |             print('Restarting newswire daemon...') |