| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | __filename__ = "newsdaemon.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							| 
									
										
										
										
											2021-01-26 10:07:42 +00:00
										 |  |  | __version__ = "1.2.0" | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							| 
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 |  |  | __email__ = "bob@libreserver.org" | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | __status__ = "Production" | 
					
						
							| 
									
										
										
										
											2021-06-26 11:27:14 +00:00
										 |  |  | __module_group__ = "Web Interface Columns" | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:53:08 +00:00
										 |  |  | # Example hashtag logic: | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # if moderated and not #imcoxford then block | 
					
						
							|  |  |  | # if #pol and contains "westminster" then add #britpol | 
					
						
							| 
									
										
										
										
											2020-10-17 19:06:56 +00:00
										 |  |  | # if #unwantedtag then block | 
					
						
							| 
									
										
										
										
											2020-10-17 18:53:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | import time | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  | import datetime | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  | import html | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  | from shutil import rmtree | 
					
						
							|  |  |  | from subprocess import Popen | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  | from collections import OrderedDict | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | from newswire import getDictFromNewswire | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  | # from posts import sendSignedJson | 
					
						
							| 
									
										
										
										
											2020-10-07 21:26:03 +00:00
										 |  |  | from posts import createNewsPost | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  | from posts import archivePostsForPerson | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | from content import validHashTag | 
					
						
							| 
									
										
										
										
											2021-12-26 11:29:40 +00:00
										 |  |  | from utils import get_base_content_from_post | 
					
						
							| 
									
										
										
										
											2021-01-11 21:38:31 +00:00
										 |  |  | from utils import removeHtml | 
					
						
							| 
									
										
										
										
											2020-12-16 11:04:46 +00:00
										 |  |  | from utils import getFullDomain | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | from utils import loadJson | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | from utils import saveJson | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  | from utils import getStatusNumber | 
					
						
							| 
									
										
										
										
											2020-10-18 16:19:28 +00:00
										 |  |  | from utils import clearFromPostCaches | 
					
						
							| 
									
										
										
										
											2021-01-31 11:05:17 +00:00
										 |  |  | from utils import dangerousMarkup | 
					
						
							| 
									
										
										
										
											2021-12-26 10:19:59 +00:00
										 |  |  | from utils import local_actor_url | 
					
						
							| 
									
										
										
										
											2020-10-17 13:39:04 +00:00
										 |  |  | from inbox import storeHashTags | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  | from session import createSession | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-08 12:29:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  | def _updateFeedsOutboxIndex(base_dir: str, domain: str, postId: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     """Updates the index used for imported RSS feeds
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     basePath = base_dir + '/accounts/news@' + domain | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |     indexFilename = basePath + '/outbox.index' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if os.path.isfile(indexFilename): | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  |         if postId not in open(indexFilename).read(): | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 with open(indexFilename, 'r+') as feedsFile: | 
					
						
							|  |  |  |                     content = feedsFile.read() | 
					
						
							| 
									
										
										
										
											2020-12-29 20:22:28 +00:00
										 |  |  |                     if postId + '\n' not in content: | 
					
						
							|  |  |  |                         feedsFile.seek(0, 0) | 
					
						
							|  |  |  |                         feedsFile.write(postId + '\n' + content) | 
					
						
							|  |  |  |                         print('DEBUG: feeds post added to index') | 
					
						
							| 
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 |  |  |             except Exception as ex: | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  |                 print('WARN: Failed to write entry to feeds posts index ' + | 
					
						
							| 
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 |  |  |                       indexFilename + ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											2021-11-25 21:18:53 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             with open(indexFilename, 'w+') as feedsFile: | 
					
						
							|  |  |  |                 feedsFile.write(postId + '\n') | 
					
						
							|  |  |  |         except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |             print('EX: unable to write ' + indexFilename) | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  | def _saveArrivedTime(base_dir: str, postFilename: str, arrived: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  |     """Saves the time when an rss post arrived to a file
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-11-25 21:18:53 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         with open(postFilename + '.arrived', 'w+') as arrivedFile: | 
					
						
							|  |  |  |             arrivedFile.write(arrived) | 
					
						
							|  |  |  |     except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |         print('EX: unable to write ' + postFilename + '.arrived') | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _removeControlCharacters(content: str) -> str: | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  |     """Remove escaped html
 | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  |     if '&' in content: | 
					
						
							|  |  |  |         return html.unescape(content) | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |     return content | 
					
						
							| 
									
										
										
										
											2020-10-10 09:36:23 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-10 08:54:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  | def _hashtagLogicalNot(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                        content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ NOT
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) != 2: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     if isinstance(tree[1], str): | 
					
						
							|  |  |  |         return tree[1] not in hashtags | 
					
						
							|  |  |  |     elif isinstance(tree[1], list): | 
					
						
							|  |  |  |         return not hashtagRuleResolve(tree[1], hashtags, | 
					
						
							|  |  |  |                                       moderated, content, url) | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _hashtagLogicalContains(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                             content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ Contains
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) != 2: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     matchStr = None | 
					
						
							|  |  |  |     if isinstance(tree[1], str): | 
					
						
							|  |  |  |         matchStr = tree[1] | 
					
						
							|  |  |  |     elif isinstance(tree[1], list): | 
					
						
							|  |  |  |         matchStr = tree[1][0] | 
					
						
							|  |  |  |     if matchStr: | 
					
						
							|  |  |  |         if matchStr.startswith('"') and matchStr.endswith('"'): | 
					
						
							|  |  |  |             matchStr = matchStr[1:] | 
					
						
							|  |  |  |             matchStr = matchStr[:len(matchStr) - 1] | 
					
						
							|  |  |  |         matchStrLower = matchStr.lower() | 
					
						
							|  |  |  |         contentWithoutTags = content.replace('#' + matchStrLower, '') | 
					
						
							|  |  |  |         return matchStrLower in contentWithoutTags | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _hashtagLogicalFrom(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                         content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ FROM
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) != 2: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     matchStr = None | 
					
						
							|  |  |  |     if isinstance(tree[1], str): | 
					
						
							|  |  |  |         matchStr = tree[1] | 
					
						
							|  |  |  |     elif isinstance(tree[1], list): | 
					
						
							|  |  |  |         matchStr = tree[1][0] | 
					
						
							|  |  |  |     if matchStr: | 
					
						
							|  |  |  |         if matchStr.startswith('"') and matchStr.endswith('"'): | 
					
						
							|  |  |  |             matchStr = matchStr[1:] | 
					
						
							|  |  |  |             matchStr = matchStr[:len(matchStr) - 1] | 
					
						
							|  |  |  |         return matchStr.lower() in url | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _hashtagLogicalAnd(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                        content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ AND
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) < 3: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |         argValue = False | 
					
						
							|  |  |  |         if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |             argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |         elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |             argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                           hashtags, moderated, | 
					
						
							|  |  |  |                                           content, url) | 
					
						
							|  |  |  |         if not argValue: | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _hashtagLogicalOr(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                       content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ OR
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) < 3: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |         argValue = False | 
					
						
							|  |  |  |         if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |             argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |         elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |             argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                           hashtags, moderated, | 
					
						
							|  |  |  |                                           content, url) | 
					
						
							|  |  |  |         if argValue: | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _hashtagLogicalXor(tree: [], hashtags: [], moderated: bool, | 
					
						
							|  |  |  |                        content: str, url: str) -> bool: | 
					
						
							|  |  |  |     """ XOR
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if len(tree) < 3: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     trueCtr = 0 | 
					
						
							|  |  |  |     for argIndex in range(1, len(tree)): | 
					
						
							|  |  |  |         argValue = False | 
					
						
							|  |  |  |         if isinstance(tree[argIndex], str): | 
					
						
							|  |  |  |             argValue = (tree[argIndex] in hashtags) | 
					
						
							|  |  |  |         elif isinstance(tree[argIndex], list): | 
					
						
							|  |  |  |             argValue = hashtagRuleResolve(tree[argIndex], | 
					
						
							|  |  |  |                                           hashtags, moderated, | 
					
						
							|  |  |  |                                           content, url) | 
					
						
							|  |  |  |         if argValue: | 
					
						
							|  |  |  |             trueCtr += 1 | 
					
						
							|  |  |  |     if trueCtr == 1: | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  | def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool, | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |                        content: str, url: str) -> bool: | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     """Returns whether the tree for a hashtag rule evaluates to true or false
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not tree: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if tree[0] == 'not': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalNot(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     elif tree[0] == 'contains': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalContains(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |     elif tree[0] == 'from': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalFrom(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0] == 'and': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalAnd(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0] == 'or': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalOr(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-20 09:43:30 +00:00
										 |  |  |     elif tree[0] == 'xor': | 
					
						
							| 
									
										
										
										
											2021-07-04 09:24:35 +00:00
										 |  |  |         return _hashtagLogicalXor(tree, hashtags, moderated, content, url) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     elif tree[0].startswith('#') and len(tree) == 1: | 
					
						
							|  |  |  |         return tree[0] in hashtags | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |     elif tree[0].startswith('moderated'): | 
					
						
							|  |  |  |         return moderated | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     elif tree[0].startswith('"') and tree[0].endswith('"'): | 
					
						
							|  |  |  |         return True | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def hashtagRuleTree(operators: [], | 
					
						
							|  |  |  |                     conditionsStr: str, | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |                     tagsInConditions: [], | 
					
						
							|  |  |  |                     moderated: bool) -> []: | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     """Walks the tree
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not operators and conditionsStr: | 
					
						
							|  |  |  |         conditionsStr = conditionsStr.strip() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |         isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') | 
					
						
							|  |  |  |         if conditionsStr.startswith('#') or isStr or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |            conditionsStr in operators or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |            conditionsStr == 'moderated' or \ | 
					
						
							|  |  |  |            conditionsStr == 'contains': | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             if conditionsStr.startswith('#'): | 
					
						
							|  |  |  |                 if conditionsStr not in tagsInConditions: | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                     if ' ' not in conditionsStr or \ | 
					
						
							|  |  |  |                        conditionsStr.startswith('"'): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                         tagsInConditions.append(conditionsStr) | 
					
						
							|  |  |  |             return [conditionsStr.strip()] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  |     if not operators or not conditionsStr: | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  |     tree = None | 
					
						
							|  |  |  |     conditionsStr = conditionsStr.strip() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') | 
					
						
							|  |  |  |     if conditionsStr.startswith('#') or isStr or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |        conditionsStr in operators or \ | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |        conditionsStr == 'moderated' or \ | 
					
						
							|  |  |  |        conditionsStr == 'contains': | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |         if conditionsStr.startswith('#'): | 
					
						
							|  |  |  |             if conditionsStr not in tagsInConditions: | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |                 if ' ' not in conditionsStr or \ | 
					
						
							|  |  |  |                    conditionsStr.startswith('"'): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                     tagsInConditions.append(conditionsStr) | 
					
						
							|  |  |  |         tree = [conditionsStr.strip()] | 
					
						
							|  |  |  |     ctr = 0 | 
					
						
							|  |  |  |     while ctr < len(operators): | 
					
						
							|  |  |  |         op = operators[ctr] | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |         opMatch = ' ' + op + ' ' | 
					
						
							|  |  |  |         if opMatch not in conditionsStr and \ | 
					
						
							|  |  |  |            not conditionsStr.startswith(op + ' '): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             ctr += 1 | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             tree = [op] | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |             if opMatch in conditionsStr: | 
					
						
							|  |  |  |                 sections = conditionsStr.split(opMatch) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 sections = conditionsStr.split(op + ' ', 1) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             for subConditionStr in sections: | 
					
						
							| 
									
										
										
										
											2020-10-18 15:10:36 +00:00
										 |  |  |                 result = hashtagRuleTree(operators[ctr + 1:], | 
					
						
							|  |  |  |                                          subConditionStr, | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |                                          tagsInConditions, moderated) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |                 if result: | 
					
						
							|  |  |  |                     tree.append(result) | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     return tree | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  | def _hashtagAdd(base_dir: str, http_prefix: str, domain_full: str, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |                 post_json_object: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                 actionStr: str, hashtags: [], system_language: str, | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |                 translate: {}) -> None: | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     """Adds a hashtag via a hashtag rule
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     addHashtag = actionStr.split('add ', 1)[1].strip() | 
					
						
							|  |  |  |     if not addHashtag.startswith('#'): | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if addHashtag not in hashtags: | 
					
						
							|  |  |  |         hashtags.append(addHashtag) | 
					
						
							|  |  |  |     htId = addHashtag.replace('#', '') | 
					
						
							|  |  |  |     if not validHashTag(htId): | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |     hashtagUrl = http_prefix + "://" + domain_full + "/tags/" + htId | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     newTag = { | 
					
						
							|  |  |  |         'href': hashtagUrl, | 
					
						
							|  |  |  |         'name': addHashtag, | 
					
						
							|  |  |  |         'type': 'Hashtag' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     # does the tag already exist? | 
					
						
							|  |  |  |     addTagObject = None | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |     for t in post_json_object['object']['tag']: | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |         if t.get('type') and t.get('name'): | 
					
						
							|  |  |  |             if t['type'] == 'Hashtag' and \ | 
					
						
							|  |  |  |                t['name'] == addHashtag: | 
					
						
							|  |  |  |                 addTagObject = t | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |     # append the tag if it wasn't found | 
					
						
							|  |  |  |     if not addTagObject: | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |         post_json_object['object']['tag'].append(newTag) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     # add corresponding html to the post content | 
					
						
							|  |  |  |     hashtagHtml = \ | 
					
						
							|  |  |  |         " <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \ | 
					
						
							|  |  |  |         "rel=\"tag\">#<span>" + htId + "</span></a>" | 
					
						
							| 
									
										
										
										
											2021-12-26 11:29:40 +00:00
										 |  |  |     content = get_base_content_from_post(post_json_object, system_language) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     if hashtagHtml in content: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if content.endswith('</p>'): | 
					
						
							|  |  |  |         content = \ | 
					
						
							|  |  |  |             content[:len(content) - len('</p>')] + \ | 
					
						
							|  |  |  |             hashtagHtml + '</p>' | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         content += hashtagHtml | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |     post_json_object['object']['content'] = content | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |     domain = domain_full | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |     if ':' in domain: | 
					
						
							|  |  |  |         domain = domain.split(':')[0] | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     storeHashTags(base_dir, 'news', domain, | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |                   http_prefix, domain_full, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |                   post_json_object, translate) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  | def _hashtagRemove(http_prefix: str, domain_full: str, post_json_object: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                    actionStr: str, hashtags: [], system_language: str) -> None: | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     """Removes a hashtag via a hashtag rule
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     rmHashtag = actionStr.split('remove ', 1)[1].strip() | 
					
						
							|  |  |  |     if not rmHashtag.startswith('#'): | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if rmHashtag in hashtags: | 
					
						
							|  |  |  |         hashtags.remove(rmHashtag) | 
					
						
							|  |  |  |     htId = rmHashtag.replace('#', '') | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |     hashtagUrl = http_prefix + "://" + domain_full + "/tags/" + htId | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     # remove tag html from the post content | 
					
						
							|  |  |  |     hashtagHtml = \ | 
					
						
							|  |  |  |         "<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \ | 
					
						
							|  |  |  |         "rel=\"tag\">#<span>" + htId + "</span></a>" | 
					
						
							| 
									
										
										
										
											2021-12-26 11:29:40 +00:00
										 |  |  |     content = get_base_content_from_post(post_json_object, system_language) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     if hashtagHtml in content: | 
					
						
							|  |  |  |         content = content.replace(hashtagHtml, '').replace('  ', ' ') | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |         post_json_object['object']['content'] = content | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |         post_json_object['object']['contentMap'][system_language] = content | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |     rmTagObject = None | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |     for t in post_json_object['object']['tag']: | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |         if t.get('type') and t.get('name'): | 
					
						
							|  |  |  |             if t['type'] == 'Hashtag' and \ | 
					
						
							|  |  |  |                t['name'] == rmHashtag: | 
					
						
							|  |  |  |                 rmTagObject = t | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |     if rmTagObject: | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |         post_json_object['object']['tag'].remove(rmTagObject) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  | def _newswireHashtagProcessing(session, base_dir: str, post_json_object: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |                                hashtags: [], http_prefix: str, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                domain: str, port: int, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:17:49 +00:00
										 |  |  |                                person_cache: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:28:18 +00:00
										 |  |  |                                cached_webfingers: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:45:30 +00:00
										 |  |  |                                federation_list: [], | 
					
						
							| 
									
										
										
										
											2021-12-25 21:37:41 +00:00
										 |  |  |                                send_threads: [], postLog: [], | 
					
						
							| 
									
										
										
										
											2021-07-18 14:15:16 +00:00
										 |  |  |                                moderated: bool, url: str, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                                system_language: str, | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |                                translate: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |     """Applies hashtag rules to a news post.
 | 
					
						
							|  |  |  |     Returns true if the post should be saved to the news timeline | 
					
						
							|  |  |  |     of this instance | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     rulesFilename = base_dir + '/accounts/hashtagrules.txt' | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     if not os.path.isfile(rulesFilename): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     rules = [] | 
					
						
							| 
									
										
										
										
											2021-07-13 14:40:49 +00:00
										 |  |  |     with open(rulesFilename, 'r') as f: | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |         rules = f.readlines() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |     domain_full = getFullDomain(domain, port) | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  |     # get the full text content of the post | 
					
						
							|  |  |  |     content = '' | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |     if post_json_object['object'].get('content'): | 
					
						
							| 
									
										
										
										
											2021-12-26 11:29:40 +00:00
										 |  |  |         content += get_base_content_from_post(post_json_object, | 
					
						
							|  |  |  |                                               system_language) | 
					
						
							| 
									
										
										
										
											2021-12-25 22:09:19 +00:00
										 |  |  |     if post_json_object['object'].get('summary'): | 
					
						
							|  |  |  |         content += ' ' + post_json_object['object']['summary'] | 
					
						
							| 
									
										
										
										
											2020-10-17 19:04:39 +00:00
										 |  |  |     content = content.lower() | 
					
						
							| 
									
										
										
										
											2020-10-17 18:49:43 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-17 13:41:20 +00:00
										 |  |  |     # actionOccurred = False | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |     operators = ('not', 'and', 'or', 'xor', 'from', 'contains') | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |     for ruleStr in rules: | 
					
						
							|  |  |  |         if not ruleStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not ruleStr.startswith('if '): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if ' then ' not in ruleStr: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         conditionsStr = ruleStr.split('if ', 1)[1] | 
					
						
							|  |  |  |         conditionsStr = conditionsStr.split(' then ')[0] | 
					
						
							|  |  |  |         tagsInConditions = [] | 
					
						
							| 
									
										
										
										
											2020-10-17 17:36:10 +00:00
										 |  |  |         tree = hashtagRuleTree(operators, conditionsStr, | 
					
						
							|  |  |  |                                tagsInConditions, moderated) | 
					
						
							| 
									
										
										
										
											2020-10-20 17:37:15 +00:00
										 |  |  |         if not hashtagRuleResolve(tree, hashtags, moderated, content, url): | 
					
						
							| 
									
										
										
										
											2020-10-17 12:05:41 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         # the condition matches, so do something | 
					
						
							|  |  |  |         actionStr = ruleStr.split(' then ')[1].strip() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if actionStr.startswith('add '): | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |             # add a hashtag | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |             _hashtagAdd(base_dir, http_prefix, domain_full, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                         post_json_object, actionStr, hashtags, system_language, | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |                         translate) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |         elif actionStr.startswith('remove '): | 
					
						
							|  |  |  |             # remove a hashtag | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |             _hashtagRemove(http_prefix, domain_full, post_json_object, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                            actionStr, hashtags, system_language) | 
					
						
							| 
									
										
										
										
											2021-07-04 09:46:48 +00:00
										 |  |  |         elif actionStr.startswith('block') or actionStr.startswith('drop'): | 
					
						
							|  |  |  |             # Block this item | 
					
						
							| 
									
										
										
										
											2020-10-17 16:24:47 +00:00
										 |  |  |             return False | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  | def _createNewsMirror(base_dir: str, domain: str, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                       postIdNumber: str, url: str, | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |                       max_mirrored_articles: int) -> bool: | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |     """Creates a local mirror of a news article
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     if '|' in url or '>' in url: | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     mirrorDir = base_dir + '/accounts/newsmirror' | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |     if not os.path.isdir(mirrorDir): | 
					
						
							|  |  |  |         os.mkdir(mirrorDir) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     # count the directories | 
					
						
							|  |  |  |     noOfDirs = 0 | 
					
						
							|  |  |  |     for subdir, dirs, files in os.walk(mirrorDir): | 
					
						
							|  |  |  |         noOfDirs = len(dirs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     mirrorIndexFilename = base_dir + '/accounts/newsmirror.txt' | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |     if max_mirrored_articles > 0 and noOfDirs > max_mirrored_articles: | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |         if not os.path.isfile(mirrorIndexFilename): | 
					
						
							|  |  |  |             # no index for mirrors found | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |         removals = [] | 
					
						
							|  |  |  |         with open(mirrorIndexFilename, 'r') as indexFile: | 
					
						
							|  |  |  |             # remove the oldest directories | 
					
						
							|  |  |  |             ctr = 0 | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |             while noOfDirs > max_mirrored_articles: | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |                 ctr += 1 | 
					
						
							|  |  |  |                 if ctr > 5000: | 
					
						
							|  |  |  |                     # escape valve | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 postId = indexFile.readline() | 
					
						
							|  |  |  |                 if not postId: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 postId = postId.strip() | 
					
						
							|  |  |  |                 mirrorArticleDir = mirrorDir + '/' + postId | 
					
						
							|  |  |  |                 if os.path.isdir(mirrorArticleDir): | 
					
						
							| 
									
										
										
										
											2021-10-29 18:48:15 +00:00
										 |  |  |                     rmtree(mirrorArticleDir, ignore_errors=False, onerror=None) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |                     removals.append(postId) | 
					
						
							|  |  |  |                     noOfDirs -= 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # remove the corresponding index entries | 
					
						
							|  |  |  |         if removals: | 
					
						
							| 
									
										
										
										
											2021-06-21 22:52:04 +00:00
										 |  |  |             indexContent = '' | 
					
						
							|  |  |  |             with open(mirrorIndexFilename, 'r') as indexFile: | 
					
						
							|  |  |  |                 indexContent = indexFile.read() | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |                 for removePostId in removals: | 
					
						
							|  |  |  |                     indexContent = \ | 
					
						
							|  |  |  |                         indexContent.replace(removePostId + '\n', '') | 
					
						
							| 
									
										
										
										
											2021-11-25 21:18:53 +00:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 with open(mirrorIndexFilename, 'w+') as indexFile: | 
					
						
							|  |  |  |                     indexFile.write(indexContent) | 
					
						
							|  |  |  |             except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |                 print('EX: unable to write ' + mirrorIndexFilename) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     mirrorArticleDir = mirrorDir + '/' + postIdNumber | 
					
						
							|  |  |  |     if os.path.isdir(mirrorArticleDir): | 
					
						
							|  |  |  |         # already mirrored | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |     # for onion instances mirror via tor | 
					
						
							|  |  |  |     prefixStr = '' | 
					
						
							|  |  |  |     if domain.endswith('.onion'): | 
					
						
							|  |  |  |         prefixStr = '/usr/bin/torsocks ' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     # download the files | 
					
						
							|  |  |  |     commandStr = \ | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |         prefixStr + '/usr/bin/wget -mkEpnp -e robots=off ' + url + \ | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |         ' -P ' + mirrorArticleDir | 
					
						
							|  |  |  |     p = Popen(commandStr, shell=True) | 
					
						
							|  |  |  |     os.waitpid(p.pid, 0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not os.path.isdir(mirrorArticleDir): | 
					
						
							| 
									
										
										
										
											2020-10-20 09:27:58 +00:00
										 |  |  |         print('WARN: failed to mirror ' + url) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # append the post Id number to the index file | 
					
						
							|  |  |  |     if os.path.isfile(mirrorIndexFilename): | 
					
						
							| 
									
										
										
										
											2021-11-25 21:18:53 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             with open(mirrorIndexFilename, 'a+') as indexFile: | 
					
						
							|  |  |  |                 indexFile.write(postIdNumber + '\n') | 
					
						
							|  |  |  |         except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |             print('EX: unable to append ' + mirrorIndexFilename) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											2021-11-25 21:18:53 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             with open(mirrorIndexFilename, 'w+') as indexFile: | 
					
						
							|  |  |  |                 indexFile.write(postIdNumber + '\n') | 
					
						
							|  |  |  |         except OSError: | 
					
						
							| 
									
										
										
										
											2021-11-25 22:22:54 +00:00
										 |  |  |             print('EX: unable to write ' + mirrorIndexFilename) | 
					
						
							| 
									
										
										
										
											2020-10-19 19:26:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  | def _convertRSStoActivityPub(base_dir: str, http_prefix: str, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                              domain: str, port: int, | 
					
						
							|  |  |  |                              newswire: {}, | 
					
						
							|  |  |  |                              translate: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 20:28:06 +00:00
										 |  |  |                              recentPostsCache: {}, max_recent_posts: int, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:28:18 +00:00
										 |  |  |                              session, cached_webfingers: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:17:49 +00:00
										 |  |  |                              person_cache: {}, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:45:30 +00:00
										 |  |  |                              federation_list: [], | 
					
						
							| 
									
										
										
										
											2021-12-25 21:37:41 +00:00
										 |  |  |                              send_threads: [], postLog: [], | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |                              max_mirrored_articles: int, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:54:50 +00:00
										 |  |  |                              allow_local_network_access: bool, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                              system_language: str, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:20:56 +00:00
										 |  |  |                              low_bandwidth: bool, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:13:38 +00:00
										 |  |  |                              content_license_url: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     """Converts rss items in a newswire into posts
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-11-03 14:41:28 +00:00
										 |  |  |     if not newswire: | 
					
						
							| 
									
										
										
										
											2021-09-15 17:43:06 +00:00
										 |  |  |         print('No newswire to convert') | 
					
						
							| 
									
										
										
										
											2020-11-03 14:41:28 +00:00
										 |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     basePath = base_dir + '/accounts/news@' + domain + '/outbox' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |     if not os.path.isdir(basePath): | 
					
						
							|  |  |  |         os.mkdir(basePath) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  |     # oldest items first | 
					
						
							| 
									
										
										
										
											2021-07-13 21:59:53 +00:00
										 |  |  |     newswireReverse = OrderedDict(sorted(newswire.items(), reverse=False)) | 
					
						
							| 
									
										
										
										
											2020-10-07 18:46:42 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for dateStr, item in newswireReverse.items(): | 
					
						
							| 
									
										
										
										
											2020-10-07 20:03:39 +00:00
										 |  |  |         originalDateStr = dateStr | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # convert the date to the format used by ActivityPub | 
					
						
							| 
									
										
										
										
											2020-10-20 12:37:32 +00:00
										 |  |  |         if '+00:00' in dateStr: | 
					
						
							|  |  |  |             dateStr = dateStr.replace(' ', 'T') | 
					
						
							|  |  |  |             dateStr = dateStr.replace('+00:00', 'Z') | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2021-09-15 17:43:06 +00:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 dateStrWithOffset = \ | 
					
						
							|  |  |  |                     datetime.datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z") | 
					
						
							|  |  |  |             except BaseException: | 
					
						
							| 
									
										
										
										
											2021-10-29 18:48:15 +00:00
										 |  |  |                 print('EX: Newswire strptime failed ' + str(dateStr)) | 
					
						
							| 
									
										
										
										
											2021-09-15 17:43:06 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2021-09-15 19:04:29 +00:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 dateStr = dateStrWithOffset.strftime("%Y-%m-%dT%H:%M:%SZ") | 
					
						
							|  |  |  |             except BaseException: | 
					
						
							| 
									
										
										
										
											2021-10-29 18:48:15 +00:00
										 |  |  |                 print('EX: Newswire dateStrWithOffset failed ' + | 
					
						
							| 
									
										
										
										
											2021-09-15 19:04:29 +00:00
										 |  |  |                       str(dateStrWithOffset)) | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         statusNumber, published = getStatusNumber(dateStr) | 
					
						
							|  |  |  |         newPostId = \ | 
					
						
							| 
									
										
										
										
											2021-12-26 10:19:59 +00:00
										 |  |  |             local_actor_url(http_prefix, 'news', domain) + \ | 
					
						
							| 
									
										
										
										
											2021-08-14 11:13:39 +00:00
										 |  |  |             '/statuses/' + statusNumber | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # file where the post is stored | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         filename = basePath + '/' + newPostId.replace('/', '#') + '.json' | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |         if os.path.isfile(filename): | 
					
						
							| 
									
										
										
										
											2020-10-08 12:52:15 +00:00
										 |  |  |             # don't create the post if it already exists | 
					
						
							| 
									
										
										
										
											2020-10-08 14:35:26 +00:00
										 |  |  |             # set the url | 
					
						
							| 
									
										
										
										
											2020-11-08 18:29:01 +00:00
										 |  |  |             # newswire[originalDateStr][1] = \ | 
					
						
							|  |  |  |             #     '/users/news/statuses/' + statusNumber | 
					
						
							| 
									
										
										
										
											2020-10-08 14:35:26 +00:00
										 |  |  |             # set the filename | 
					
						
							|  |  |  |             newswire[originalDateStr][3] = filename | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |         rssTitle = _removeControlCharacters(item[0]) | 
					
						
							| 
									
										
										
										
											2020-10-10 09:53:56 +00:00
										 |  |  |         url = item[1] | 
					
						
							| 
									
										
										
										
											2021-12-25 18:54:50 +00:00
										 |  |  |         if dangerousMarkup(url, allow_local_network_access) or \ | 
					
						
							|  |  |  |            dangerousMarkup(rssTitle, allow_local_network_access): | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-10-07 13:55:27 +00:00
										 |  |  |         rssDescription = '' | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # get the rss description if it exists | 
					
						
							| 
									
										
										
										
											2021-01-11 21:38:31 +00:00
										 |  |  |         rssDescription = '<p>' + removeHtml(item[4]) + '<p>' | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  |         mirrored = item[7] | 
					
						
							|  |  |  |         postUrl = url | 
					
						
							|  |  |  |         if mirrored and '://' in url: | 
					
						
							| 
									
										
										
										
											2020-10-19 22:17:06 +00:00
										 |  |  |             postUrl = '/newsmirror/' + statusNumber + '/' + \ | 
					
						
							| 
									
										
										
										
											2020-10-19 22:21:30 +00:00
										 |  |  |                 url.split('://')[1] | 
					
						
							|  |  |  |             if postUrl.endswith('/'): | 
					
						
							|  |  |  |                 postUrl += 'index.html' | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 postUrl += '/index.html' | 
					
						
							| 
									
										
										
										
											2020-10-19 20:43:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  |         # add the off-site link to the description | 
					
						
							| 
									
										
										
										
											2021-01-11 21:38:31 +00:00
										 |  |  |         rssDescription += \ | 
					
						
							|  |  |  |             '<br><a href="' + postUrl + '">' + \ | 
					
						
							|  |  |  |             translate['Read more...'] + '</a>' | 
					
						
							| 
									
										
										
										
											2020-10-11 09:33:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         followersOnly = False | 
					
						
							| 
									
										
										
										
											2020-10-09 10:08:01 +00:00
										 |  |  |         # NOTE: the id when the post is created will not be | 
					
						
							|  |  |  |         # consistent (it's based on the current time, not the | 
					
						
							|  |  |  |         # published time), so we change that later | 
					
						
							| 
									
										
										
										
											2021-05-09 19:11:05 +00:00
										 |  |  |         saveToFile = False | 
					
						
							|  |  |  |         attachImageFilename = None | 
					
						
							|  |  |  |         mediaType = None | 
					
						
							|  |  |  |         imageDescription = None | 
					
						
							| 
									
										
										
										
											2021-05-09 19:29:53 +00:00
										 |  |  |         city = 'London, England' | 
					
						
							| 
									
										
										
										
											2021-08-08 16:52:32 +00:00
										 |  |  |         conversationId = None | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |         blog = createNewsPost(base_dir, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |                               domain, port, http_prefix, | 
					
						
							| 
									
										
										
										
											2020-10-11 11:00:28 +00:00
										 |  |  |                               rssDescription, | 
					
						
							| 
									
										
										
										
											2021-05-09 19:11:05 +00:00
										 |  |  |                               followersOnly, saveToFile, | 
					
						
							|  |  |  |                               attachImageFilename, mediaType, | 
					
						
							|  |  |  |                               imageDescription, city, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                               rssTitle, system_language, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:20:56 +00:00
										 |  |  |                               conversationId, low_bandwidth, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:13:38 +00:00
										 |  |  |                               content_license_url) | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         if not blog: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |         if mirrored: | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |             if not _createNewsMirror(base_dir, domain, statusNumber, | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |                                      url, max_mirrored_articles): | 
					
						
							| 
									
										
										
										
											2020-10-19 16:33:58 +00:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         idStr = \ | 
					
						
							| 
									
										
										
										
											2021-12-26 10:19:59 +00:00
										 |  |  |             local_actor_url(http_prefix, 'news', domain) + \ | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |             '/statuses/' + statusNumber + '/replies' | 
					
						
							| 
									
										
										
										
											2020-10-08 09:07:45 +00:00
										 |  |  |         blog['news'] = True | 
					
						
							| 
									
										
										
										
											2020-10-09 10:05:01 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # note the time of arrival | 
					
						
							|  |  |  |         currTime = datetime.datetime.utcnow() | 
					
						
							|  |  |  |         blog['object']['arrived'] = currTime.strftime("%Y-%m-%dT%H:%M:%SZ") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 10:08:01 +00:00
										 |  |  |         # change the id, based upon the published time | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         blog['object']['replies']['id'] = idStr | 
					
						
							|  |  |  |         blog['object']['replies']['first']['partOf'] = idStr | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         blog['id'] = newPostId + '/activity' | 
					
						
							|  |  |  |         blog['object']['id'] = newPostId | 
					
						
							|  |  |  |         blog['object']['atomUri'] = newPostId | 
					
						
							|  |  |  |         blog['object']['url'] = \ | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |             http_prefix + '://' + domain + '/@news/' + statusNumber | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         blog['object']['published'] = dateStr | 
					
						
							| 
									
										
										
										
											2020-10-20 13:07:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-20 12:49:12 +00:00
										 |  |  |         blog['object']['content'] = rssDescription | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |         blog['object']['contentMap'][system_language] = rssDescription | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |         domain_full = getFullDomain(domain, port) | 
					
						
							| 
									
										
										
										
											2020-10-17 13:59:47 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         hashtags = item[6] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 16:55:15 +00:00
										 |  |  |         postId = newPostId.replace('/', '#') | 
					
						
							| 
									
										
										
										
											2020-10-07 14:10:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  |         moderated = item[5] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |         savePost = _newswireHashtagProcessing(session, base_dir, | 
					
						
							|  |  |  |                                               blog, hashtags, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |                                               http_prefix, domain, port, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:28:18 +00:00
										 |  |  |                                               person_cache, cached_webfingers, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:45:30 +00:00
										 |  |  |                                               federation_list, | 
					
						
							| 
									
										
										
										
											2021-12-25 21:37:41 +00:00
										 |  |  |                                               send_threads, postLog, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                                               moderated, url, system_language, | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |                                               translate) | 
					
						
							| 
									
										
										
										
											2020-10-09 12:15:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |         # save the post and update the index | 
					
						
							|  |  |  |         if savePost: | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |             # ensure that all hashtags are stored in the json | 
					
						
							|  |  |  |             # and appended to the content | 
					
						
							|  |  |  |             blog['object']['tag'] = [] | 
					
						
							| 
									
										
										
										
											2020-10-25 11:22:52 +00:00
										 |  |  |             for tagName in hashtags: | 
					
						
							|  |  |  |                 htId = tagName.replace('#', '') | 
					
						
							|  |  |  |                 hashtagUrl = \ | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |                     http_prefix + "://" + domain_full + "/tags/" + htId | 
					
						
							| 
									
										
										
										
											2020-10-25 11:22:52 +00:00
										 |  |  |                 newTag = { | 
					
						
							|  |  |  |                     'href': hashtagUrl, | 
					
						
							|  |  |  |                     'name': tagName, | 
					
						
							|  |  |  |                     'type': 'Hashtag' | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 blog['object']['tag'].append(newTag) | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |                 hashtagHtml = \ | 
					
						
							| 
									
										
										
										
											2020-10-25 12:57:14 +00:00
										 |  |  |                     " <a href=\"" + hashtagUrl + \ | 
					
						
							| 
									
										
										
										
											2020-10-25 12:00:55 +00:00
										 |  |  |                     "\" class=\"addedHashtag\" " + \ | 
					
						
							|  |  |  |                     "rel=\"tag\">#<span>" + \ | 
					
						
							|  |  |  |                     htId + "</span></a>" | 
					
						
							| 
									
										
										
										
											2021-12-26 11:29:40 +00:00
										 |  |  |                 content = get_base_content_from_post(blog, system_language) | 
					
						
							| 
									
										
										
										
											2020-10-25 14:37:51 +00:00
										 |  |  |                 if hashtagHtml not in content: | 
					
						
							|  |  |  |                     if content.endswith('</p>'): | 
					
						
							|  |  |  |                         content = \ | 
					
						
							|  |  |  |                             content[:len(content) - len('</p>')] + \ | 
					
						
							|  |  |  |                             hashtagHtml + '</p>' | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         content += hashtagHtml | 
					
						
							|  |  |  |                     blog['object']['content'] = content | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                     blog['object']['contentMap'][system_language] = content | 
					
						
							| 
									
										
										
										
											2020-10-25 11:22:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-25 14:21:29 +00:00
										 |  |  |             # update the newswire tags if new ones have been found by | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |             # _newswireHashtagProcessing | 
					
						
							| 
									
										
										
										
											2020-10-25 14:21:29 +00:00
										 |  |  |             for tag in hashtags: | 
					
						
							|  |  |  |                 if tag not in newswire[originalDateStr][6]: | 
					
						
							|  |  |  |                     newswire[originalDateStr][6].append(tag) | 
					
						
							| 
									
										
										
										
											2020-10-17 13:39:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |             storeHashTags(base_dir, 'news', domain, | 
					
						
							| 
									
										
										
										
											2021-12-26 10:00:46 +00:00
										 |  |  |                           http_prefix, domain_full, | 
					
						
							| 
									
										
										
										
											2021-10-20 13:33:34 +00:00
										 |  |  |                           blog, translate) | 
					
						
							| 
									
										
										
										
											2020-10-17 13:39:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |             clearFromPostCaches(base_dir, recentPostsCache, postId) | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |             if saveJson(blog, filename): | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |                 _updateFeedsOutboxIndex(base_dir, domain, postId + '.json') | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # Save a file containing the time when the post arrived | 
					
						
							|  |  |  |                 # this can then later be used to construct the news timeline | 
					
						
							|  |  |  |                 # excluding items during the voting period | 
					
						
							|  |  |  |                 if moderated: | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |                     _saveArrivedTime(base_dir, filename, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                      blog['object']['arrived']) | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     if os.path.isfile(filename + '.arrived'): | 
					
						
							| 
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 |  |  |                         try: | 
					
						
							|  |  |  |                             os.remove(filename + '.arrived') | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |                         except OSError: | 
					
						
							| 
									
										
										
										
											2021-10-29 18:48:15 +00:00
										 |  |  |                             print('EX: _convertRSStoActivityPub ' + | 
					
						
							|  |  |  |                                   'unable to delete ' + filename + '.arrived') | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-08 16:52:57 +00:00
										 |  |  |                 # setting the url here links to the activitypub object | 
					
						
							|  |  |  |                 # stored locally | 
					
						
							| 
									
										
										
										
											2020-11-08 16:50:50 +00:00
										 |  |  |                 # newswire[originalDateStr][1] = \ | 
					
						
							|  |  |  |                 #     '/users/news/statuses/' + statusNumber | 
					
						
							| 
									
										
										
										
											2020-11-08 16:52:57 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-16 21:33:18 +00:00
										 |  |  |                 # set the filename | 
					
						
							|  |  |  |                 newswire[originalDateStr][3] = filename | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  | def _mergeWithPreviousNewswire(oldNewswire: {}, newNewswire: {}) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  |     """Preserve any votes or generated activitypub post filename
 | 
					
						
							|  |  |  |     as rss feeds are updated | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-11-03 14:41:28 +00:00
										 |  |  |     if not oldNewswire: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  |     for published, fields in oldNewswire.items(): | 
					
						
							|  |  |  |         if not newNewswire.get(published): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2020-10-13 08:53:59 +00:00
										 |  |  |         for i in range(1, 5): | 
					
						
							|  |  |  |             newNewswire[published][i] = fields[i] | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  | def runNewswireDaemon(base_dir: str, httpd, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |                       http_prefix: str, domain: str, port: int, | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |                       translate: {}) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |     """Periodically updates RSS feeds
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     newswireStateFilename = base_dir + '/accounts/.newswirestate.json' | 
					
						
							|  |  |  |     refreshFilename = base_dir + '/accounts/.refresh_newswire' | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |     # initial sleep to allow the system to start up | 
					
						
							|  |  |  |     time.sleep(50) | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         # has the session been created yet? | 
					
						
							|  |  |  |         if not httpd.session: | 
					
						
							| 
									
										
										
										
											2020-11-03 16:10:54 +00:00
										 |  |  |             print('Newswire daemon waiting for session') | 
					
						
							| 
									
										
										
										
											2021-12-25 21:09:22 +00:00
										 |  |  |             httpd.session = createSession(httpd.proxy_type) | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  |             if not httpd.session: | 
					
						
							| 
									
										
										
										
											2020-11-03 16:10:54 +00:00
										 |  |  |                 print('Newswire daemon has no session') | 
					
						
							| 
									
										
										
										
											2020-11-03 16:08:31 +00:00
										 |  |  |                 time.sleep(60) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 print('Newswire daemon session established') | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # try to update the feeds | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |         print('Updating newswire feeds') | 
					
						
							| 
									
										
										
										
											2020-11-22 20:28:32 +00:00
										 |  |  |         newNewswire = \ | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |             getDictFromNewswire(httpd.session, base_dir, domain, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:49:19 +00:00
										 |  |  |                                 httpd.max_newswire_postsPerSource, | 
					
						
							| 
									
										
										
										
											2021-12-25 20:09:29 +00:00
										 |  |  |                                 httpd.max_newswire_feed_size_kb, | 
					
						
							| 
									
										
										
										
											2020-11-22 20:28:32 +00:00
										 |  |  |                                 httpd.maxTags, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:57:13 +00:00
										 |  |  |                                 httpd.max_feed_item_size_kb, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:49:19 +00:00
										 |  |  |                                 httpd.max_newswire_posts, | 
					
						
							| 
									
										
										
										
											2021-07-18 14:15:16 +00:00
										 |  |  |                                 httpd.maxCategoriesFeedItemSizeKb, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                                 httpd.system_language, | 
					
						
							| 
									
										
										
										
											2021-12-16 20:57:30 +00:00
										 |  |  |                                 httpd.debug) | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  |         if not httpd.newswire: | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |             print('Newswire feeds not updated') | 
					
						
							| 
									
										
										
										
											2020-10-09 09:13:16 +00:00
										 |  |  |             if os.path.isfile(newswireStateFilename): | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |                 print('Loading newswire from file') | 
					
						
							| 
									
										
										
										
											2020-10-09 09:13:16 +00:00
										 |  |  |                 httpd.newswire = loadJson(newswireStateFilename) | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |         print('Merging with previous newswire') | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |         _mergeWithPreviousNewswire(httpd.newswire, newNewswire) | 
					
						
							| 
									
										
										
										
											2020-10-09 09:02:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |         httpd.newswire = newNewswire | 
					
						
							| 
									
										
										
										
											2020-11-03 21:53:29 +00:00
										 |  |  |         if newNewswire: | 
					
						
							|  |  |  |             saveJson(httpd.newswire, newswireStateFilename) | 
					
						
							|  |  |  |             print('Newswire updated') | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             print('No new newswire') | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |         print('Converting newswire to activitypub format') | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |         _convertRSStoActivityPub(base_dir, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |                                  http_prefix, domain, port, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                  newNewswire, translate, | 
					
						
							|  |  |  |                                  httpd.recentPostsCache, | 
					
						
							| 
									
										
										
										
											2021-12-25 20:28:06 +00:00
										 |  |  |                                  httpd.max_recent_posts, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                  httpd.session, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:28:18 +00:00
										 |  |  |                                  httpd.cached_webfingers, | 
					
						
							| 
									
										
										
										
											2021-12-25 22:17:49 +00:00
										 |  |  |                                  httpd.person_cache, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:45:30 +00:00
										 |  |  |                                  httpd.federation_list, | 
					
						
							| 
									
										
										
										
											2021-12-25 21:37:41 +00:00
										 |  |  |                                  httpd.send_threads, | 
					
						
							| 
									
										
										
										
											2020-12-22 18:06:23 +00:00
										 |  |  |                                  httpd.postLog, | 
					
						
							| 
									
										
										
										
											2021-12-25 19:42:14 +00:00
										 |  |  |                                  httpd.max_mirrored_articles, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:54:50 +00:00
										 |  |  |                                  httpd.allow_local_network_access, | 
					
						
							| 
									
										
										
										
											2021-12-25 23:03:28 +00:00
										 |  |  |                                  httpd.system_language, | 
					
						
							| 
									
										
										
										
											2021-12-25 18:20:56 +00:00
										 |  |  |                                  httpd.low_bandwidth, | 
					
						
							| 
									
										
										
										
											2021-12-25 17:13:38 +00:00
										 |  |  |                                  httpd.content_license_url) | 
					
						
							| 
									
										
										
										
											2020-10-07 13:51:29 +00:00
										 |  |  |         print('Newswire feed converted to ActivityPub') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 19:39:45 +00:00
										 |  |  |         if httpd.max_news_posts > 0: | 
					
						
							| 
									
										
										
										
											2021-12-25 23:41:17 +00:00
										 |  |  |             archive_dir = base_dir + '/archive' | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  |             archiveSubdir = \ | 
					
						
							| 
									
										
										
										
											2021-12-25 23:41:17 +00:00
										 |  |  |                 archive_dir + '/accounts/news@' + domain + '/outbox' | 
					
						
							| 
									
										
										
										
											2021-09-15 17:03:20 +00:00
										 |  |  |             print('Archiving news posts') | 
					
						
							| 
									
										
										
										
											2021-12-25 17:09:22 +00:00
										 |  |  |             archivePostsForPerson(http_prefix, 'news', | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |                                   domain, base_dir, 'outbox', | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  |                                   archiveSubdir, | 
					
						
							|  |  |  |                                   httpd.recentPostsCache, | 
					
						
							| 
									
										
										
										
											2021-12-25 19:39:45 +00:00
										 |  |  |                                   httpd.max_news_posts) | 
					
						
							| 
									
										
										
										
											2020-10-21 10:39:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |         # wait a while before the next feeds update | 
					
						
							| 
									
										
										
										
											2021-02-10 13:31:19 +00:00
										 |  |  |         for tick in range(120): | 
					
						
							|  |  |  |             time.sleep(10) | 
					
						
							|  |  |  |             # if a new blog post has been created then stop | 
					
						
							|  |  |  |             # waiting and recalculate the newswire | 
					
						
							|  |  |  |             if os.path.isfile(refreshFilename): | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     os.remove(refreshFilename) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |                 except OSError: | 
					
						
							| 
									
										
										
										
											2021-10-29 18:48:15 +00:00
										 |  |  |                     print('EX: runNewswireDaemon unable to delete ' + | 
					
						
							|  |  |  |                           str(refreshFilename)) | 
					
						
							| 
									
										
										
										
											2021-02-10 13:31:19 +00:00
										 |  |  |                 break | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 20:34:38 +00:00
										 |  |  | def runNewswireWatchdog(project_version: str, httpd) -> None: | 
					
						
							| 
									
										
										
										
											2020-10-07 12:05:49 +00:00
										 |  |  |     """This tries to keep the newswire update thread running even if it dies
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     print('Starting newswire watchdog') | 
					
						
							|  |  |  |     newswireOriginal = \ | 
					
						
							|  |  |  |         httpd.thrPostSchedule.clone(runNewswireDaemon) | 
					
						
							|  |  |  |     httpd.thrNewswireDaemon.start() | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         time.sleep(50) | 
					
						
							| 
									
										
										
										
											2021-06-05 12:43:57 +00:00
										 |  |  |         if httpd.thrNewswireDaemon.is_alive(): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         httpd.thrNewswireDaemon.kill() | 
					
						
							|  |  |  |         httpd.thrNewswireDaemon = \ | 
					
						
							|  |  |  |             newswireOriginal.clone(runNewswireDaemon) | 
					
						
							|  |  |  |         httpd.thrNewswireDaemon.start() | 
					
						
							|  |  |  |         print('Restarting newswire daemon...') |