Ability to mirror rss feed content

merge-requests/30/head
Bob Mottram 2020-10-19 20:26:58 +01:00
parent 9391fa57c9
commit ee9d9a9dc5
2 changed files with 83 additions and 6 deletions

View File

@ -28,8 +28,9 @@ def addGlobalBlock(baseDir: str,
return False return False
# block an account handle or domain # block an account handle or domain
blockFile = open(blockingFilename, "a+") blockFile = open(blockingFilename, "a+")
blockFile.write(blockHandle + '\n') if blockFile:
blockFile.close() blockFile.write(blockHandle + '\n')
blockFile.close()
else: else:
blockHashtag = blockNickname blockHashtag = blockNickname
# is the hashtag already blocked? # is the hashtag already blocked?
@ -38,8 +39,9 @@ def addGlobalBlock(baseDir: str,
return False return False
# block a hashtag # block a hashtag
blockFile = open(blockingFilename, "a+") blockFile = open(blockingFilename, "a+")
blockFile.write(blockHashtag + '\n') if blockFile:
blockFile.close() blockFile.write(blockHashtag + '\n')
blockFile.close()
return True return True

View File

@ -15,6 +15,8 @@ __status__ = "Production"
import os import os
import time import time
import datetime import datetime
from shutil import rmtree
from subprocess import Popen
from collections import OrderedDict from collections import OrderedDict
from newswire import getDictFromNewswire from newswire import getDictFromNewswire
# from posts import sendSignedJson # from posts import sendSignedJson
@ -348,14 +350,86 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
return True return True
def _pruneNewsMirror(mirrorDir: str, mirrorIndexFilename: str,
                     noOfDirs: int, maxMirroredArticles: int) -> None:
    """Removes the oldest mirrored articles until at most
    maxMirroredArticles directories remain, then drops the
    corresponding entries from the index file.
    The index is read top to bottom, so earlier lines are
    treated as older articles
    """
    with open(mirrorIndexFilename, 'r') as indexFile:
        indexLines = indexFile.readlines()

    removals = set()
    for line in indexLines:
        if noOfDirs <= maxMirroredArticles:
            break
        postId = line.strip()
        # A blank entry would resolve to the mirror directory itself,
        # and a path-like entry could escape it, so never remove those
        if not postId or '/' in postId or postId in ('.', '..'):
            continue
        mirrorArticleDir = mirrorDir + '/' + postId
        if os.path.isdir(mirrorArticleDir):
            rmtree(mirrorArticleDir)
            removals.add(postId)
            noOfDirs -= 1

    if not removals:
        return
    # Match whole lines rather than substrings, so that removing
    # post "1" does not damage the entry for post "21"
    with open(mirrorIndexFilename, 'w') as indexFile:
        indexFile.writelines(line for line in indexLines
                             if line.strip() not in removals)


def createNewsMirror(baseDir: str, postIdNumber: str, url: str,
                     maxMirroredArticles: int) -> bool:
    """Creates a local mirror of a news article

    The article at url is downloaded with wget into
    baseDir/accounts/newsmirror/postIdNumber and postIdNumber is
    appended to baseDir/accounts/newsmirror.txt, which records the
    order in which articles were mirrored.
    If maxMirroredArticles is greater than zero and more than that
    number of mirrored directories exist then the oldest ones, going
    by the index file, are deleted first.
    Always returns True. Mirroring is best-effort, so a failed
    download is not reported to the caller
    """
    # Retained from the original shell-based implementation; such
    # characters are not valid in a url anyway
    if '|' in url or '>' in url:
        return True
    # Don't let a url be interpreted as a wget command line option
    if url.startswith('-'):
        return True

    mirrorDir = baseDir + '/accounts/newsmirror'
    if not os.path.isdir(mirrorDir):
        os.mkdir(mirrorDir)

    # count the directories immediately beneath the mirror directory.
    # Only the first os.walk result describes that level
    noOfDirs = 0
    for _, dirs, _ in os.walk(mirrorDir):
        noOfDirs = len(dirs)
        break

    mirrorIndexFilename = baseDir + '/accounts/newsmirror.txt'

    if maxMirroredArticles > 0 and noOfDirs > maxMirroredArticles:
        if not os.path.isfile(mirrorIndexFilename):
            # no index for mirrors found
            return True
        _pruneNewsMirror(mirrorDir, mirrorIndexFilename,
                         noOfDirs, maxMirroredArticles)

    mirrorArticleDir = mirrorDir + '/' + postIdNumber
    if os.path.isdir(mirrorArticleDir):
        # already mirrored
        return True

    # download the files.
    # The url comes from an external feed, so pass the arguments as a
    # list without a shell to prevent command injection
    wgetCommand = [
        '/usr/bin/wget', '-mkEpnp', '-e', 'robots=off',
        url, '-P', mirrorArticleDir
    ]
    try:
        p = Popen(wgetCommand)
        p.wait()
    except OSError:
        # wget is not installed or could not be run
        return True

    if not os.path.isdir(mirrorArticleDir):
        return True

    # append the post Id number to the index file.
    # Append mode creates the file if it doesn't already exist
    with open(mirrorIndexFilename, 'a') as indexFile:
        indexFile.write(postIdNumber + '\n')

    return True
@ -445,7 +519,8 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
mirrored = item[7] mirrored = item[7]
if mirrored: if mirrored:
if not createNewsMirror(baseDir, url, maxMirroredArticles): if not createNewsMirror(baseDir, statusNumber,
url, maxMirroredArticles):
continue continue
idStr = \ idStr = \