mirror of https://gitlab.com/bashrc2/epicyon
Ability to mirror rss feed content
parent
9391fa57c9
commit
ee9d9a9dc5
10
blocking.py
10
blocking.py
|
@ -28,8 +28,9 @@ def addGlobalBlock(baseDir: str,
|
||||||
return False
|
return False
|
||||||
# block an account handle or domain
|
# block an account handle or domain
|
||||||
blockFile = open(blockingFilename, "a+")
|
blockFile = open(blockingFilename, "a+")
|
||||||
blockFile.write(blockHandle + '\n')
|
if blockFile:
|
||||||
blockFile.close()
|
blockFile.write(blockHandle + '\n')
|
||||||
|
blockFile.close()
|
||||||
else:
|
else:
|
||||||
blockHashtag = blockNickname
|
blockHashtag = blockNickname
|
||||||
# is the hashtag already blocked?
|
# is the hashtag already blocked?
|
||||||
|
@ -38,8 +39,9 @@ def addGlobalBlock(baseDir: str,
|
||||||
return False
|
return False
|
||||||
# block a hashtag
|
# block a hashtag
|
||||||
blockFile = open(blockingFilename, "a+")
|
blockFile = open(blockingFilename, "a+")
|
||||||
blockFile.write(blockHashtag + '\n')
|
if blockFile:
|
||||||
blockFile.close()
|
blockFile.write(blockHashtag + '\n')
|
||||||
|
blockFile.close()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,8 @@ __status__ = "Production"
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
|
from shutil import rmtree
|
||||||
|
from subprocess import Popen
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from newswire import getDictFromNewswire
|
from newswire import getDictFromNewswire
|
||||||
# from posts import sendSignedJson
|
# from posts import sendSignedJson
|
||||||
|
@ -348,14 +350,86 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def createNewsMirror(baseDir: str, url: str,
|
def createNewsMirror(baseDir: str, postIdNumber: str, url: str,
|
||||||
maxMirroredArticles: int) -> bool:
|
maxMirroredArticles: int) -> bool:
|
||||||
"""Creates a local mirror of a news article
|
"""Creates a local mirror of a news article
|
||||||
"""
|
"""
|
||||||
|
if '|' in url or '>' in url:
|
||||||
|
return True
|
||||||
|
|
||||||
mirrorDir = baseDir + '/accounts/newsmirror'
|
mirrorDir = baseDir + '/accounts/newsmirror'
|
||||||
if not os.path.isdir(mirrorDir):
|
if not os.path.isdir(mirrorDir):
|
||||||
os.mkdir(mirrorDir)
|
os.mkdir(mirrorDir)
|
||||||
|
|
||||||
|
# count the directories
|
||||||
|
noOfDirs = 0
|
||||||
|
for subdir, dirs, files in os.walk(mirrorDir):
|
||||||
|
noOfDirs = len(dirs)
|
||||||
|
|
||||||
|
mirrorIndexFilename = baseDir + '/accounts/newsmirror.txt'
|
||||||
|
|
||||||
|
if maxMirroredArticles > 0 and noOfDirs > maxMirroredArticles:
|
||||||
|
if not os.path.isfile(mirrorIndexFilename):
|
||||||
|
# no index for mirrors found
|
||||||
|
return True
|
||||||
|
removals = []
|
||||||
|
with open(mirrorIndexFilename, 'r') as indexFile:
|
||||||
|
# remove the oldest directories
|
||||||
|
ctr = 0
|
||||||
|
while noOfDirs > maxMirroredArticles:
|
||||||
|
ctr += 1
|
||||||
|
if ctr > 5000:
|
||||||
|
# escape valve
|
||||||
|
break
|
||||||
|
|
||||||
|
postId = indexFile.readline()
|
||||||
|
if not postId:
|
||||||
|
continue
|
||||||
|
postId = postId.strip()
|
||||||
|
mirrorArticleDir = mirrorDir + '/' + postId
|
||||||
|
if os.path.isdir(mirrorArticleDir):
|
||||||
|
rmtree(mirrorArticleDir)
|
||||||
|
removals.append(postId)
|
||||||
|
noOfDirs -= 1
|
||||||
|
|
||||||
|
# remove the corresponding index entries
|
||||||
|
if removals:
|
||||||
|
indexContent = ''
|
||||||
|
with open(mirrorIndexFilename, 'r') as indexFile:
|
||||||
|
indexContent = indexFile.read()
|
||||||
|
for removePostId in removals:
|
||||||
|
indexContent = \
|
||||||
|
indexContent.replace(removePostId + '\n', '')
|
||||||
|
with open(mirrorIndexFilename, "w+") as indexFile:
|
||||||
|
indexFile.write(indexContent)
|
||||||
|
|
||||||
|
mirrorArticleDir = mirrorDir + '/' + postIdNumber
|
||||||
|
if os.path.isdir(mirrorArticleDir):
|
||||||
|
# already mirrored
|
||||||
|
return True
|
||||||
|
|
||||||
|
# download the files
|
||||||
|
commandStr = \
|
||||||
|
'/usr/bin/wget -mkEpnp -e robots=off ' + url + \
|
||||||
|
' -P ' + mirrorArticleDir
|
||||||
|
p = Popen(commandStr, shell=True)
|
||||||
|
os.waitpid(p.pid, 0)
|
||||||
|
|
||||||
|
if not os.path.isdir(mirrorArticleDir):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# append the post Id number to the index file
|
||||||
|
if os.path.isfile(mirrorIndexFilename):
|
||||||
|
indexFile = open(mirrorIndexFilename, "a+")
|
||||||
|
if indexFile:
|
||||||
|
indexFile.write(postIdNumber + '\n')
|
||||||
|
indexFile.close()
|
||||||
|
else:
|
||||||
|
indexFile = open(mirrorIndexFilename, "w+")
|
||||||
|
if indexFile:
|
||||||
|
indexFile.write(postIdNumber + '\n')
|
||||||
|
indexFile.close()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@ -445,7 +519,8 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
|
|
||||||
mirrored = item[7]
|
mirrored = item[7]
|
||||||
if mirrored:
|
if mirrored:
|
||||||
if not createNewsMirror(baseDir, url, maxMirroredArticles):
|
if not createNewsMirror(baseDir, statusNumber,
|
||||||
|
url, maxMirroredArticles):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
idStr = \
|
idStr = \
|
||||||
|
|
Loading…
Reference in New Issue