mirror of https://gitlab.com/bashrc2/epicyon
Save newswire favicons
parent
ea422f0ead
commit
64c41279b4
|
@ -1016,7 +1016,7 @@ if args.domain:
|
|||
if args.rss:
|
||||
session = createSession(None)
|
||||
testRSS = getRSS(baseDir, domain, session, args.rss,
|
||||
False, False, 1000, 1000, 1000, 1000)
|
||||
False, False, 1000, 1000, 1000, 1000, debug)
|
||||
pprint(testRSS)
|
||||
sys.exit()
|
||||
|
||||
|
|
|
@ -801,7 +801,8 @@ def runNewswireDaemon(baseDir: str, httpd,
|
|||
httpd.maxFeedItemSizeKb,
|
||||
httpd.maxNewswirePosts,
|
||||
httpd.maxCategoriesFeedItemSizeKb,
|
||||
httpd.systemLanguage)
|
||||
httpd.systemLanguage,
|
||||
httpd.debug)
|
||||
|
||||
if not httpd.newswire:
|
||||
print('Newswire feeds not updated')
|
||||
|
|
111
newswire.py
111
newswire.py
|
@ -34,6 +34,7 @@ from utils import localActorUrl
|
|||
from blocking import isBlockedDomain
|
||||
from blocking import isBlockedHashtag
|
||||
from filters import isFiltered
|
||||
from session import getImageBinaryFromUrl
|
||||
|
||||
|
||||
def _removeCDATA(text: str) -> str:
|
||||
|
@ -126,6 +127,46 @@ def limitWordLengths(text: str, maxWordLength: int) -> str:
|
|||
return result
|
||||
|
||||
|
||||
def _getNewswireFaviconUrl(url: str) -> str:
|
||||
"""Returns a favicon url from the given article link
|
||||
"""
|
||||
if '://' not in url:
|
||||
return '/newswire_favicon.ico'
|
||||
if url.startswith('http://'):
|
||||
if not (url.endswith('.onion') or url.endswith('.i2p')):
|
||||
return '/newswire_favicon.ico'
|
||||
domain = url.split('://')[1]
|
||||
if '/' not in domain:
|
||||
return url + '/favicon.ico'
|
||||
else:
|
||||
domain = domain.split('/')[0]
|
||||
return url.split('://')[0] + '://' + domain + '/favicon.ico'
|
||||
|
||||
|
||||
def _downloadNewswireFeedFavicon(session, baseDir: str,
|
||||
link: str, debug: bool) -> bool:
|
||||
"""Downloads the favicon for the given feed link
|
||||
"""
|
||||
url = _getNewswireFaviconUrl(link)
|
||||
if '://' not in link:
|
||||
return False
|
||||
timeoutSec = 10
|
||||
imageData = getImageBinaryFromUrl(session, url, timeoutSec, debug)
|
||||
if not imageData:
|
||||
return False
|
||||
if not os.path.isdir(baseDir + '/favicons'):
|
||||
os.mkdir(baseDir + '/favicons')
|
||||
linkFilename = url.replace('/', '#')
|
||||
imageFilename = baseDir + '/favicons/' + linkFilename
|
||||
try:
|
||||
with open(imageFilename, 'wb+') as fp:
|
||||
fp.write(imageData)
|
||||
except OSError:
|
||||
print('EX: failed writing favicon ' + url)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _addNewswireDictEntry(baseDir: str, domain: str,
|
||||
newswire: {}, dateStr: str,
|
||||
title: str, link: str,
|
||||
|
@ -133,7 +174,7 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
|
|||
description: str, moderated: bool,
|
||||
mirrored: bool,
|
||||
tags: [],
|
||||
maxTags: int) -> None:
|
||||
maxTags: int, session, debug: bool) -> None:
|
||||
"""Update the newswire dictionary
|
||||
"""
|
||||
# remove any markup
|
||||
|
@ -166,6 +207,8 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
|
|||
if isBlockedHashtag(baseDir, tag):
|
||||
return
|
||||
|
||||
_downloadNewswireFeedFavicon(session, baseDir, link, debug)
|
||||
|
||||
newswire[dateStr] = [
|
||||
title,
|
||||
link,
|
||||
|
@ -309,7 +352,8 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int,
|
||||
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||
maxCategoriesFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts an xml RSS 2.0 string to a dictionary
|
||||
"""
|
||||
if '<item>' not in xmlStr:
|
||||
|
@ -378,7 +422,7 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated,
|
||||
mirrored, [], 32)
|
||||
mirrored, [], 32, session, debug)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
|
@ -392,7 +436,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int,
|
||||
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||
maxCategoriesFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts an xml RSS 1.0 string to a dictionary
|
||||
https://validator.w3.org/feed/docs/rss1.html
|
||||
"""
|
||||
|
@ -465,7 +510,7 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated,
|
||||
mirrored, [], 32)
|
||||
mirrored, [], 32, session, debug)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
|
@ -478,7 +523,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
|
||||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int) -> {}:
|
||||
maxFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts an atom feed string to a dictionary
|
||||
"""
|
||||
if '<entry>' not in xmlStr:
|
||||
|
@ -540,7 +586,7 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated,
|
||||
mirrored, [], 32)
|
||||
mirrored, [], 32, session, debug)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
|
@ -553,7 +599,8 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
|
||||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int) -> {}:
|
||||
maxFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts a json feed string to a dictionary
|
||||
See https://jsonfeed.org/version/1.1
|
||||
"""
|
||||
|
@ -651,7 +698,7 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated,
|
||||
mirrored, [], 32)
|
||||
mirrored, [], 32, session, debug)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
|
@ -664,7 +711,8 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
|
||||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int) -> {}:
|
||||
maxFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts an atom-style YouTube feed string to a dictionary
|
||||
"""
|
||||
if '<entry>' not in xmlStr:
|
||||
|
@ -723,7 +771,7 @@ def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored,
|
||||
[], 32)
|
||||
[], 32, session, debug)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
|
@ -736,32 +784,38 @@ def _xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int,
|
||||
maxFeedItemSizeKb: int,
|
||||
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||
maxCategoriesFeedItemSizeKb: int,
|
||||
session, debug: bool) -> {}:
|
||||
"""Converts an xml string to a dictionary
|
||||
"""
|
||||
if '<yt:videoId>' in xmlStr and '<yt:channelId>' in xmlStr:
|
||||
print('YouTube feed: reading')
|
||||
return _atomFeedYTToDict(baseDir, domain,
|
||||
xmlStr, moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedItemSizeKb)
|
||||
maxPostsPerSource, maxFeedItemSizeKb,
|
||||
session, debug)
|
||||
elif 'rss version="2.0"' in xmlStr:
|
||||
return _xml2StrToDict(baseDir, domain,
|
||||
xmlStr, moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedItemSizeKb,
|
||||
maxCategoriesFeedItemSizeKb)
|
||||
maxCategoriesFeedItemSizeKb,
|
||||
session, debug)
|
||||
elif '<?xml version="1.0"' in xmlStr:
|
||||
return _xml1StrToDict(baseDir, domain,
|
||||
xmlStr, moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedItemSizeKb,
|
||||
maxCategoriesFeedItemSizeKb)
|
||||
maxCategoriesFeedItemSizeKb,
|
||||
session, debug)
|
||||
elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
|
||||
return _atomFeedToDict(baseDir, domain,
|
||||
xmlStr, moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedItemSizeKb)
|
||||
maxPostsPerSource, maxFeedItemSizeKb,
|
||||
session, debug)
|
||||
elif 'https://jsonfeed.org/version/1' in xmlStr:
|
||||
return _jsonFeedV1ToDict(baseDir, domain,
|
||||
xmlStr, moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedItemSizeKb)
|
||||
maxPostsPerSource, maxFeedItemSizeKb,
|
||||
session, debug)
|
||||
return {}
|
||||
|
||||
|
||||
|
@ -781,7 +835,7 @@ def getRSS(baseDir: str, domain: str, session, url: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
maxPostsPerSource: int, maxFeedSizeKb: int,
|
||||
maxFeedItemSizeKb: int,
|
||||
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||
maxCategoriesFeedItemSizeKb: int, debug: bool) -> {}:
|
||||
"""Returns an RSS url as a dict
|
||||
"""
|
||||
if not isinstance(url, str):
|
||||
|
@ -812,7 +866,8 @@ def getRSS(baseDir: str, domain: str, session, url: str,
|
|||
moderated, mirrored,
|
||||
maxPostsPerSource,
|
||||
maxFeedItemSizeKb,
|
||||
maxCategoriesFeedItemSizeKb)
|
||||
maxCategoriesFeedItemSizeKb,
|
||||
session, debug)
|
||||
else:
|
||||
print('WARN: feed is too large, ' +
|
||||
'or contains invalid characters: ' + url)
|
||||
|
@ -923,7 +978,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
|||
newswire: {},
|
||||
maxBlogsPerAccount: int,
|
||||
indexFilename: str,
|
||||
maxTags: int, systemLanguage: str) -> None:
|
||||
maxTags: int, systemLanguage: str,
|
||||
session, debug: bool) -> None:
|
||||
"""Adds blogs for the given account to the newswire
|
||||
"""
|
||||
if not os.path.isfile(indexFilename):
|
||||
|
@ -987,7 +1043,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
|||
votes, fullPostFilename,
|
||||
description, moderated, False,
|
||||
tagsFromPost,
|
||||
maxTags)
|
||||
maxTags, session, debug)
|
||||
|
||||
ctr += 1
|
||||
if ctr >= maxBlogsPerAccount:
|
||||
|
@ -996,7 +1052,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
|||
|
||||
def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
||||
maxBlogsPerAccount: int,
|
||||
maxTags: int, systemLanguage: str) -> None:
|
||||
maxTags: int, systemLanguage: str,
|
||||
session, debug: bool) -> None:
|
||||
"""Adds blogs from each user account into the newswire
|
||||
"""
|
||||
moderationDict = {}
|
||||
|
@ -1025,7 +1082,8 @@ def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
|||
_addAccountBlogsToNewswire(baseDir, nickname, domain,
|
||||
newswire, maxBlogsPerAccount,
|
||||
blogsIndex, maxTags,
|
||||
systemLanguage)
|
||||
systemLanguage, session,
|
||||
debug)
|
||||
break
|
||||
|
||||
# sort the moderation dict into chronological order, latest first
|
||||
|
@ -1050,7 +1108,7 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
|
|||
maxTags: int, maxFeedItemSizeKb: int,
|
||||
maxNewswirePosts: int,
|
||||
maxCategoriesFeedItemSizeKb: int,
|
||||
systemLanguage: str) -> {}:
|
||||
systemLanguage: str, debug: bool) -> {}:
|
||||
"""Gets rss feeds as a dictionary from newswire file
|
||||
"""
|
||||
subscriptionsFilename = baseDir + '/accounts/newswire.txt'
|
||||
|
@ -1091,14 +1149,15 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
|
|||
moderated, mirrored,
|
||||
maxPostsPerSource, maxFeedSizeKb,
|
||||
maxFeedItemSizeKb,
|
||||
maxCategoriesFeedItemSizeKb)
|
||||
maxCategoriesFeedItemSizeKb, debug)
|
||||
if itemsList:
|
||||
for dateStr, item in itemsList.items():
|
||||
result[dateStr] = item
|
||||
|
||||
# add blogs from each user account
|
||||
_addBlogsToNewswire(baseDir, domain, result,
|
||||
maxPostsPerSource, maxTags, systemLanguage)
|
||||
maxPostsPerSource, maxTags, systemLanguage,
|
||||
session, debug)
|
||||
|
||||
# sort into chronological order, latest first
|
||||
sortedResult = OrderedDict(sorted(result.items(), reverse=True))
|
||||
|
|
24
session.py
24
session.py
|
@ -452,3 +452,27 @@ def downloadImage(session, baseDir: str, url: str,
|
|||
print('EX: Failed to download image: ' +
|
||||
str(url) + ' ' + str(e))
|
||||
return False
|
||||
|
||||
|
||||
def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool):
|
||||
"""http GET for an image
|
||||
"""
|
||||
try:
|
||||
result = session.get(url, timeout=timeoutSec)
|
||||
if result.status_code != 200:
|
||||
print('WARN: getImageFromUrl: ' + url +
|
||||
' failed with error code ' + str(result.status_code))
|
||||
return result.content
|
||||
except requests.exceptions.RequestException as e:
|
||||
if debug:
|
||||
print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' +
|
||||
str(e))
|
||||
except ValueError as e:
|
||||
if debug:
|
||||
print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' +
|
||||
str(e))
|
||||
except SocketError as e:
|
||||
if e.errno == errno.ECONNRESET:
|
||||
print('WARN: getImageFromUrl failed, ' +
|
||||
'connection was reset ' + str(e))
|
||||
return None
|
||||
|
|
Loading…
Reference in New Issue