Merge branch 'main' of gitlab.com:bashrc2/epicyon

merge-requests/30/head
Bob Mottram 2021-12-17 13:46:30 +00:00
commit 41f77c1099
7 changed files with 283 additions and 62 deletions

View File

@ -1562,7 +1562,7 @@ class PubServer(BaseHTTPRequestHandler):
self.authorizedNickname = None
notAuthPaths = (
'/icons/', '/avatars/',
'/icons/', '/avatars/', '/favicons/',
'/system/accounts/avatars/',
'/system/accounts/headers/',
'/system/media_attachments/files/',
@ -7407,6 +7407,54 @@ class PubServer(BaseHTTPRequestHandler):
return
self._404()
def _showCachedFavicon(self, refererDomain: str, path: str,
                       baseDir: str, GETstartTime) -> None:
    """Shows a favicon image obtained from the cache

    The image is served from the in-memory favicons cache if it is
    there, otherwise it is read from the favicons directory under
    baseDir and then added to the in-memory cache.
    Responds with 404 if the path is not a plain filename within the
    favicons directory, or if the file is missing or unreadable, and
    with 304 if the etag for the file already exists.

    refererDomain: passed through to _set_headers_etag
    path: the requested path, beginning with /favicons/
    baseDir: base directory, to which the decoded path is appended
    GETstartTime: passed through to fitnessPerformance
    """
    favPrefix = '/favicons/'
    favFile = path.replace(favPrefix, '')
    decodedPath = urllib.parse.unquote_plus(path)
    decodedName = decodedPath[len(favPrefix):]
    # The path comes directly from the request, so refuse anything
    # which could refer to a file outside of the favicons directory.
    # Cached favicons are stored as a flat list of files.
    if not decodedPath.startswith(favPrefix) or \
       not decodedName or \
       '..' in decodedName or \
       '/' in decodedName or \
       '\\' in decodedName:
        self._404()
        return
    favFilename = baseDir + decodedPath
    print('showCachedFavicon: ' + favFilename)

    # serve from memory if possible
    cachedBinary = self.server.faviconsCache.get(favFile)
    if cachedBinary:
        mimeType = mediaFileMimeType(favFilename)
        self._set_headers_etag(favFilename,
                               mimeType,
                               cachedBinary, None,
                               refererDomain,
                               False, None)
        self._write(cachedBinary)
        fitnessPerformance(GETstartTime, self.server.fitness,
                           '_GET', '_showCachedFavicon2',
                           self.server.debug)
        return

    if not os.path.isfile(favFilename):
        self._404()
        return
    if self._etag_exists(favFilename):
        # The file has not changed
        self._304()
        return

    mediaBinary = None
    try:
        with open(favFilename, 'rb') as favFp:
            mediaBinary = favFp.read()
    except OSError:
        print('EX: unable to read cached favicon ' + favFilename)
    if not mediaBinary:
        self._404()
        return

    mimeType = mediaFileMimeType(favFilename)
    self._set_headers_etag(favFilename,
                           mimeType,
                           mediaBinary, None,
                           refererDomain,
                           False, None)
    self._write(mediaBinary)
    fitnessPerformance(GETstartTime, self.server.fitness,
                       '_GET', '_showCachedFavicon',
                       self.server.debug)
    # keep in memory for subsequent requests
    self.server.faviconsCache[favFile] = mediaBinary
def _showCachedAvatar(self, refererDomain: str, path: str,
baseDir: str, GETstartTime) -> None:
"""Shows an avatar image obtained from the cache
@ -12329,6 +12377,7 @@ class PubServer(BaseHTTPRequestHandler):
'/emoji/' not in path and \
'/tags/' not in path and \
'/avatars/' not in path and \
'/favicons/' not in path and \
'/headers/' not in path and \
'/fonts/' not in path and \
'/icons/' not in path:
@ -13378,18 +13427,19 @@ class PubServer(BaseHTTPRequestHandler):
# default newswire favicon, for links to sites which
# have no favicon
if 'newswire_favicon.ico' in self.path:
self._getFavicon(callingDomain, self.server.baseDir,
self.server.debug,
'newswire_favicon.ico')
return
if not self.path.startswith('/favicons/'):
if 'newswire_favicon.ico' in self.path:
self._getFavicon(callingDomain, self.server.baseDir,
self.server.debug,
'newswire_favicon.ico')
return
# favicon image
if 'favicon.ico' in self.path:
self._getFavicon(callingDomain, self.server.baseDir,
self.server.debug,
'favicon.ico')
return
# favicon image
if 'favicon.ico' in self.path:
self._getFavicon(callingDomain, self.server.baseDir,
self.server.debug,
'favicon.ico')
return
# check authorization
authorized = self._isAuthorized()
@ -13647,6 +13697,20 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', 'hasAccept',
self.server.debug)
# cached favicon images
# Note that this comes before the busy flag to avoid conflicts
if self.path.startswith('/favicons/'):
if self.server.domainFull in self.path:
# favicon for this instance
self._getFavicon(callingDomain, self.server.baseDir,
self.server.debug,
'favicon.ico')
return
self._showCachedFavicon(refererDomain, self.path,
self.server.baseDir,
GETstartTime)
return
# get css
# Note that this comes before the busy flag to avoid conflicts
if self.path.endswith('.css'):
@ -18623,6 +18687,7 @@ def runDaemon(contentLicenseUrl: str,
httpd.instanceId = instanceId
httpd.personCache = {}
httpd.cachedWebfingers = {}
httpd.faviconsCache = {}
httpd.proxyType = proxyType
httpd.session = None
httpd.sessionLastUpdate = 0

View File

@ -1016,7 +1016,7 @@ if args.domain:
if args.rss:
session = createSession(None)
testRSS = getRSS(baseDir, domain, session, args.rss,
False, False, 1000, 1000, 1000, 1000)
False, False, 1000, 1000, 1000, 1000, debug)
pprint(testRSS)
sys.exit()

View File

@ -801,7 +801,8 @@ def runNewswireDaemon(baseDir: str, httpd,
httpd.maxFeedItemSizeKb,
httpd.maxNewswirePosts,
httpd.maxCategoriesFeedItemSizeKb,
httpd.systemLanguage)
httpd.systemLanguage,
httpd.debug)
if not httpd.newswire:
print('Newswire feeds not updated')

View File

@ -18,6 +18,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import validPostDate
from categories import setHashtagCategory
from utils import getFavFilenameFromUrl
from utils import getBaseContentFromPost
from utils import hasObjectDict
from utils import firstParagraphFromString
@ -34,6 +35,7 @@ from utils import localActorUrl
from blocking import isBlockedDomain
from blocking import isBlockedHashtag
from filters import isFiltered
from session import downloadImageAnyMimeType
def _removeCDATA(text: str) -> str:
@ -126,6 +128,67 @@ def limitWordLengths(text: str, maxWordLength: int) -> str:
return result
def getNewswireFaviconUrl(url: str) -> str:
    """Returns a favicon url from the given article link

    The favicon is taken to be /favicon.ico on the domain of the link.
    The default newswire favicon is returned if the link has no
    protocol prefix or no domain, or if it is a plain http link to a
    domain which is not an onion or i2p address.
    """
    defaultFavicon = '/newswire_favicon.ico'
    if '://' not in url:
        return defaultFavicon
    protocol, remainder = url.split('://', 1)
    # the domain ends at the first path, query or fragment separator
    domain = remainder
    for separator in ('/', '?', '#'):
        domain = domain.split(separator)[0]
    if not domain:
        return defaultFavicon
    if url.startswith('http://'):
        # Test the domain rather than the whole url, so that links to
        # individual articles on onion or i2p sites are recognised.
        # Any credentials or port number are ignored for this test.
        hostname = domain.split('@')[-1].split(':')[0]
        if not hostname.endswith(('.onion', '.i2p')):
            return defaultFavicon
    return protocol + '://' + domain + '/favicon.ico'
def _downloadNewswireFeedFavicon(session, baseDir: str,
                                 link: str, debug: bool) -> bool:
    """Downloads the favicon for the given feed link
    and saves it within the favicons directory under baseDir

    Returns True if the favicon is within the favicons directory,
    either because it was already there or because it was downloaded
    and saved. Returns False if there is nothing to download, or if
    the download or the save failed.
    """
    favUrl = getNewswireFaviconUrl(link)
    # a favicon url without a protocol is the local default icon,
    # for which there is nothing to download
    if '://' not in favUrl:
        return False

    extensionsToMime = {
        'ico': 'x-icon',
        'png': 'png',
        'jpg': 'jpeg',
        'gif': 'gif',
        'avif': 'avif',
        'svg': 'svg+xml',
        'webp': 'webp'
    }

    # Look for a previously saved favicon, with any image extension,
    # before making a network request. This avoids downloading the
    # same favicon again for every item from the same site.
    for ext in extensionsToMime:
        cachedFilename = \
            getFavFilenameFromUrl(baseDir,
                                  favUrl.replace('.ico', '.' + ext))
        if os.path.isfile(cachedFilename):
            return True

    timeoutSec = 10
    imageData, mimeType = \
        downloadImageAnyMimeType(session, favUrl, timeoutSec, debug)
    if not imageData or not mimeType:
        return False

    # update the favicon url so that its extension matches the
    # mime type of the image which was received
    for ext, mimeExt in extensionsToMime.items():
        if 'image/' + mimeExt in mimeType:
            favUrl = favUrl.replace('.ico', '.' + ext)
            break

    # save to the cache, creating the favicons directory if needed
    favFilename = getFavFilenameFromUrl(baseDir, favUrl)
    try:
        os.makedirs(baseDir + '/favicons', exist_ok=True)
        with open(favFilename, 'wb') as fp:
            fp.write(imageData)
    except OSError:
        print('EX: failed writing favicon ' + favFilename)
        return False

    return True
def _addNewswireDictEntry(baseDir: str, domain: str,
newswire: {}, dateStr: str,
title: str, link: str,
@ -133,7 +196,7 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
description: str, moderated: bool,
mirrored: bool,
tags: [],
maxTags: int) -> None:
maxTags: int, session, debug: bool) -> None:
"""Update the newswire dictionary
"""
# remove any markup
@ -166,6 +229,8 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
if isBlockedHashtag(baseDir, tag):
return
_downloadNewswireFeedFavicon(session, baseDir, link, debug)
newswire[dateStr] = [
title,
link,
@ -314,7 +379,8 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
maxCategoriesFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts an xml RSS 2.0 string to a dictionary
"""
if '<item>' not in xmlStr:
@ -383,7 +449,7 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
title, link,
votesStatus, postFilename,
description, moderated,
mirrored, [], 32)
mirrored, [], 32, session, debug)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
@ -397,7 +463,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
maxCategoriesFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts an xml RSS 1.0 string to a dictionary
https://validator.w3.org/feed/docs/rss1.html
"""
@ -470,7 +537,7 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
title, link,
votesStatus, postFilename,
description, moderated,
mirrored, [], 32)
mirrored, [], 32, session, debug)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
@ -483,7 +550,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
maxFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts an atom feed string to a dictionary
"""
if '<entry>' not in xmlStr:
@ -545,7 +613,7 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
title, link,
votesStatus, postFilename,
description, moderated,
mirrored, [], 32)
mirrored, [], 32, session, debug)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
@ -558,7 +626,8 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
maxFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts a json feed string to a dictionary
See https://jsonfeed.org/version/1.1
"""
@ -656,7 +725,7 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
title, link,
votesStatus, postFilename,
description, moderated,
mirrored, [], 32)
mirrored, [], 32, session, debug)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
@ -669,7 +738,8 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str,
def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
maxFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts an atom-style YouTube feed string to a dictionary
"""
if '<entry>' not in xmlStr:
@ -728,7 +798,7 @@ def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
title, link,
votesStatus, postFilename,
description, moderated, mirrored,
[], 32)
[], 32, session, debug)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
@ -741,32 +811,38 @@ def _xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
maxCategoriesFeedItemSizeKb: int,
session, debug: bool) -> {}:
"""Converts an xml string to a dictionary
"""
if '<yt:videoId>' in xmlStr and '<yt:channelId>' in xmlStr:
print('YouTube feed: reading')
return _atomFeedYTToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
maxPostsPerSource, maxFeedItemSizeKb,
session, debug)
elif 'rss version="2.0"' in xmlStr:
return _xml2StrToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb,
maxCategoriesFeedItemSizeKb)
maxCategoriesFeedItemSizeKb,
session, debug)
elif '<?xml version="1.0"' in xmlStr:
return _xml1StrToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb,
maxCategoriesFeedItemSizeKb)
maxCategoriesFeedItemSizeKb,
session, debug)
elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
return _atomFeedToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
maxPostsPerSource, maxFeedItemSizeKb,
session, debug)
elif 'https://jsonfeed.org/version/1' in xmlStr:
return _jsonFeedV1ToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
maxPostsPerSource, maxFeedItemSizeKb,
session, debug)
return {}
@ -786,7 +862,7 @@ def getRSS(baseDir: str, domain: str, session, url: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int, maxFeedSizeKb: int,
maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
maxCategoriesFeedItemSizeKb: int, debug: bool) -> {}:
"""Returns an RSS url as a dict
"""
if not isinstance(url, str):
@ -817,7 +893,8 @@ def getRSS(baseDir: str, domain: str, session, url: str,
moderated, mirrored,
maxPostsPerSource,
maxFeedItemSizeKb,
maxCategoriesFeedItemSizeKb)
maxCategoriesFeedItemSizeKb,
session, debug)
else:
print('WARN: feed is too large, ' +
'or contains invalid characters: ' + url)
@ -928,7 +1005,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
newswire: {},
maxBlogsPerAccount: int,
indexFilename: str,
maxTags: int, systemLanguage: str) -> None:
maxTags: int, systemLanguage: str,
session, debug: bool) -> None:
"""Adds blogs for the given account to the newswire
"""
if not os.path.isfile(indexFilename):
@ -992,7 +1070,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
votes, fullPostFilename,
description, moderated, False,
tagsFromPost,
maxTags)
maxTags, session, debug)
ctr += 1
if ctr >= maxBlogsPerAccount:
@ -1001,7 +1079,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
maxBlogsPerAccount: int,
maxTags: int, systemLanguage: str) -> None:
maxTags: int, systemLanguage: str,
session, debug: bool) -> None:
"""Adds blogs from each user account into the newswire
"""
moderationDict = {}
@ -1030,7 +1109,8 @@ def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
_addAccountBlogsToNewswire(baseDir, nickname, domain,
newswire, maxBlogsPerAccount,
blogsIndex, maxTags,
systemLanguage)
systemLanguage, session,
debug)
break
# sort the moderation dict into chronological order, latest first
@ -1055,7 +1135,7 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
maxTags: int, maxFeedItemSizeKb: int,
maxNewswirePosts: int,
maxCategoriesFeedItemSizeKb: int,
systemLanguage: str) -> {}:
systemLanguage: str, debug: bool) -> {}:
"""Gets rss feeds as a dictionary from newswire file
"""
subscriptionsFilename = baseDir + '/accounts/newswire.txt'
@ -1096,14 +1176,15 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
moderated, mirrored,
maxPostsPerSource, maxFeedSizeKb,
maxFeedItemSizeKb,
maxCategoriesFeedItemSizeKb)
maxCategoriesFeedItemSizeKb, debug)
if itemsList:
for dateStr, item in itemsList.items():
result[dateStr] = item
# add blogs from each user account
_addBlogsToNewswire(baseDir, domain, result,
maxPostsPerSource, maxTags, systemLanguage)
maxPostsPerSource, maxTags, systemLanguage,
session, debug)
# sort into chronological order, latest first
sortedResult = OrderedDict(sorted(result.items(), reverse=True))

View File

@ -394,7 +394,7 @@ def postImage(session, attachImageFilename: str, federationList: [],
def downloadImage(session, baseDir: str, url: str,
imageFilename: str, debug: bool,
force: bool = False) -> bool:
"""Downloads an image
"""Downloads an image with an expected mime type
"""
if not url:
return None
@ -407,7 +407,8 @@ def downloadImage(session, baseDir: str, url: str,
'gif': 'gif',
'svg': 'svg+xml',
'webp': 'webp',
'avif': 'avif'
'avif': 'avif',
'ico': 'x-icon'
}
sessionHeaders = None
for imFormat, mimeType in imageFormats.items():
@ -452,3 +453,62 @@ def downloadImage(session, baseDir: str, url: str,
print('EX: Failed to download image: ' +
str(url) + ' ' + str(e))
return False
def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool):
    """http GET for an image with any mime type

    Returns a tuple containing the image data and its mime type, or
    (None, None) if the request failed, the status code was not 200,
    or the content type of the response is not a recognised image
    type.

    session: session object used to make the GET request
    url: url of the image
    timeoutSec: timeout for the request in seconds
    debug: not currently used
    """
    sessionHeaders = {
        'Accept': 'image/x-icon, image/png, image/webp, image/jpeg, image/gif'
    }
    result = None
    try:
        result = session.get(url, headers=sessionHeaders, timeout=timeoutSec)
    except (requests.exceptions.RequestException, ValueError) as e:
        print('ERROR: downloadImageAnyMimeType failed: ' +
              str(url) + ', ' + str(e))
        return None, None
    except SocketError as e:
        if e.errno == errno.ECONNRESET:
            print('WARN: downloadImageAnyMimeType failed, ' +
                  'connection was reset ' + str(e))
        return None, None

    # Compare with None rather than testing truthiness, because a
    # response with an error status can evaluate to False, which
    # would skip the status code warning below
    if result is None:
        return None, None

    if result.status_code != 200:
        print('WARN: downloadImageAnyMimeType: ' + url +
              ' failed with error code ' + str(result.status_code))
        return None, None

    contentType = None
    for headerName in ('content-type', 'Content-type', 'Content-Type'):
        if result.headers.get(headerName):
            contentType = result.headers[headerName]
            break
    if not contentType:
        return None, None
    contentType = contentType.lower()

    # Maps the image subtypes which may be received onto the subtype
    # which gets returned. vnd.microsoft.icon is the registered type
    # for ico files and is returned as x-icon.
    imageSubtypes = {
        'x-icon': 'x-icon',
        'vnd.microsoft.icon': 'x-icon',
        'png': 'png',
        'jpeg': 'jpeg',
        'gif': 'gif',
        'svg+xml': 'svg+xml',
        'webp': 'webp',
        'avif': 'avif'
    }
    for receivedSubtype, returnedSubtype in imageSubtypes.items():
        if 'image/' + receivedSubtype in contentType:
            return result.content, 'image/' + returnedSubtype

    # not a recognised image type
    return None, None

View File

@ -346,7 +346,7 @@ def getAudioExtensions() -> []:
def getImageExtensions() -> []:
"""Returns a list of the possible image file extensions
"""
return ('png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'svg')
return ('png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'svg', 'ico')
def getImageMimeType(imageFilename: str) -> str:
@ -358,7 +358,8 @@ def getImageMimeType(imageFilename: str) -> str:
'gif': 'gif',
'avif': 'avif',
'svg': 'svg+xml',
'webp': 'webp'
'webp': 'webp',
'ico': 'x-icon'
}
for ext, mimeExt in extensionsToMime.items():
if imageFilename.endswith('.' + ext):
@ -375,7 +376,8 @@ def getImageExtensionFromMimeType(contentType: str) -> str:
'gif': 'gif',
'svg+xml': 'svg',
'webp': 'webp',
'avif': 'avif'
'avif': 'avif',
'x-icon': 'ico'
}
for mimeExt, ext in imageMedia.items():
if contentType.endswith(mimeExt):
@ -2482,6 +2484,7 @@ def mediaFileMimeType(filename: str) -> str:
'svg': 'image/svg+xml',
'webp': 'image/webp',
'avif': 'image/avif',
'ico': 'image/x-icon',
'mp3': 'audio/mpeg',
'ogg': 'audio/ogg',
'flac': 'audio/flac',
@ -3219,3 +3222,13 @@ def getNewPostEndpoints() -> []:
'newreminder', 'newreport', 'newquestion', 'newshare', 'newwanted',
'editblogpost'
)
def getFavFilenameFromUrl(baseDir: str, faviconUrl: str) -> str:
    """Returns the filename within the favicons directory under
    baseDir which corresponds to the given favicon url

    Any protocol prefix is removed, "/favicon." is shortened to "."
    and each remaining "/" becomes "-"
    """
    withoutProtocol = faviconUrl
    if '://' in faviconUrl:
        withoutProtocol = faviconUrl.split('://')[1]
    flatName = withoutProtocol.replace('/favicon.', '.').replace('/', '-')
    return baseDir + '/favicons/' + flatName

View File

@ -11,6 +11,7 @@ import os
from datetime import datetime
from content import removeLongWords
from content import limitRepeatedWords
from utils import getFavFilenameFromUrl
from utils import getBaseContentFromPost
from utils import removeHtml
from utils import locatePost
@ -22,6 +23,7 @@ from utils import getConfigParam
from utils import removeDomainPort
from utils import acctDir
from posts import isModerator
from newswire import getNewswireFaviconUrl
from webapp_utils import getRightImageFile
from webapp_utils import htmlHeaderWithExternalStyle
from webapp_utils import htmlFooter
@ -210,22 +212,6 @@ def _getBrokenFavSubstitute() -> str:
return " onerror=\"this.onerror=null; this.src='/newswire_favicon.ico'\""
def _getNewswireFavicon(url: str) -> str:
    """Returns a favicon url from the given article link

    The favicon is taken to be /favicon.ico on the domain of the link.
    The default newswire favicon is returned if the link has no
    protocol prefix or no domain, or if it is a plain http link to a
    domain which is not an onion or i2p address.
    """
    defaultFavicon = '/newswire_favicon.ico'
    if '://' not in url:
        return defaultFavicon
    protocol, remainder = url.split('://', 1)
    # the domain ends at the first path, query or fragment separator
    domain = remainder
    for separator in ('/', '?', '#'):
        domain = domain.split(separator)[0]
    if not domain:
        return defaultFavicon
    if url.startswith('http://'):
        # Test the domain rather than the whole url, so that links to
        # individual articles on onion or i2p sites are recognised.
        # Any credentials or port number are ignored for this test.
        hostname = domain.split('@')[-1].split(':')[0]
        if not hostname.endswith(('.onion', '.i2p')):
            return defaultFavicon
    return protocol + '://' + domain + '/favicon.ico'
def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
translate: {}, positiveVoting: bool) -> str:
"""Converts a newswire dict into html
@ -252,9 +238,24 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
dateStrLink = dateStr.replace('T', ' ')
dateStrLink = dateStrLink.replace('Z', '')
url = item[1]
faviconUrl = _getNewswireFavicon(url)
faviconUrl = getNewswireFaviconUrl(url)
faviconLink = ''
if faviconUrl:
cachedFaviconFilename = getFavFilenameFromUrl(baseDir, faviconUrl)
if os.path.isfile(cachedFaviconFilename):
faviconUrl = \
cachedFaviconFilename.replace(baseDir, '')
else:
extensions = ('png', 'jpg', 'gif', 'avif', 'svg', 'webp')
for ext in extensions:
cachedFaviconFilename = \
getFavFilenameFromUrl(baseDir, faviconUrl)
cachedFaviconFilename = \
cachedFaviconFilename.replace('.ico', '.' + ext)
if os.path.isfile(cachedFaviconFilename):
faviconUrl = \
cachedFaviconFilename.replace(baseDir, '')
faviconLink = \
'<img loading="lazy" src="' + faviconUrl + '" ' + \
'alt="" ' + _getBrokenFavSubstitute() + '/>'