From 64c41279b4f32d6b8c7cb2d8629fbeb344118d62 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 20:57:30 +0000 Subject: [PATCH 01/37] Save newswire favicons --- epicyon.py | 2 +- newsdaemon.py | 3 +- newswire.py | 111 ++++++++++++++++++++++++++++++++++++++------------ session.py | 24 +++++++++++ 4 files changed, 112 insertions(+), 28 deletions(-) diff --git a/epicyon.py b/epicyon.py index 52b0fcca0..391650119 100644 --- a/epicyon.py +++ b/epicyon.py @@ -1016,7 +1016,7 @@ if args.domain: if args.rss: session = createSession(None) testRSS = getRSS(baseDir, domain, session, args.rss, - False, False, 1000, 1000, 1000, 1000) + False, False, 1000, 1000, 1000, 1000, debug) pprint(testRSS) sys.exit() diff --git a/newsdaemon.py b/newsdaemon.py index dc5b35e0b..a4f340b4b 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -801,7 +801,8 @@ def runNewswireDaemon(baseDir: str, httpd, httpd.maxFeedItemSizeKb, httpd.maxNewswirePosts, httpd.maxCategoriesFeedItemSizeKb, - httpd.systemLanguage) + httpd.systemLanguage, + httpd.debug) if not httpd.newswire: print('Newswire feeds not updated') diff --git a/newswire.py b/newswire.py index 803c15b48..a16482270 100644 --- a/newswire.py +++ b/newswire.py @@ -34,6 +34,7 @@ from utils import localActorUrl from blocking import isBlockedDomain from blocking import isBlockedHashtag from filters import isFiltered +from session import getImageBinaryFromUrl def _removeCDATA(text: str) -> str: @@ -126,6 +127,46 @@ def limitWordLengths(text: str, maxWordLength: int) -> str: return result +def _getNewswireFaviconUrl(url: str) -> str: + """Returns a favicon url from the given article link + """ + if '://' not in url: + return '/newswire_favicon.ico' + if url.startswith('http://'): + if not (url.endswith('.onion') or url.endswith('.i2p')): + return '/newswire_favicon.ico' + domain = url.split('://')[1] + if '/' not in domain: + return url + '/favicon.ico' + else: + domain = domain.split('/')[0] + return url.split('://')[0] + '://' + domain + '/favicon.ico' + + +def _downloadNewswireFeedFavicon(session, baseDir: str, + link: str, debug: bool) -> bool: + """Downloads the favicon for the given feed link + """ + url = _getNewswireFaviconUrl(link) + if '://' not in link: + return False + timeoutSec = 10 + imageData = getImageBinaryFromUrl(session, url, timeoutSec, debug) + if not imageData: + return False + if not os.path.isdir(baseDir + '/favicons'): + os.mkdir(baseDir + '/favicons') + linkFilename = url.replace('/', '#') + imageFilename = baseDir + '/favicons/' + linkFilename + try: + with open(imageFilename, 'wb+') as fp: + fp.write(imageData) + except OSError: + print('EX: failed writing favicon ' + url) + return False + return True + + def _addNewswireDictEntry(baseDir: str, domain: str, newswire: {}, dateStr: str, title: str, link: str, @@ -133,7 +174,7 @@ def _addNewswireDictEntry(baseDir: str, domain: str, description: str, moderated: bool, mirrored: bool, tags: [], - maxTags: int) -> None: + maxTags: int, session, debug: bool) -> None: """Update the newswire dictionary """ # remove any markup @@ -166,6 +207,8 @@ def _addNewswireDictEntry(baseDir: str, domain: str, if isBlockedHashtag(baseDir, tag): return + _downloadNewswireFeedFavicon(session, baseDir, link, debug) + newswire[dateStr] = [ title, link, @@ -309,7 +352,8 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, maxFeedItemSizeKb: int, - maxCategoriesFeedItemSizeKb: int) -> {}: + maxCategoriesFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts an xml RSS 2.0 string to a dictionary """ if '' not in xmlStr: @@ -378,7 +422,7 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str, title, link, votesStatus, postFilename, description, moderated, - mirrored, [], 32) + mirrored, [], 32, session, debug) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -392,7 +436,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, maxFeedItemSizeKb: int, - maxCategoriesFeedItemSizeKb: int) -> {}: + maxCategoriesFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts an xml RSS 1.0 string to a dictionary https://validator.w3.org/feed/docs/rss1.html """ @@ -465,7 +510,7 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str, title, link, votesStatus, postFilename, description, moderated, - mirrored, [], 32) + mirrored, [], 32, session, debug) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -478,7 +523,8 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str, def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, - maxFeedItemSizeKb: int) -> {}: + maxFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts an atom feed string to a dictionary """ if '' not in xmlStr: @@ -540,7 +586,7 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str, title, link, votesStatus, postFilename, description, moderated, - mirrored, [], 32) + mirrored, [], 32, session, debug) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -553,7 +599,8 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str, def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, - maxFeedItemSizeKb: int) -> {}: + maxFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts a json feed string to a dictionary See https://jsonfeed.org/version/1.1 """ @@ -651,7 +698,7 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str, title, link, votesStatus, postFilename, description, moderated, - mirrored, [], 32) + mirrored, [], 32, session, debug) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -664,7 +711,8 @@ def _jsonFeedV1ToDict(baseDir: str, domain: str, xmlStr: str, def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, - maxFeedItemSizeKb: int) -> {}: + maxFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts an atom-style YouTube feed string to a dictionary """ if '' not in xmlStr: @@ -723,7 +771,7 @@ def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str, title, link, votesStatus, postFilename, description, moderated, mirrored, - [], 32) + [], 32, session, debug) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -736,32 +784,38 @@ def _xmlStrToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, maxFeedItemSizeKb: int, - maxCategoriesFeedItemSizeKb: int) -> {}: + maxCategoriesFeedItemSizeKb: int, + session, debug: bool) -> {}: """Converts an xml string to a dictionary """ if '' in xmlStr and '' in xmlStr: print('YouTube feed: reading') return _atomFeedYTToDict(baseDir, domain, xmlStr, moderated, mirrored, - maxPostsPerSource, maxFeedItemSizeKb) + maxPostsPerSource, maxFeedItemSizeKb, + session, debug) elif 'rss version="2.0"' in xmlStr: return _xml2StrToDict(baseDir, domain, xmlStr, moderated, mirrored, maxPostsPerSource, maxFeedItemSizeKb, - maxCategoriesFeedItemSizeKb) + maxCategoriesFeedItemSizeKb, + session, debug) elif ' {}: + maxCategoriesFeedItemSizeKb: int, debug: bool) -> {}: """Returns an RSS url as a dict """ if not isinstance(url, str): @@ -812,7 +866,8 @@ def getRSS(baseDir: str, domain: str, session, url: str, moderated, mirrored, maxPostsPerSource, maxFeedItemSizeKb, - maxCategoriesFeedItemSizeKb) + maxCategoriesFeedItemSizeKb, + session, debug) else: print('WARN: feed is too large, ' + 'or contains invalid characters: ' + url) @@ -923,7 +978,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, newswire: {}, maxBlogsPerAccount: int, indexFilename: str, - maxTags: int, systemLanguage: str) -> None: + maxTags: int, systemLanguage: str, + session, debug: bool) -> None: """Adds blogs for the given account to the newswire """ if not os.path.isfile(indexFilename): @@ -987,7 +1043,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, votes, fullPostFilename, description, moderated, False, tagsFromPost, - maxTags) + maxTags, session, debug) ctr += 1 if ctr >= maxBlogsPerAccount: @@ -996,7 +1052,8 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {}, maxBlogsPerAccount: int, - maxTags: int, systemLanguage: str) -> None: + maxTags: int, systemLanguage: str, + session, debug: bool) -> None: """Adds blogs from each user account into the newswire """ moderationDict = {} @@ -1025,7 +1082,8 @@ def _addBlogsToNewswire(baseDir: str, domain: str, newswire: {}, _addAccountBlogsToNewswire(baseDir, nickname, domain, newswire, maxBlogsPerAccount, blogsIndex, maxTags, - systemLanguage) + systemLanguage, session, + debug) break # sort the moderation dict into chronological order, latest first @@ -1050,7 +1108,7 @@ def getDictFromNewswire(session, baseDir: str, domain: str, maxTags: int, maxFeedItemSizeKb: int, maxNewswirePosts: int, maxCategoriesFeedItemSizeKb: int, - systemLanguage: str) -> {}: + systemLanguage: str, debug: bool) -> {}: """Gets rss feeds as a dictionary from newswire file """ subscriptionsFilename = baseDir + '/accounts/newswire.txt' @@ -1091,14 +1149,15 @@ def getDictFromNewswire(session, baseDir: str, domain: str, moderated, mirrored, maxPostsPerSource, maxFeedSizeKb, maxFeedItemSizeKb, - maxCategoriesFeedItemSizeKb) + maxCategoriesFeedItemSizeKb, debug) if itemsList: for dateStr, item in itemsList.items(): result[dateStr] = item # add blogs from each user account _addBlogsToNewswire(baseDir, domain, result, - maxPostsPerSource, maxTags, systemLanguage) + maxPostsPerSource, maxTags, systemLanguage, + session, debug) # sort into chronological order, latest first sortedResult = OrderedDict(sorted(result.items(), reverse=True)) diff --git a/session.py b/session.py index 4eedae57e..d40b70be7 100644 --- a/session.py +++ b/session.py @@ -452,3 +452,27 @@ def downloadImage(session, baseDir: str, url: str, print('EX: Failed to download image: ' + str(url) + ' ' + str(e)) return False + + +def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): + """http GET for an image + """ + try: + result = session.get(url, timeout=timeoutSec) + if result.status_code != 200: + print('WARN: getImageFromUrl: ' + url + + ' failed with error code ' + str(result.status_code)) + return result.content + except requests.exceptions.RequestException as e: + if debug: + print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + + str(e)) + except ValueError as e: + if debug: + print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + + str(e)) + except SocketError as e: + if e.errno == errno.ECONNRESET: + print('WARN: getImageFromUrl failed, ' + + 'connection was reset ' + str(e)) + return None From 83b40675090e07f5607f1b2101cf23d03eb0c151 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 21:13:31 +0000 Subject: [PATCH 02/37] GET endpoint for cached favicons --- daemon.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 716c90b0a..dbcd23ba9 100644 --- a/daemon.py +++ b/daemon.py @@ -1562,7 +1562,7 @@ class PubServer(BaseHTTPRequestHandler): self.authorizedNickname = None notAuthPaths = ( - '/icons/', '/avatars/', + '/icons/', '/avatars/', '/favicons/', '/system/accounts/avatars/', '/system/accounts/headers/', '/system/media_attachments/files/', @@ -7407,6 +7407,36 @@ class PubServer(BaseHTTPRequestHandler): return self._404() + def _showCachedFavicon(self, refererDomain: str, path: str, + baseDir: str, GETstartTime) -> None: + """Shows a favicon image obtained from the cache + """ + mediaFilename = baseDir + '/favicons' + path + if os.path.isfile(mediaFilename): + if self._etag_exists(mediaFilename): + # The file has not changed + self._304() + return + mediaBinary = None + try: + with open(mediaFilename, 'rb') as avFile: + mediaBinary = avFile.read() + except OSError: + print('EX: unable to read cached favicon ' + mediaFilename) + if mediaBinary: + mimeType = mediaFileMimeType(mediaFilename) + self._set_headers_etag(mediaFilename, + mimeType, + mediaBinary, None, + refererDomain, + False, None) + self._write(mediaBinary) + fitnessPerformance(GETstartTime, self.server.fitness, + '_GET', '_showCachedFavicon', + self.server.debug) + return + self._404() + def _showCachedAvatar(self, refererDomain: str, path: str, baseDir: str, GETstartTime) -> None: """Shows an avatar image obtained from the cache @@ -12329,6 +12359,7 @@ class PubServer(BaseHTTPRequestHandler): '/emoji/' not in path and \ '/tags/' not in path and \ '/avatars/' not in path and \ + '/favicons/' not in path and \ '/headers/' not in path and \ '/fonts/' not in path and \ '/icons/' not in path: @@ -14732,6 +14763,14 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'help screen image done', self.server.debug) + # cached favicon images + # Note that this comes before the busy flag to avoid conflicts + if self.path.startswith('/favicons/'): + self._showCachedFavicon(refererDomain, self.path, + self.server.baseDir, + GETstartTime) + return + # cached avatar images # Note that this comes before the busy flag to avoid conflicts if self.path.startswith('/avatars/'): From 22d2f49f53b575de9009375a8864fe2bc2d187c0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 21:14:24 +0000 Subject: [PATCH 03/37] Only download favicons once --- newswire.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/newswire.py b/newswire.py index a16482270..b3a65fd44 100644 --- a/newswire.py +++ b/newswire.py @@ -158,6 +158,8 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, os.mkdir(baseDir + '/favicons') linkFilename = url.replace('/', '#') imageFilename = baseDir + '/favicons/' + linkFilename + if os.path.isfile(imageFilename): + return True try: with open(imageFilename, 'wb+') as fp: fp.write(imageData) From 71cf29629ae0d3d362e89c09ab4044694dade8ff Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 21:22:44 +0000 Subject: [PATCH 04/37] Avoid duplicated function --- newswire.py | 4 ++-- webapp_column_right.py | 19 ++----------------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/newswire.py b/newswire.py index b3a65fd44..907eb40c4 100644 --- a/newswire.py +++ b/newswire.py @@ -127,7 +127,7 @@ def limitWordLengths(text: str, maxWordLength: int) -> str: return result -def _getNewswireFaviconUrl(url: str) -> str: +def getNewswireFaviconUrl(url: str) -> str: """Returns a favicon url from the given article link """ if '://' not in url: @@ -147,7 +147,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, link: str, debug: bool) -> bool: """Downloads the favicon for the given feed link """ - url = _getNewswireFaviconUrl(link) + url = getNewswireFaviconUrl(link) if '://' not in link: return False timeoutSec = 10 diff --git a/webapp_column_right.py b/webapp_column_right.py index 5f3b44d2c..bf1162270 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -22,6 +22,7 @@ from utils import getConfigParam from utils import removeDomainPort from utils import acctDir from posts import isModerator +from newswire import getNewswireFaviconUrl from webapp_utils import getRightImageFile from webapp_utils import htmlHeaderWithExternalStyle from webapp_utils import htmlFooter @@ -210,22 +211,6 @@ def _getBrokenFavSubstitute() -> str: return " onerror=\"this.onerror=null; this.src='/newswire_favicon.ico'\"" -def _getNewswireFavicon(url: str) -> str: - """Returns a favicon url from the given article link - """ - if '://' not in url: - return '/newswire_favicon.ico' - if url.startswith('http://'): - if not (url.endswith('.onion') or url.endswith('.i2p')): - return '/newswire_favicon.ico' - domain = url.split('://')[1] - if '/' not in domain: - return url + '/favicon.ico' - else: - domain = domain.split('/')[0] - return url.split('://')[0] + '://' + domain + '/favicon.ico' - - def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, translate: {}, positiveVoting: bool) -> str: """Converts a newswire dict into html @@ -252,7 +237,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, dateStrLink = dateStr.replace('T', ' ') dateStrLink = dateStrLink.replace('Z', '') url = item[1] - faviconUrl = _getNewswireFavicon(url) + faviconUrl = getNewswireFaviconUrl(url) faviconLink = '' if faviconUrl: faviconLink = \ From 2e681f5b58902953440e07d9ec4b8470c1222dcb Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 21:27:50 +0000 Subject: [PATCH 05/37] Link to cached favicons --- webapp_column_right.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webapp_column_right.py b/webapp_column_right.py index bf1162270..fb7fc78c6 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -240,6 +240,10 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, faviconUrl = getNewswireFaviconUrl(url) faviconLink = '' if faviconUrl: + favBase = '/favicons/' + faviconUrl.replace('/', '#') + cachedFaviconFilename = baseDir + favBase + if os.path.isfile(cachedFaviconFilename): + faviconUrl = favBase faviconLink = \ '' From e1b349c0fc91b4927270907074be4fcc9fcb75a0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:01:12 +0000 Subject: [PATCH 06/37] Favicon path --- daemon.py | 2 +- newswire.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/daemon.py b/daemon.py index dbcd23ba9..19af15a2d 100644 --- a/daemon.py +++ b/daemon.py @@ -7411,7 +7411,7 @@ class PubServer(BaseHTTPRequestHandler): baseDir: str, GETstartTime) -> None: """Shows a favicon image obtained from the cache """ - mediaFilename = baseDir + '/favicons' + path + mediaFilename = baseDir + path if os.path.isfile(mediaFilename): if self._etag_exists(mediaFilename): # The file has not changed diff --git a/newswire.py b/newswire.py index 907eb40c4..49954a867 100644 --- a/newswire.py +++ b/newswire.py @@ -147,16 +147,16 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, link: str, debug: bool) -> bool: """Downloads the favicon for the given feed link """ - url = getNewswireFaviconUrl(link) + favUrl = getNewswireFaviconUrl(link) if '://' not in link: return False timeoutSec = 10 - imageData = getImageBinaryFromUrl(session, url, timeoutSec, debug) + imageData = getImageBinaryFromUrl(session, favUrl, timeoutSec, debug) if not imageData: return False if not os.path.isdir(baseDir + '/favicons'): os.mkdir(baseDir + '/favicons') - linkFilename = url.replace('/', '#') + linkFilename = favUrl.replace('/', '#') imageFilename = baseDir + '/favicons/' + linkFilename if os.path.isfile(imageFilename): return True @@ -164,7 +164,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, with open(imageFilename, 'wb+') as fp: fp.write(imageData) except OSError: - print('EX: failed writing favicon ' + url) + print('EX: failed writing favicon ' + favUrl) return False return True From 30d85eb04bea63826490b5860301d92ec1a81357 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:05:40 +0000 Subject: [PATCH 07/37] Change location --- daemon.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/daemon.py b/daemon.py index 19af15a2d..e02d7379a 100644 --- a/daemon.py +++ b/daemon.py @@ -14745,6 +14745,14 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'share image done', self.server.debug) + # cached favicon images + # Note that this comes before the busy flag to avoid conflicts + if self.path.startswith('/favicons/'): + self._showCachedFavicon(refererDomain, self.path, + self.server.baseDir, + GETstartTime) + return + # icon images # Note that this comes before the busy flag to avoid conflicts if self.path.startswith('/icons/'): @@ -14763,14 +14771,6 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'help screen image done', self.server.debug) - # cached favicon images - # Note that this comes before the busy flag to avoid conflicts - if self.path.startswith('/favicons/'): - self._showCachedFavicon(refererDomain, self.path, - self.server.baseDir, - GETstartTime) - return - # cached avatar images # Note that this comes before the busy flag to avoid conflicts if self.path.startswith('/avatars/'): From 0a86daeccb49217d7a6ed228b04ca4216f4fe28f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:07:16 +0000 Subject: [PATCH 08/37] Change location --- daemon.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/daemon.py b/daemon.py index e02d7379a..87fb22391 100644 --- a/daemon.py +++ b/daemon.py @@ -14048,6 +14048,14 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'registered devices done', self.server.debug) + # cached favicon images + # Note that this comes before the busy flag to avoid conflicts + if self.path.startswith('/favicons/'): + self._showCachedFavicon(refererDomain, self.path, + self.server.baseDir, + GETstartTime) + return + if htmlGET and usersInPath: # show the person options screen with view/follow/block/report if '?options=' in self.path: @@ -14745,14 +14753,6 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'share image done', self.server.debug) - # cached favicon images - # Note that this comes before the busy flag to avoid conflicts - if self.path.startswith('/favicons/'): - self._showCachedFavicon(refererDomain, self.path, - self.server.baseDir, - GETstartTime) - return - # icon images # Note that this comes before the busy flag to avoid conflicts if self.path.startswith('/icons/'): From 192ebebe9ec87d9e700e6836cfbfefb546a624c0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:13:12 +0000 Subject: [PATCH 09/37] Unquote --- daemon.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/daemon.py b/daemon.py index 87fb22391..6014b66ea 100644 --- a/daemon.py +++ b/daemon.py @@ -7411,7 +7411,8 @@ class PubServer(BaseHTTPRequestHandler): baseDir: str, GETstartTime) -> None: """Shows a favicon image obtained from the cache """ - mediaFilename = baseDir + path + mediaFilename = baseDir + urllib.parse.unquote_plus(path) + print('showCachedFavicon: ' + showCachedFavicon) if os.path.isfile(mediaFilename): if self._etag_exists(mediaFilename): # The file has not changed @@ -13678,6 +13679,14 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'hasAccept', self.server.debug) + # cached favicon images + # Note that this comes before the busy flag to avoid conflicts + if htmlGET and self.path.startswith('/favicons/'): + self._showCachedFavicon(refererDomain, self.path, + self.server.baseDir, + GETstartTime) + return + # get css # Note that this comes before the busy flag to avoid conflicts if self.path.endswith('.css'): @@ -14048,14 +14057,6 @@ class PubServer(BaseHTTPRequestHandler): '_GET', 'registered devices done', self.server.debug) - # cached favicon images - # Note that this comes before the busy flag to avoid conflicts - if self.path.startswith('/favicons/'): - self._showCachedFavicon(refererDomain, self.path, - self.server.baseDir, - GETstartTime) - return - if htmlGET and usersInPath: # show the person options screen with view/follow/block/report if '?options=' in self.path: From d5806b1e28aeabddec9a51deb3364e5e4b57e72e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:14:34 +0000 Subject: [PATCH 10/37] Debug --- daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 6014b66ea..4b733a38b 100644 --- a/daemon.py +++ b/daemon.py @@ -7412,7 +7412,7 @@ class PubServer(BaseHTTPRequestHandler): """Shows a favicon image obtained from the cache """ mediaFilename = baseDir + urllib.parse.unquote_plus(path) - print('showCachedFavicon: ' + showCachedFavicon) + print('showCachedFavicon: ' + mediaFilename) if os.path.isfile(mediaFilename): if self._etag_exists(mediaFilename): # The file has not changed From f9bbfdd8cde962434ba6631d2c46acd51338775f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:21:16 +0000 Subject: [PATCH 11/37] Less indentation --- daemon.py | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/daemon.py b/daemon.py index 4b733a38b..04a6fd22b 100644 --- a/daemon.py +++ b/daemon.py @@ -7413,29 +7413,31 @@ class PubServer(BaseHTTPRequestHandler): """ mediaFilename = baseDir + urllib.parse.unquote_plus(path) print('showCachedFavicon: ' + mediaFilename) - if os.path.isfile(mediaFilename): - if self._etag_exists(mediaFilename): - # The file has not changed - self._304() - return - mediaBinary = None - try: - with open(mediaFilename, 'rb') as avFile: - mediaBinary = avFile.read() - except OSError: - print('EX: unable to read cached favicon ' + mediaFilename) - if mediaBinary: - mimeType = mediaFileMimeType(mediaFilename) - self._set_headers_etag(mediaFilename, - mimeType, - mediaBinary, None, - refererDomain, - False, None) - self._write(mediaBinary) - fitnessPerformance(GETstartTime, self.server.fitness, - '_GET', '_showCachedFavicon', - self.server.debug) - return + if not os.path.isfile(mediaFilename): + self._404() + return + if self._etag_exists(mediaFilename): + # The file has not changed + self._304() + return + mediaBinary = None + try: + with open(mediaFilename, 'rb') as avFile: + mediaBinary = avFile.read() + except OSError: + print('EX: unable to read cached favicon ' + mediaFilename) + if mediaBinary: + mimeType = mediaFileMimeType(mediaFilename) + self._set_headers_etag(mediaFilename, + mimeType, + mediaBinary, None, + refererDomain, + False, None) + self._write(mediaBinary) + fitnessPerformance(GETstartTime, self.server.fitness, + '_GET', '_showCachedFavicon', + self.server.debug) + return self._404() def _showCachedAvatar(self, refererDomain: str, path: str, From e9b98b82cece5444ca8b3f8c9463a62f83b439dc Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:26:46 +0000 Subject: [PATCH 12/37] Dash instead of hash --- daemon.py | 1 - newswire.py | 2 +- webapp_column_right.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/daemon.py b/daemon.py index 04a6fd22b..f27c50256 100644 --- a/daemon.py +++ b/daemon.py @@ -7412,7 +7412,6 @@ class PubServer(BaseHTTPRequestHandler): """Shows a favicon image obtained from the cache """ mediaFilename = baseDir + urllib.parse.unquote_plus(path) - print('showCachedFavicon: ' + mediaFilename) if not os.path.isfile(mediaFilename): self._404() return diff --git a/newswire.py b/newswire.py index 49954a867..ceab0dcec 100644 --- a/newswire.py +++ b/newswire.py @@ -156,7 +156,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, return False if not os.path.isdir(baseDir + '/favicons'): os.mkdir(baseDir + '/favicons') - linkFilename = favUrl.replace('/', '#') + linkFilename = favUrl.replace('/', '-') imageFilename = baseDir + '/favicons/' + linkFilename if os.path.isfile(imageFilename): return True diff --git a/webapp_column_right.py b/webapp_column_right.py index fb7fc78c6..4cf7a005b 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -240,7 +240,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, faviconUrl = getNewswireFaviconUrl(url) faviconLink = '' if faviconUrl: - favBase = '/favicons/' + faviconUrl.replace('/', '#') + favBase = '/favicons/' + faviconUrl.replace('/', '-') cachedFaviconFilename = baseDir + favBase if os.path.isfile(cachedFaviconFilename): faviconUrl = favBase From 3a488db6d1cfd99f2e6f86a4785fc15a1e8707e0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:39:05 +0000 Subject: [PATCH 13/37] Cache favicons in memory --- daemon.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index f27c50256..2a51d726e 100644 --- a/daemon.py +++ b/daemon.py @@ -7411,7 +7411,20 @@ class PubServer(BaseHTTPRequestHandler): baseDir: str, GETstartTime) -> None: """Shows a favicon image obtained from the cache """ + favFile = path.replace('/favicons/', '') mediaFilename = baseDir + urllib.parse.unquote_plus(path) + if self.server.faviconsCache.get(favFile): + mediaBinary = self.server.faviconsCache[favFile] + self._set_headers_etag(mediaFilename, + 'image/x-icon', + mediaBinary, None, + refererDomain, + False, None) + self._write(mediaBinary) + fitnessPerformance(GETstartTime, self.server.fitness, + '_GET', '_showCachedFavicon2', + self.server.debug) + return if not os.path.isfile(mediaFilename): self._404() return @@ -7426,9 +7439,8 @@ class PubServer(BaseHTTPRequestHandler): except OSError: print('EX: unable to read cached favicon ' + mediaFilename) if mediaBinary: - mimeType = mediaFileMimeType(mediaFilename) self._set_headers_etag(mediaFilename, - mimeType, + 'image/x-icon', mediaBinary, None, refererDomain, False, None) @@ -7436,6 +7448,7 @@ class PubServer(BaseHTTPRequestHandler): fitnessPerformance(GETstartTime, self.server.fitness, '_GET', '_showCachedFavicon', self.server.debug) + self.server.faviconsCache[favFile] = mediaBinary return self._404() @@ -18664,6 +18677,7 @@ def runDaemon(contentLicenseUrl: str, httpd.instanceId = instanceId httpd.personCache = {} httpd.cachedWebfingers = {} + httpd.faviconsCache = {} httpd.proxyType = proxyType httpd.session = None httpd.sessionLastUpdate = 0 From 06165e76a9381d5bed32b1b6499a199c627f6f86 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 22:52:48 +0000 Subject: [PATCH 14/37] Debug --- daemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/daemon.py b/daemon.py index 2a51d726e..9495d9d71 100644 --- a/daemon.py +++ b/daemon.py @@ -7413,6 +7413,7 @@ class PubServer(BaseHTTPRequestHandler): """ favFile = path.replace('/favicons/', '') mediaFilename = baseDir + urllib.parse.unquote_plus(path) + print('showCachedFavicon: ' + mediaFilename) if self.server.faviconsCache.get(favFile): mediaBinary = self.server.faviconsCache[favFile] self._set_headers_etag(mediaFilename, From 791bbcf423391392a5c59ec362c067aa87a43d78 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 23:00:15 +0000 Subject: [PATCH 15/37] Avoid favicon confusion --- daemon.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/daemon.py b/daemon.py index 9495d9d71..302a681c0 100644 --- a/daemon.py +++ b/daemon.py @@ -13425,18 +13425,19 @@ class PubServer(BaseHTTPRequestHandler): # default newswire favicon, for links to sites which # have no favicon - if 'newswire_favicon.ico' in self.path: - self._getFavicon(callingDomain, self.server.baseDir, - self.server.debug, - 'newswire_favicon.ico') - return + if not self.path.startswith('/favicons/'): + if 'newswire_favicon.ico' in self.path: + self._getFavicon(callingDomain, self.server.baseDir, + self.server.debug, + 'newswire_favicon.ico') + return - # favicon image - if 'favicon.ico' in self.path: - self._getFavicon(callingDomain, self.server.baseDir, - self.server.debug, - 'favicon.ico') - return + # favicon image + if 'favicon.ico' in self.path: + self._getFavicon(callingDomain, self.server.baseDir, + self.server.debug, + 'favicon.ico') + return # check authorization authorized = self._isAuthorized() From 6da17599cae8c0a4be4e718186ea8d98802b0e09 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 23:32:13 +0000 Subject: [PATCH 16/37] Detect favicon mime type --- daemon.py | 13 +++++++++---- newswire.py | 9 ++++++++- session.py | 13 +++++++++++-- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/daemon.py b/daemon.py index 302a681c0..2c725975c 100644 --- a/daemon.py +++ b/daemon.py @@ -7416,8 +7416,9 @@ class PubServer(BaseHTTPRequestHandler): print('showCachedFavicon: ' + mediaFilename) if self.server.faviconsCache.get(favFile): mediaBinary = self.server.faviconsCache[favFile] + mimeType = mediaFileMimeType(mediaFilename) self._set_headers_etag(mediaFilename, - 'image/x-icon', + mimeType, mediaBinary, None, refererDomain, False, None) @@ -7427,8 +7428,11 @@ class PubServer(BaseHTTPRequestHandler): self.server.debug) return if not os.path.isfile(mediaFilename): - self._404() - return + originalMediaFilename = mediaFilename + mediaFilename = originalMediaFilename.replace('.ico', '.png') + if not os.path.isfile(mediaFilename): + self._404() + return if self._etag_exists(mediaFilename): # The file has not changed self._304() @@ -7440,8 +7444,9 @@ class PubServer(BaseHTTPRequestHandler): except OSError: print('EX: unable to read cached favicon ' + mediaFilename) if mediaBinary: + mimeType = mediaFileMimeType(mediaFilename) self._set_headers_etag(mediaFilename, - 'image/x-icon', + mimeType, mediaBinary, None, refererDomain, False, None) diff --git a/newswire.py b/newswire.py index ceab0dcec..a54a74b3a 100644 --- a/newswire.py +++ b/newswire.py @@ -151,7 +151,14 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, if '://' not in link: return False timeoutSec = 10 - imageData = getImageBinaryFromUrl(session, favUrl, timeoutSec, debug) + imageData, mimeType = \ + getImageBinaryFromUrl(session, favUrl, timeoutSec, debug) + if 'image/png' in mimeType: + favUrl = favUrl.replace('.ico', '.png') + elif 'image/webp' in mimeType: + favUrl = favUrl.replace('.ico', '.webp') + elif 'image/gif' in mimeType: + favUrl = favUrl.replace('.ico', '.gif') if not imageData: return False if not os.path.isdir(baseDir + '/favicons'): diff --git a/session.py b/session.py index d40b70be7..a3e3473bc 100644 --- a/session.py +++ b/session.py @@ -462,7 +462,16 @@ def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): if result.status_code != 200: print('WARN: getImageFromUrl: ' + url + ' failed with error code ' + str(result.status_code)) - return result.content + mimeType = 'image/png' + if 'image/x-icon' in result.headers['content-length']: + mimeType = 'image/x-icon' + elif 'image/webp' in result.headers['content-length']: + mimeType = 'image/webp' + elif 'image/jpeg' in result.headers['content-length']: + mimeType = 'image/jpeg' + elif 'image/gif' in result.headers['content-length']: + mimeType = 'image/gif' + return result.content, mimeType except requests.exceptions.RequestException as e: if debug: print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + @@ -475,4 +484,4 @@ def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): if e.errno == errno.ECONNRESET: print('WARN: getImageFromUrl failed, ' + 'connection was reset ' + str(e)) - return None + return None, None From 5b4a3caa4ae7eaa865d4a4355ea47be78b6509b9 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 23:47:01 +0000 Subject: [PATCH 17/37] Detect icon type --- utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils.py b/utils.py index 127b57a21..06544bdb0 100644 --- a/utils.py +++ b/utils.py @@ -2482,6 +2482,7 @@ def mediaFileMimeType(filename: str) -> str: 'svg': 'image/svg+xml', 'webp': 'image/webp', 'avif': 'image/avif', + 'ico': 'image/x-icon', 'mp3': 'audio/mpeg', 'ogg': 'audio/ogg', 'flac': 'audio/flac', From ca11153a36c502e255d5926289448a6372840a58 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 16 Dec 2021 23:59:53 +0000 Subject: [PATCH 18/37] Sequence --- newswire.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index a54a74b3a..0d8c26400 100644 --- a/newswire.py +++ b/newswire.py @@ -153,14 +153,14 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, timeoutSec = 10 imageData, mimeType = \ getImageBinaryFromUrl(session, favUrl, timeoutSec, debug) + if not imageData: + return False if 'image/png' in mimeType: favUrl = favUrl.replace('.ico', '.png') elif 'image/webp' in mimeType: favUrl = favUrl.replace('.ico', '.webp') elif 'image/gif' in mimeType: favUrl = favUrl.replace('.ico', '.gif') - if not imageData: - return False if not os.path.isdir(baseDir + '/favicons'): os.mkdir(baseDir + '/favicons') linkFilename = favUrl.replace('/', '-') From b5975917a69f9ac882492bbd61118cb9a5023ac1 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 09:48:45 +0000 Subject: [PATCH 19/37] Get content type --- session.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/session.py b/session.py index a3e3473bc..664dcffbe 100644 --- a/session.py +++ b/session.py @@ -457,19 +457,28 @@ def downloadImage(session, baseDir: str, url: str, def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): """http GET for an image """ + mimeType = 'image/png' + contentType = None try: result = session.get(url, timeout=timeoutSec) if result.status_code != 200: print('WARN: getImageFromUrl: ' + url + ' failed with error code ' + str(result.status_code)) - mimeType = 'image/png' - if 'image/x-icon' in result.headers['content-length']: + if result.headers.get('content-type'): + contentType = result.headers['content-type'] + elif result.headers.get('Content-type'): + contentType = result.headers['Content-type'] + elif result.headers.get('Content-Type'): + contentType = result.headers['Content-Type'] + if not contentType: + return None, None + if 'image/x-icon' in contentType: mimeType = 'image/x-icon' - elif 'image/webp' in result.headers['content-length']: + elif 'image/webp' in contentType: mimeType = 'image/webp' - elif 'image/jpeg' in result.headers['content-length']: + elif 'image/jpeg' in contentType: mimeType = 'image/jpeg' - elif 'image/gif' in result.headers['content-length']: + elif 'image/gif' in contentType: mimeType = 'image/gif' return result.content, mimeType except requests.exceptions.RequestException as e: From 974e2d0830a6735c9c50ccaab21a51b9bd507e15 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 09:55:19 +0000 Subject: [PATCH 20/37] Exception handling only where needed --- session.py | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/session.py b/session.py index 664dcffbe..fdf548563 100644 --- a/session.py +++ b/session.py @@ -459,28 +459,9 @@ def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): """ mimeType = 'image/png' contentType = None + result = None try: result = session.get(url, timeout=timeoutSec) - if result.status_code != 200: - print('WARN: getImageFromUrl: ' + url + - ' failed with error code ' + str(result.status_code)) - if result.headers.get('content-type'): - contentType = result.headers['content-type'] - elif result.headers.get('Content-type'): - contentType = result.headers['Content-type'] - elif result.headers.get('Content-Type'): - contentType = result.headers['Content-Type'] - if not contentType: - return None, None - if 'image/x-icon' in contentType: - mimeType = 'image/x-icon' - elif 'image/webp' in contentType: - mimeType = 'image/webp' - elif 'image/jpeg' in contentType: - mimeType = 'image/jpeg' - elif 'image/gif' in contentType: - mimeType = 'image/gif' - return result.content, mimeType except requests.exceptions.RequestException as e: if debug: print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + @@ -493,4 +474,30 @@ def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): if e.errno == errno.ECONNRESET: print('WARN: getImageFromUrl failed, ' + 'connection was reset ' + str(e)) - return None, None + + if not result: + return None, None + + if result.status_code != 200: + print('WARN: getImageFromUrl: ' + url + + ' failed with error code ' + str(result.status_code)) + + if result.headers.get('content-type'): + contentType = result.headers['content-type'] + elif result.headers.get('Content-type'): + contentType = result.headers['Content-type'] + elif result.headers.get('Content-Type'): + contentType = result.headers['Content-Type'] + + if not contentType: + return None, None + + if 'image/x-icon' in contentType: + mimeType = 'image/x-icon' + elif 'image/webp' in contentType: + mimeType = 'image/webp' + elif 'image/jpeg' in contentType: + mimeType = 'image/jpeg' + elif 'image/gif' in contentType: + mimeType = 'image/gif' + return result.content, mimeType From 823766088e277e5ae9ff95dd122ef56a926abf71 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 10:04:18 +0000 Subject: [PATCH 21/37] Rename function --- newswire.py | 4 ++-- session.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/newswire.py b/newswire.py index 0d8c26400..beb1877c2 100644 --- a/newswire.py +++ b/newswire.py @@ -34,7 +34,7 @@ from utils import localActorUrl from blocking import isBlockedDomain from blocking import isBlockedHashtag from filters import isFiltered -from session import getImageBinaryFromUrl +from session import downloadImageAnyMimeType def _removeCDATA(text: str) -> str: @@ -152,7 +152,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, return False timeoutSec = 10 imageData, mimeType = \ - getImageBinaryFromUrl(session, favUrl, timeoutSec, debug) + downloadImageAnyMimeType(session, favUrl, timeoutSec, debug) if not imageData: return False if 'image/png' in mimeType: diff --git a/session.py b/session.py index fdf548563..ec9b0949c 100644 --- a/session.py +++ b/session.py @@ -394,7 +394,7 @@ def postImage(session, attachImageFilename: str, federationList: [], def downloadImage(session, baseDir: str, url: str, imageFilename: str, debug: bool, force: bool = False) -> bool: - """Downloads an image + """Downloads an image with an expected mime type """ if not url: return None @@ -407,7 +407,8 @@ def downloadImage(session, baseDir: str, url: str, 'gif': 'gif', 'svg': 'svg+xml', 'webp': 'webp', - 'avif': 'avif' + 'avif': 'avif', + 'ico': 'x-icon' } sessionHeaders = None for imFormat, mimeType in imageFormats.items(): @@ -454,8 +455,8 @@ def downloadImage(session, baseDir: str, url: str, return False -def getImageBinaryFromUrl(session, url: str, timeoutSec: int, debug: bool): - """http GET for an image +def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): + """http GET for an image with any mime type """ mimeType = 'image/png' contentType = None From 2b74116f12b2944b74588db32b635c1603b5bb2a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 10:07:49 +0000 Subject: [PATCH 22/37] Tidying --- session.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/session.py b/session.py index ec9b0949c..b4f7ab3f5 100644 --- a/session.py +++ b/session.py @@ -493,12 +493,17 @@ def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): if not contentType: return None, None - if 'image/x-icon' in contentType: - mimeType = 'image/x-icon' - elif 'image/webp' in contentType: - mimeType = 'image/webp' - elif 'image/jpeg' in contentType: - mimeType = 'image/jpeg' - elif 'image/gif' in contentType: - mimeType = 'image/gif' + imageFormats = { + 'ico': 'x-icon', + 'png': 'png', + 'jpg': 'jpeg', + 'jpeg': 'jpeg', + 'gif': 'gif', + 'svg': 'svg+xml', + 'webp': 'webp', + 'avif': 'avif' + } + for imFormat, mType in imageFormats.items(): + if 'image/' + mType in contentType: + mimeType = 'image/' + mType return result.content, mimeType From ddd29d8577a70a0f6fbfd15503b757576dcc0467 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 10:12:11 +0000 Subject: [PATCH 23/37] Change function names --- session.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/session.py b/session.py index b4f7ab3f5..7f9398d2b 100644 --- a/session.py +++ b/session.py @@ -464,24 +464,26 @@ def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): try: result = session.get(url, timeout=timeoutSec) except requests.exceptions.RequestException as e: - if debug: - print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + - str(e)) + print('ERROR: downloadImageAnyMimeType failed: ' + + str(url) + ', ' + str(e)) + return None, None except ValueError as e: - if debug: - print('ERROR: getImageFromUrl failed: ' + str(url) + ', ' + - str(e)) + print('ERROR: downloadImageAnyMimeType failed: ' + + str(url) + ', ' + str(e)) + return None, None except SocketError as e: if e.errno == errno.ECONNRESET: - print('WARN: getImageFromUrl failed, ' + + print('WARN: downloadImageAnyMimeType failed, ' + 'connection was reset ' + str(e)) + return None, None if not result: return None, None if result.status_code != 200: - print('WARN: getImageFromUrl: ' + url + + print('WARN: downloadImageAnyMimeType: ' + url + ' failed with error code ' + str(result.status_code)) + return None, None if result.headers.get('content-type'): contentType = result.headers['content-type'] From d5e9652b16359552160b513d15f4dfc0abc50f7b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 10:15:05 +0000 Subject: [PATCH 24/37] Explicitly detect mime type --- newswire.py | 2 +- session.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index beb1877c2..48e85c5c8 100644 --- a/newswire.py +++ b/newswire.py @@ -153,7 +153,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, timeoutSec = 10 imageData, mimeType = \ downloadImageAnyMimeType(session, favUrl, timeoutSec, debug) - if not imageData: + if not imageData or not mimeType: return False if 'image/png' in mimeType: favUrl = favUrl.replace('.ico', '.png') diff --git a/session.py b/session.py index 7f9398d2b..351218276 100644 --- a/session.py +++ b/session.py @@ -458,7 +458,7 @@ def downloadImage(session, baseDir: str, url: str, def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): """http GET for an image with any mime type """ - mimeType = 'image/png' + mimeType = None contentType = None result = None try: From adedd622518e5ae9fb74348dce81b663793ad394 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 10:36:22 +0000 Subject: [PATCH 25/37] Support png favicons --- webapp_column_right.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/webapp_column_right.py b/webapp_column_right.py index 4cf7a005b..2ca5bc12a 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -244,6 +244,12 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename = baseDir + favBase if os.path.isfile(cachedFaviconFilename): faviconUrl = favBase + else: + favBase = favBase.replace('.ico', '.png') + cachedFaviconFilename = baseDir + favBase + if os.path.isfile(cachedFaviconFilename): + faviconUrl = favBase + faviconLink = \ '' From c6516b55c3109cc23dc0bf5a0ceeaf03ce69ed5e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:01:54 +0000 Subject: [PATCH 26/37] Shorter cached favicon filename --- daemon.py | 27 ++++++++++++--------------- newswire.py | 35 ++++++++++++++++++++++++----------- utils.py | 16 +++++++++++++--- webapp_column_right.py | 21 ++++++++++++++------- 4 files changed, 63 insertions(+), 36 deletions(-) diff --git a/daemon.py b/daemon.py index 2c725975c..02b8cdf58 100644 --- a/daemon.py +++ b/daemon.py @@ -7412,12 +7412,12 @@ class PubServer(BaseHTTPRequestHandler): """Shows a favicon image obtained from the cache """ favFile = path.replace('/favicons/', '') - mediaFilename = baseDir + urllib.parse.unquote_plus(path) - print('showCachedFavicon: ' + mediaFilename) + favFilename = baseDir + urllib.parse.unquote_plus(path) + print('showCachedFavicon: ' + favFilename) if self.server.faviconsCache.get(favFile): mediaBinary = self.server.faviconsCache[favFile] - mimeType = mediaFileMimeType(mediaFilename) - self._set_headers_etag(mediaFilename, + mimeType = mediaFileMimeType(favFilename) + self._set_headers_etag(favFilename, mimeType, mediaBinary, None, refererDomain, @@ -7427,25 +7427,22 @@ class PubServer(BaseHTTPRequestHandler): '_GET', '_showCachedFavicon2', self.server.debug) return - if not os.path.isfile(mediaFilename): - originalMediaFilename = mediaFilename - mediaFilename = originalMediaFilename.replace('.ico', '.png') - if not os.path.isfile(mediaFilename): - self._404() - return - if self._etag_exists(mediaFilename): + if not os.path.isfile(favFilename): + self._404() + return + if self._etag_exists(favFilename): # The file has not changed self._304() return mediaBinary = None try: - with open(mediaFilename, 'rb') as avFile: + with open(favFilename, 'rb') as avFile: mediaBinary = avFile.read() except OSError: - print('EX: unable to read cached favicon ' + mediaFilename) + print('EX: unable to read cached favicon ' + favFilename) if mediaBinary: - mimeType = mediaFileMimeType(mediaFilename) - self._set_headers_etag(mediaFilename, + mimeType = mediaFileMimeType(favFilename) + self._set_headers_etag(favFilename, mimeType, mediaBinary, None, refererDomain, diff --git a/newswire.py b/newswire.py index 48e85c5c8..03e766a91 100644 --- a/newswire.py +++ b/newswire.py @@ -18,6 +18,7 @@ from datetime import timezone from collections import OrderedDict from utils import validPostDate from categories import setHashtagCategory +from utils import getFavFilenameFromUrl from utils import getBaseContentFromPost from utils import hasObjectDict from utils import firstParagraphFromString @@ -155,23 +156,35 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, downloadImageAnyMimeType(session, favUrl, timeoutSec, debug) if not imageData or not mimeType: return False - if 'image/png' in mimeType: - favUrl = favUrl.replace('.ico', '.png') - elif 'image/webp' in mimeType: - favUrl = favUrl.replace('.ico', '.webp') - elif 'image/gif' in mimeType: - favUrl = favUrl.replace('.ico', '.gif') + + # update the favicon url + extensionsToMime = { + 'ico': 'x-icon', + 'png': 'png', + 'jpg': 'jpeg', + 'gif': 'gif', + 'avif': 'avif', + 'svg': 'svg+xml', + 'webp': 'webp' + } + for ext, mimeExt in extensionsToMime.items(): + if 'image/' + mimeExt in mimeType: + favUrl = favUrl.replace('.ico', '.' + mimeExt) + break + + # create cached favicons directory if needed if not os.path.isdir(baseDir + '/favicons'): os.mkdir(baseDir + '/favicons') - linkFilename = favUrl.replace('/', '-') - imageFilename = baseDir + '/favicons/' + linkFilename - if os.path.isfile(imageFilename): + + # save to the cache + favFilename = getFavFilenameFromUrl(baseDir, favUrl) + if os.path.isfile(favFilename): return True try: - with open(imageFilename, 'wb+') as fp: + with open(favFilename, 'wb+') as fp: fp.write(imageData) except OSError: - print('EX: failed writing favicon ' + favUrl) + print('EX: failed writing favicon ' + favFilename) return False return True diff --git a/utils.py b/utils.py index 06544bdb0..6a0e888e5 100644 --- a/utils.py +++ b/utils.py @@ -346,7 +346,7 @@ def getAudioExtensions() -> []: def getImageExtensions() -> []: """Returns a list of the possible image file extensions """ - return ('png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'svg') + return ('png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'svg', 'ico') def getImageMimeType(imageFilename: str) -> str: @@ -358,7 +358,8 @@ def getImageMimeType(imageFilename: str) -> str: 'gif': 'gif', 'avif': 'avif', 'svg': 'svg+xml', - 'webp': 'webp' + 'webp': 'webp', + 'ico': 'x-icon' } for ext, mimeExt in extensionsToMime.items(): if imageFilename.endswith('.' + ext): @@ -375,7 +376,8 @@ def getImageExtensionFromMimeType(contentType: str) -> str: 'gif': 'gif', 'svg+xml': 'svg', 'webp': 'webp', - 'avif': 'avif' + 'avif': 'avif', + 'x-icon': 'ico' } for mimeExt, ext in imageMedia.items(): if contentType.endswith(mimeExt): @@ -3220,3 +3222,11 @@ def getNewPostEndpoints() -> []: 'newreminder', 'newreport', 'newquestion', 'newshare', 'newwanted', 'editblogpost' ) + + +def getFavFilenameFromUrl(baseDir: str, faviconUrl: str) -> str: + """Returns the cached filename for a favicon based upon its url + """ + if '://' in faviconUrl: + faviconUrl = faviconUrl.split('://')[1] + return baseDir + '/favicons/' + faviconUrl.replace('/', '-') diff --git a/webapp_column_right.py b/webapp_column_right.py index 2ca5bc12a..3cd69a86c 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -11,6 +11,7 @@ import os from datetime import datetime from content import removeLongWords from content import limitRepeatedWords +from utils import getFavFilenameFromUrl from utils import getBaseContentFromPost from utils import removeHtml from utils import locatePost @@ -240,15 +241,21 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, faviconUrl = getNewswireFaviconUrl(url) faviconLink = '' if faviconUrl: - favBase = '/favicons/' + faviconUrl.replace('/', '-') - cachedFaviconFilename = baseDir + favBase + cachedFaviconFilename = getFavFilenameFromUrl(baseDir, faviconUrl) if os.path.isfile(cachedFaviconFilename): - faviconUrl = favBase + faviconUrl = \ + cachedFaviconFilename.replace(baseDir + '/favicons', '') else: - favBase = favBase.replace('.ico', '.png') - cachedFaviconFilename = baseDir + favBase - if os.path.isfile(cachedFaviconFilename): - faviconUrl = favBase + extensions = ('png', 'jpg', 'gif', 'avif', 'svg', 'webp') + for ext in extensions: + cachedFaviconFilename = \ + getFavFilenameFromUrl(baseDir, faviconUrl) + cachedFaviconFilename = \ + cachedFaviconFilename.replace('.ico', '.' + ext) + if os.path.isfile(cachedFaviconFilename): + faviconUrl = \ + cachedFaviconFilename.replace(baseDir + + '/favicons', '') faviconLink = \ ' Date: Fri, 17 Dec 2021 12:07:22 +0000 Subject: [PATCH 27/37] Check for favicon from local instance --- daemon.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/daemon.py b/daemon.py index 02b8cdf58..b5f743350 100644 --- a/daemon.py +++ b/daemon.py @@ -13700,6 +13700,12 @@ class PubServer(BaseHTTPRequestHandler): # cached favicon images # Note that this comes before the busy flag to avoid conflicts if htmlGET and self.path.startswith('/favicons/'): + if self.server.domainFull in self.path: + # favicon for this instance + self._getFavicon(callingDomain, self.server.baseDir, + self.server.debug, + 'favicon.ico') + return self._showCachedFavicon(refererDomain, self.path, self.server.baseDir, GETstartTime) From 7c8fc0719f6380651c08580855143dc94f1c23ca Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:10:06 +0000 Subject: [PATCH 28/37] Fix extension --- newswire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newswire.py b/newswire.py index 03e766a91..51c64c359 100644 --- a/newswire.py +++ b/newswire.py @@ -169,7 +169,7 @@ def _downloadNewswireFeedFavicon(session, baseDir: str, } for ext, mimeExt in extensionsToMime.items(): if 'image/' + mimeExt in mimeType: - favUrl = favUrl.replace('.ico', '.' + mimeExt) + favUrl = favUrl.replace('.ico', '.' + ext) break # create cached favicons directory if needed From 1b2f3d2539a7cd9f575896a54150c282071faf73 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:11:59 +0000 Subject: [PATCH 29/37] Shorter filename --- utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils.py b/utils.py index 6a0e888e5..54969c6c6 100644 --- a/utils.py +++ b/utils.py @@ -3229,4 +3229,6 @@ def getFavFilenameFromUrl(baseDir: str, faviconUrl: str) -> str: """ if '://' in faviconUrl: faviconUrl = faviconUrl.split('://')[1] + if '/favicon' in faviconUrl: + faviconUrl = faviconUrl.replace('/favicon', '') return baseDir + '/favicons/' + faviconUrl.replace('/', '-') From f36d19ccbe7b0eeccb750ec56b6a45455ee087dc Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:13:20 +0000 Subject: [PATCH 30/37] Include dot --- utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index 54969c6c6..14314e7e5 100644 --- a/utils.py +++ b/utils.py @@ -3229,6 +3229,6 @@ def getFavFilenameFromUrl(baseDir: str, faviconUrl: str) -> str: """ if '://' in faviconUrl: faviconUrl = faviconUrl.split('://')[1] - if '/favicon' in faviconUrl: - faviconUrl = faviconUrl.replace('/favicon', '') + if '/favicon.' in faviconUrl: + faviconUrl = faviconUrl.replace('/favicon.', '.') return baseDir + '/favicons/' + faviconUrl.replace('/', '-') From bf3fc6da375b62f8315d3a91a557318f0ee5d1da Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:26:01 +0000 Subject: [PATCH 31/37] Don't replace favicons directory --- webapp_column_right.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/webapp_column_right.py b/webapp_column_right.py index 3cd69a86c..8e476c34e 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -244,7 +244,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename = getFavFilenameFromUrl(baseDir, faviconUrl) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ - cachedFaviconFilename.replace(baseDir + '/favicons', '') + cachedFaviconFilename.replace(baseDir, '') else: extensions = ('png', 'jpg', 'gif', 'avif', 'svg', 'webp') for ext in extensions: @@ -254,8 +254,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename.replace('.ico', '.' + ext) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ - cachedFaviconFilename.replace(baseDir + - '/favicons', '') + cachedFaviconFilename.replace(baseDir, '') faviconLink = \ ' Date: Fri, 17 Dec 2021 12:41:25 +0000 Subject: [PATCH 32/37] Use full domain for favicon --- webapp_column_right.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/webapp_column_right.py b/webapp_column_right.py index 8e476c34e..78f1e094e 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -197,7 +197,7 @@ def getRightColumnContent(baseDir: str, nickname: str, domainFull: str, # show the newswire lines newswireContentStr = \ _htmlNewswire(baseDir, newswire, nickname, moderator, translate, - positiveVoting) + positiveVoting, httpPrefix, domainFull) htmlStr += newswireContentStr # show the rss icon at the bottom, typically on the right hand side @@ -213,7 +213,8 @@ def _getBrokenFavSubstitute() -> str: def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, - translate: {}, positiveVoting: bool) -> str: + translate: {}, positiveVoting: bool, + httpPrefix: str, domainFull: str) -> str: """Converts a newswire dict into html """ separatorStr = htmlPostSeparator(baseDir, 'right') @@ -244,6 +245,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename = getFavFilenameFromUrl(baseDir, faviconUrl) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ + httpPrefix + '://' + domainFull + \ cachedFaviconFilename.replace(baseDir, '') else: extensions = ('png', 'jpg', 'gif', 'avif', 'svg', 'webp') @@ -254,6 +256,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename.replace('.ico', '.' + ext) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ + httpPrefix + '://' + domainFull + \ cachedFaviconFilename.replace(baseDir, '') faviconLink = \ From 9fa69bcc815ce119a55bb9635b10e8a142697f67 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:55:30 +0000 Subject: [PATCH 33/37] Set accept header for favicons --- session.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/session.py b/session.py index 351218276..974cbd53f 100644 --- a/session.py +++ b/session.py @@ -461,8 +461,11 @@ def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): mimeType = None contentType = None result = None + sessionHeaders = { + 'Accept': 'image/x-icon; image/png' + } try: - result = session.get(url, timeout=timeoutSec) + result = session.get(url, headers=sessionHeaders, timeout=timeoutSec) except requests.exceptions.RequestException as e: print('ERROR: downloadImageAnyMimeType failed: ' + str(url) + ', ' + str(e)) From 66447bcbf1952b2906fdf61c87c45c9f10a59e55 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:57:08 +0000 Subject: [PATCH 34/37] Comma --- session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/session.py b/session.py index 974cbd53f..d6fffd177 100644 --- a/session.py +++ b/session.py @@ -462,7 +462,7 @@ def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): contentType = None result = None sessionHeaders = { - 'Accept': 'image/x-icon; image/png' + 'Accept': 'image/x-icon, image/png' } try: result = session.get(url, headers=sessionHeaders, timeout=timeoutSec) From c5bb57a424c06273d4445b9c993059c80c9fe41d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 12:58:58 +0000 Subject: [PATCH 35/37] Extra types --- session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/session.py b/session.py index d6fffd177..8464bd0ab 100644 --- a/session.py +++ b/session.py @@ -462,7 +462,7 @@ def downloadImageAnyMimeType(session, url: str, timeoutSec: int, debug: bool): contentType = None result = None sessionHeaders = { - 'Accept': 'image/x-icon, image/png' + 'Accept': 'image/x-icon, image/png, image/webp, image/jpeg, image/gif' } try: result = session.get(url, headers=sessionHeaders, timeout=timeoutSec) From 856b8a15167a00783ddad27a4693e9a9b9e443d5 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 13:11:59 +0000 Subject: [PATCH 36/37] No get --- daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index b5f743350..33b206b01 100644 --- a/daemon.py +++ b/daemon.py @@ -13699,7 +13699,7 @@ class PubServer(BaseHTTPRequestHandler): # cached favicon images # Note that this comes before the busy flag to avoid conflicts - if htmlGET and self.path.startswith('/favicons/'): + if self.path.startswith('/favicons/'): if self.server.domainFull in self.path: # favicon for this instance self._getFavicon(callingDomain, self.server.baseDir, From ad8426708ea18638a95961a84056b0f9bcb9aca8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 17 Dec 2021 13:23:19 +0000 Subject: [PATCH 37/37] Remove full url --- webapp_column_right.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/webapp_column_right.py b/webapp_column_right.py index 78f1e094e..8e476c34e 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -197,7 +197,7 @@ def getRightColumnContent(baseDir: str, nickname: str, domainFull: str, # show the newswire lines newswireContentStr = \ _htmlNewswire(baseDir, newswire, nickname, moderator, translate, - positiveVoting, httpPrefix, domainFull) + positiveVoting) htmlStr += newswireContentStr # show the rss icon at the bottom, typically on the right hand side @@ -213,8 +213,7 @@ def _getBrokenFavSubstitute() -> str: def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, - translate: {}, positiveVoting: bool, - httpPrefix: str, domainFull: str) -> str: + translate: {}, positiveVoting: bool) -> str: """Converts a newswire dict into html """ separatorStr = htmlPostSeparator(baseDir, 'right') @@ -245,7 +244,6 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename = getFavFilenameFromUrl(baseDir, faviconUrl) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ - httpPrefix + '://' + domainFull + \ cachedFaviconFilename.replace(baseDir, '') else: extensions = ('png', 'jpg', 'gif', 'avif', 'svg', 'webp') @@ -256,7 +254,6 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, cachedFaviconFilename.replace('.ico', '.' + ext) if os.path.isfile(cachedFaviconFilename): faviconUrl = \ - httpPrefix + '://' + domainFull + \ cachedFaviconFilename.replace(baseDir, '') faviconLink = \