From c8858f871a9133b5e125232340e475318851550a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 3 Nov 2020 16:04:25 +0000 Subject: [PATCH] Check the size of individual rss/atom feed items --- daemon.py | 6 +++++- epicyon.py | 13 ++++++++++++- newsdaemon.py | 3 ++- newswire.py | 34 +++++++++++++++++++++++++--------- tests.py | 6 +++--- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/daemon.py b/daemon.py index f6cad15a3..7587f7005 100644 --- a/daemon.py +++ b/daemon.py @@ -12222,7 +12222,8 @@ def loadTokens(baseDir: str, tokensDict: {}, tokensLookup: {}) -> None: tokensLookup[token] = nickname -def runDaemon(publishButtonAtTop: bool, +def runDaemon(maxFeedItemSizeKb: int, + publishButtonAtTop: bool, rssIconAtTop: bool, iconsAsButtons: bool, fullWidthTimelineButtonHeader: bool, @@ -12400,6 +12401,9 @@ def runDaemon(publishButtonAtTop: bool, # above the header image httpd.publishButtonAtTop = publishButtonAtTop + # maximum size of individual RSS feed items, in K + httpd.maxFeedItemSizeKb = maxFeedItemSizeKb + if registration == 'open': httpd.registration = True else: diff --git a/epicyon.py b/epicyon.py index eacd846ac..9bc1144e4 100644 --- a/epicyon.py +++ b/epicyon.py @@ -120,6 +120,11 @@ parser.add_argument('--maxFeedSize', dest='maxNewswireFeedSizeKb', type=int, default=10240, help='Maximum newswire rss/atom feed size in K') +parser.add_argument('--maxFeedItemSizeKb', + dest='maxFeedItemSizeKb', type=int, + default=2048, + help='Maximum size of an individual rss/atom ' + + 'feed item in K') parser.add_argument('--maxMirroredArticles', dest='maxMirroredArticles', type=int, default=100, @@ -2010,6 +2015,11 @@ maxFollowers = \ if maxFollowers is not None: args.maxFollowers = int(maxFollowers) +maxFeedItemSizeKb = \ + getConfigParam(baseDir, 'maxFeedItemSizeKb') +if maxFeedItemSizeKb is not None: + args.maxFeedItemSizeKb = int(maxFeedItemSizeKb) + allowNewsFollowers = \ getConfigParam(baseDir, 'allowNewsFollowers') if allowNewsFollowers is not None: @@ -2053,7 +2063,8 @@ if setTheme(baseDir, themeName, domain): print('Theme set to ' + themeName) if __name__ == "__main__": - runDaemon(args.publishButtonAtTop, + runDaemon(args.maxFeedItemSizeKb, + args.publishButtonAtTop, args.rssIconAtTop, args.iconsAsButtons, args.fullWidthTimelineButtonHeader, diff --git a/newsdaemon.py b/newsdaemon.py index 2fd69834a..a6e4fc0bd 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -705,7 +705,8 @@ def runNewswireDaemon(baseDir: str, httpd, getDictFromNewswire(httpd.session, baseDir, domain, httpd.maxNewswirePostsPerSource, httpd.maxNewswireFeedSizeKb, - httpd.maxTags) + httpd.maxTags, + httpd.maxFeedItemSizeKb) except Exception as e: print('WARN: unable to update newswire ' + str(e)) time.sleep(120) diff --git a/newswire.py b/newswire.py index 069bc8ef7..3f76e471b 100644 --- a/newswire.py +++ b/newswire.py @@ -126,7 +126,8 @@ def addNewswireDictEntry(baseDir: str, domain: str, def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, - maxPostsPerSource: int) -> {}: + maxPostsPerSource: int, + maxFeedItemSizeKb: int) -> {}: """Converts an xml 2.0 string to a dictionary """ if '' not in xmlStr: @@ -134,7 +135,11 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, result = {} rssItems = xmlStr.split('') postCtr = 0 + maxBytes = maxFeedItemSizeKb * 1024 for rssItem in rssItems: + if len(rssItem) > maxBytes: + print('WARN: rss feed item is too big') + continue if '' not in rssItem: continue if '' not in rssItem: @@ -205,7 +210,8 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, - maxPostsPerSource: int) -> {}: + maxPostsPerSource: int, + maxFeedItemSizeKb: int) -> {}: """Converts an atom feed string to a dictionary """ if '' not in xmlStr: @@ -213,7 +219,11 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, result = {} rssItems = xmlStr.split('') postCtr = 0 + maxBytes = maxFeedItemSizeKb * 1024 for rssItem in rssItems: + if len(rssItem) > maxBytes: + print('WARN: atom feed item is too big') + continue if '' not in rssItem: continue if '' not in rssItem: @@ -283,21 +293,25 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, def xmlStrToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, - maxPostsPerSource: int) -> {}: + maxPostsPerSource: int, + maxFeedItemSizeKb: int) -> {}: """Converts an xml string to a dictionary """ if 'rss version="2.0"' in xmlStr: return xml2StrToDict(baseDir, domain, - xmlStr, moderated, mirrored, maxPostsPerSource) + xmlStr, moderated, mirrored, + maxPostsPerSource, maxFeedItemSizeKb) elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr: return atomFeedToDict(baseDir, domain, - xmlStr, moderated, mirrored, maxPostsPerSource) + xmlStr, moderated, mirrored, + maxPostsPerSource, maxFeedItemSizeKb) return {} def getRSS(baseDir: str, domain: str, session, url: str, moderated: bool, mirrored: bool, - maxPostsPerSource: int, maxFeedSizeKb: int) -> {}: + maxPostsPerSource: int, maxFeedSizeKb: int, + maxFeedItemSizeKb: int) -> {}: """Returns an RSS url as a dict """ if not isinstance(url, str): @@ -325,7 +339,8 @@ def getRSS(baseDir: str, domain: str, session, url: str, not containsInvalidChars(result.text): return xmlStrToDict(baseDir, domain, result.text, moderated, mirrored, - maxPostsPerSource) + maxPostsPerSource, + maxFeedItemSizeKb) else: print('WARN: feed is too large: ' + url) except requests.exceptions.RequestException as e: @@ -549,7 +564,7 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {}, def getDictFromNewswire(session, baseDir: str, domain: str, maxPostsPerSource: int, maxFeedSizeKb: int, - maxTags: int) -> {}: + maxTags: int, maxFeedItemSizeKb: int) -> {}: """Gets rss feeds as a dictionary from newswire file """ subscriptionsFilename = baseDir + '/accounts/newswire.txt' @@ -588,7 +603,8 @@ def getDictFromNewswire(session, baseDir: str, domain: str, itemsList = getRSS(baseDir, domain, session, url, moderated, mirrored, - maxPostsPerSource, maxFeedSizeKb) + maxPostsPerSource, maxFeedSizeKb, + maxFeedItemSizeKb) if itemsList: for dateStr, item in itemsList.items(): result[dateStr] = item diff --git a/tests.py b/tests.py index 647a8376a..c4cf32433 100644 --- a/tests.py +++ b/tests.py @@ -291,7 +291,7 @@ def createServerAlice(path: str, domain: str, port: int, onionDomain = None i2pDomain = None print('Server running: Alice') - runDaemon(False, True, False, False, True, 10, False, + runDaemon(2048, False, True, False, False, True, 10, False, 0, 100, 1024, 5, False, 0, False, 1, False, False, False, 5, True, True, 'en', __version__, @@ -356,7 +356,7 @@ def createServerBob(path: str, domain: str, port: int, onionDomain = None i2pDomain = None print('Server running: Bob') - runDaemon(False, True, False, False, True, 10, False, + runDaemon(2048, False, True, False, False, True, 10, False, 0, 100, 1024, 5, False, 0, False, 1, False, False, False, 5, True, True, 'en', __version__, @@ -395,7 +395,7 @@ def createServerEve(path: str, domain: str, port: int, federationList: [], onionDomain = None i2pDomain = None print('Server running: Eve') - runDaemon(False, True, False, False, True, 10, False, + runDaemon(2048, False, True, False, False, True, 10, False, 0, 100, 1024, 5, False, 0, False, 1, False, False, False, 5, True, True, 'en', __version__,