Add a maximum size limit for newswire rss/atom feeds.

A new --maxFeedSize option (config key maxNewswireFeedSizeKb, default 2048)
is threaded from epicyon.py through runDaemon and the newswire daemon into
getRSS, which refuses to parse any feed whose body exceeds the limit.

Review changes relative to the patch as first posted:
  * newswire.py getRSS: measure len(result.text) rather than len(result);
    the object returned by session.get is a requests Response, which has
    no length, so the original check raised TypeError on every feed.
  * epicyon.py: convert the config value with int() after the isdigit()
    check so the limit is compared as a number and not as a string.
  * newswire.py getDictFromNewswire: tolerate getRSS handing back nothing
    (oversized or failed feed) so one bad feed does not abort the update.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and blank context lines were reconstructed from the hunk line
counts; verify them against the target tree before applying. The post-image
blob ids on the epicyon.py and newswire.py index lines predate the review
changes above.

diff --git a/daemon.py b/daemon.py
index 5db5756d..86cb9be8 100644
--- a/daemon.py
+++ b/daemon.py
@@ -11951,7 +11951,8 @@ def loadTokens(baseDir: str, tokensDict: {}, tokensLookup: {}) -> None:
                 tokensLookup[token] = nickname
 
 
-def runDaemon(maxNewswirePostsPerSource: int,
+def runDaemon(maxNewswireFeedSizeKb: int,
+              maxNewswirePostsPerSource: int,
               showPublishedDateOnly: bool,
               votingTimeMins: int,
               positiveVoting: bool,
@@ -12072,6 +12073,9 @@ def runDaemon(maxNewswirePostsPerSource: int,
     # number of votes needed to remove a newswire item from the news timeline
     # or if positive voting is anabled to add the item to the news timeline
     httpd.newswireVotesThreshold = newswireVotesThreshold
+    # maximum overall size of an rss/atom feed read by the newswire daemon
+    # If the feed is too large then this is probably a DoS attempt
+    httpd.maxNewswireFeedSizeKb = maxNewswireFeedSizeKb
 
     # For each newswire source (account or rss feed)
     # this is the maximum number of posts to show for each.
diff --git a/epicyon.py b/epicyon.py
index 40ecac65..5b478786 100644
--- a/epicyon.py
+++ b/epicyon.py
@@ -116,6 +116,10 @@ parser.add_argument('--postsPerSource',
                     dest='maxNewswirePostsPerSource', type=int,
                     default=5,
                     help='Maximum newswire posts per feed or account')
+parser.add_argument('--maxFeedSize',
+                    dest='maxNewswireFeedSizeKb', type=int,
+                    default=2048,
+                    help='Maximum newswire rss/atom feed size in K')
 parser.add_argument('--postcache',
                     dest='maxRecentPosts', type=int, default=512,
                     help='The maximum number of recent posts to store in RAM')
@@ -1936,6 +1940,13 @@ if maxNewswirePostsPerSource:
     if maxNewswirePostsPerSource.isdigit():
         args.maxNewswirePostsPerSource = maxNewswirePostsPerSource
 
+# set the maximum size of a newswire rss/atom feed in Kilobytes
+maxNewswireFeedSizeKb = \
+    getConfigParam(baseDir, 'maxNewswireFeedSizeKb')
+if maxNewswireFeedSizeKb:
+    if maxNewswireFeedSizeKb.isdigit():
+        args.maxNewswireFeedSizeKb = int(maxNewswireFeedSizeKb)
+
 YTDomain = getConfigParam(baseDir, 'youtubedomain')
 if YTDomain:
     if '://' in YTDomain:
@@ -1949,7 +1960,8 @@ if setTheme(baseDir, themeName, domain):
     print('Theme set to ' + themeName)
 
 if __name__ == "__main__":
-    runDaemon(args.maxNewswirePostsPerSource,
+    runDaemon(args.maxNewswireFeedSizeKb,
+              args.maxNewswirePostsPerSource,
               args.dateonly,
               args.votingtime,
               args.positivevoting,
diff --git a/newsdaemon.py b/newsdaemon.py
index 30b71e61..0e861bf0 100644
--- a/newsdaemon.py
+++ b/newsdaemon.py
@@ -227,7 +227,8 @@ def runNewswireDaemon(baseDir: str, httpd,
         try:
             newNewswire = \
                 getDictFromNewswire(httpd.session, baseDir,
-                                    httpd.maxNewswirePostsPerSource)
+                                    httpd.maxNewswirePostsPerSource,
+                                    httpd.maxNewswireFeedSizeKb)
         except Exception as e:
             print('WARN: unable to update newswire ' + str(e))
             time.sleep(120)
diff --git a/newswire.py b/newswire.py
index cf269d09..3e6735f4 100644
--- a/newswire.py
+++ b/newswire.py
@@ -196,7 +196,8 @@ def xmlStrToDict(xmlStr: str, moderated: bool,
 
 
 def getRSS(session, url: str, moderated: bool,
-           maxPostsPerSource: int) -> {}:
+           maxPostsPerSource: int,
+           maxFeedSizeKb: int) -> {}:
     """Returns an RSS url as a dict
     """
     if not isinstance(url, str):
@@ -219,7 +220,11 @@ def getRSS(session, url: str, moderated: bool,
         print('WARN: no session specified for getRSS')
     try:
         result = session.get(url, headers=sessionHeaders, params=sessionParams)
-        return xmlStrToDict(result.text, moderated, maxPostsPerSource)
+        if result:
+            if int(len(result.text) / 1024) < maxFeedSizeKb:
+                return xmlStrToDict(result.text, moderated, maxPostsPerSource)
+            else:
+                print('WARN: feed is too large: ' + url)
     except requests.exceptions.RequestException as e:
         print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +
               'headers: ' + str(sessionHeaders) + '\n' +
@@ -396,7 +401,8 @@ def addBlogsToNewswire(baseDir: str, newswire: {},
         os.remove(newswireModerationFilename)
 
 
-def getDictFromNewswire(session, baseDir: str, maxPostsPerSource: int) -> {}:
+def getDictFromNewswire(session, baseDir: str,
+                        maxPostsPerSource: int, maxFeedSizeKb: int) -> {}:
     """Gets rss feeds as a dictionary from newswire file
     """
     subscriptionsFilename = baseDir + '/accounts/newswire.txt'
@@ -427,7 +433,8 @@ def getDictFromNewswire(session, baseDir: str, maxPostsPerSource: int) -> {}:
             moderated = True
             url = url.replace('*', '').strip()
 
-        itemsList = getRSS(session, url, moderated, maxPostsPerSource)
-        for dateStr, item in itemsList.items():
+        itemsList = getRSS(session, url, moderated,
+                           maxPostsPerSource, maxFeedSizeKb)
+        for dateStr, item in (itemsList or {}).items():
             result[dateStr] = item
 
diff --git a/tests.py b/tests.py
index 645c18ba..082108b6 100644
--- a/tests.py
+++ b/tests.py
@@ -288,7 +288,7 @@ def createServerAlice(path: str, domain: str, port: int,
     onionDomain = None
     i2pDomain = None
     print('Server running: Alice')
-    runDaemon(5, False, 0, False, 1, False, False, False,
+    runDaemon(1024, 5, False, 0, False, 1, False, False, False,
               5, True, True, 'en', __version__,
               "instanceId", False, path, domain,
               onionDomain, i2pDomain, None, port, port,
@@ -351,7 +351,7 @@ def createServerBob(path: str, domain: str, port: int,
     onionDomain = None
     i2pDomain = None
     print('Server running: Bob')
-    runDaemon(5, False, 0, False, 1, False, False, False,
+    runDaemon(1024, 5, False, 0, False, 1, False, False, False,
               5, True, True, 'en', __version__,
               "instanceId", False, path, domain,
               onionDomain, i2pDomain, None, port, port,
@@ -388,7 +388,7 @@ def createServerEve(path: str, domain: str, port: int, federationList: [],
     onionDomain = None
     i2pDomain = None
     print('Server running: Eve')
-    runDaemon(5, False, 0, False, 1, False, False, False,
+    runDaemon(1024, 5, False, 0, False, 1, False, False, False,
               5, True, True, 'en', __version__,
               "instanceId", False, path, domain,
               onionDomain, i2pDomain, None, port, port,