Check the size of individual RSS/Atom feed items

main
Bob Mottram 2020-11-03 16:04:25 +00:00
parent 896776b54e
commit c8858f871a
5 changed files with 47 additions and 15 deletions

View File

@ -12222,7 +12222,8 @@ def loadTokens(baseDir: str, tokensDict: {}, tokensLookup: {}) -> None:
tokensLookup[token] = nickname tokensLookup[token] = nickname
def runDaemon(publishButtonAtTop: bool, def runDaemon(maxFeedItemSizeKb: int,
publishButtonAtTop: bool,
rssIconAtTop: bool, rssIconAtTop: bool,
iconsAsButtons: bool, iconsAsButtons: bool,
fullWidthTimelineButtonHeader: bool, fullWidthTimelineButtonHeader: bool,
@ -12400,6 +12401,9 @@ def runDaemon(publishButtonAtTop: bool,
# above the header image # above the header image
httpd.publishButtonAtTop = publishButtonAtTop httpd.publishButtonAtTop = publishButtonAtTop
# maximum size of an individual RSS/Atom feed item, in kilobytes
httpd.maxFeedItemSizeKb = maxFeedItemSizeKb
if registration == 'open': if registration == 'open':
httpd.registration = True httpd.registration = True
else: else:

View File

@ -120,6 +120,11 @@ parser.add_argument('--maxFeedSize',
dest='maxNewswireFeedSizeKb', type=int, dest='maxNewswireFeedSizeKb', type=int,
default=10240, default=10240,
help='Maximum newswire rss/atom feed size in K') help='Maximum newswire rss/atom feed size in K')
parser.add_argument('--maxFeedItemSizeKb',
dest='maxFeedItemSizeKb', type=int,
default=2048,
help='Maximum size of an individual rss/atom ' +
'feed item in K')
parser.add_argument('--maxMirroredArticles', parser.add_argument('--maxMirroredArticles',
dest='maxMirroredArticles', type=int, dest='maxMirroredArticles', type=int,
default=100, default=100,
@ -2010,6 +2015,11 @@ maxFollowers = \
if maxFollowers is not None: if maxFollowers is not None:
args.maxFollowers = int(maxFollowers) args.maxFollowers = int(maxFollowers)
maxFeedItemSizeKb = \
getConfigParam(baseDir, 'maxFeedItemSizeKb')
if maxFeedItemSizeKb is not None:
args.maxFeedItemSizeKb = int(maxFeedItemSizeKb)
allowNewsFollowers = \ allowNewsFollowers = \
getConfigParam(baseDir, 'allowNewsFollowers') getConfigParam(baseDir, 'allowNewsFollowers')
if allowNewsFollowers is not None: if allowNewsFollowers is not None:
@ -2053,7 +2063,8 @@ if setTheme(baseDir, themeName, domain):
print('Theme set to ' + themeName) print('Theme set to ' + themeName)
if __name__ == "__main__": if __name__ == "__main__":
runDaemon(args.publishButtonAtTop, runDaemon(args.maxFeedItemSizeKb,
args.publishButtonAtTop,
args.rssIconAtTop, args.rssIconAtTop,
args.iconsAsButtons, args.iconsAsButtons,
args.fullWidthTimelineButtonHeader, args.fullWidthTimelineButtonHeader,

View File

@ -705,7 +705,8 @@ def runNewswireDaemon(baseDir: str, httpd,
getDictFromNewswire(httpd.session, baseDir, domain, getDictFromNewswire(httpd.session, baseDir, domain,
httpd.maxNewswirePostsPerSource, httpd.maxNewswirePostsPerSource,
httpd.maxNewswireFeedSizeKb, httpd.maxNewswireFeedSizeKb,
httpd.maxTags) httpd.maxTags,
httpd.maxFeedItemSizeKb)
except Exception as e: except Exception as e:
print('WARN: unable to update newswire ' + str(e)) print('WARN: unable to update newswire ' + str(e))
time.sleep(120) time.sleep(120)

View File

@ -126,7 +126,8 @@ def addNewswireDictEntry(baseDir: str, domain: str,
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int) -> {}: maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
"""Converts an xml 2.0 string to a dictionary """Converts an xml 2.0 string to a dictionary
""" """
if '<item>' not in xmlStr: if '<item>' not in xmlStr:
@ -134,7 +135,11 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
result = {} result = {}
rssItems = xmlStr.split('<item>') rssItems = xmlStr.split('<item>')
postCtr = 0 postCtr = 0
maxBytes = maxFeedItemSizeKb * 1024
for rssItem in rssItems: for rssItem in rssItems:
if len(rssItem) > maxBytes:
print('WARN: rss feed item is too big')
continue
if '<title>' not in rssItem: if '<title>' not in rssItem:
continue continue
if '</title>' not in rssItem: if '</title>' not in rssItem:
@ -205,7 +210,8 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int) -> {}: maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
"""Converts an atom feed string to a dictionary """Converts an atom feed string to a dictionary
""" """
if '<entry>' not in xmlStr: if '<entry>' not in xmlStr:
@ -213,7 +219,11 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
result = {} result = {}
rssItems = xmlStr.split('<entry>') rssItems = xmlStr.split('<entry>')
postCtr = 0 postCtr = 0
maxBytes = maxFeedItemSizeKb * 1024
for rssItem in rssItems: for rssItem in rssItems:
if len(rssItem) > maxBytes:
print('WARN: atom feed item is too big')
continue
if '<title>' not in rssItem: if '<title>' not in rssItem:
continue continue
if '</title>' not in rssItem: if '</title>' not in rssItem:
@ -283,21 +293,25 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
def xmlStrToDict(baseDir: str, domain: str, xmlStr: str, def xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int) -> {}: maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
"""Converts an xml string to a dictionary """Converts an xml string to a dictionary
""" """
if 'rss version="2.0"' in xmlStr: if 'rss version="2.0"' in xmlStr:
return xml2StrToDict(baseDir, domain, return xml2StrToDict(baseDir, domain,
xmlStr, moderated, mirrored, maxPostsPerSource) xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr: elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
return atomFeedToDict(baseDir, domain, return atomFeedToDict(baseDir, domain,
xmlStr, moderated, mirrored, maxPostsPerSource) xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
return {} return {}
def getRSS(baseDir: str, domain: str, session, url: str, def getRSS(baseDir: str, domain: str, session, url: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int, maxFeedSizeKb: int) -> {}: maxPostsPerSource: int, maxFeedSizeKb: int,
maxFeedItemSizeKb: int) -> {}:
"""Returns an RSS url as a dict """Returns an RSS url as a dict
""" """
if not isinstance(url, str): if not isinstance(url, str):
@ -325,7 +339,8 @@ def getRSS(baseDir: str, domain: str, session, url: str,
not containsInvalidChars(result.text): not containsInvalidChars(result.text):
return xmlStrToDict(baseDir, domain, result.text, return xmlStrToDict(baseDir, domain, result.text,
moderated, mirrored, moderated, mirrored,
maxPostsPerSource) maxPostsPerSource,
maxFeedItemSizeKb)
else: else:
print('WARN: feed is too large: ' + url) print('WARN: feed is too large: ' + url)
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
@ -549,7 +564,7 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
def getDictFromNewswire(session, baseDir: str, domain: str, def getDictFromNewswire(session, baseDir: str, domain: str,
maxPostsPerSource: int, maxFeedSizeKb: int, maxPostsPerSource: int, maxFeedSizeKb: int,
maxTags: int) -> {}: maxTags: int, maxFeedItemSizeKb: int) -> {}:
"""Gets rss feeds as a dictionary from newswire file """Gets rss feeds as a dictionary from newswire file
""" """
subscriptionsFilename = baseDir + '/accounts/newswire.txt' subscriptionsFilename = baseDir + '/accounts/newswire.txt'
@ -588,7 +603,8 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
itemsList = getRSS(baseDir, domain, session, url, itemsList = getRSS(baseDir, domain, session, url,
moderated, mirrored, moderated, mirrored,
maxPostsPerSource, maxFeedSizeKb) maxPostsPerSource, maxFeedSizeKb,
maxFeedItemSizeKb)
if itemsList: if itemsList:
for dateStr, item in itemsList.items(): for dateStr, item in itemsList.items():
result[dateStr] = item result[dateStr] = item

View File

@ -291,7 +291,7 @@ def createServerAlice(path: str, domain: str, port: int,
onionDomain = None onionDomain = None
i2pDomain = None i2pDomain = None
print('Server running: Alice') print('Server running: Alice')
runDaemon(False, True, False, False, True, 10, False, runDaemon(2048, False, True, False, False, True, 10, False,
0, 100, 1024, 5, False, 0, 100, 1024, 5, False,
0, False, 1, False, False, False, 0, False, 1, False, False, False,
5, True, True, 'en', __version__, 5, True, True, 'en', __version__,
@ -356,7 +356,7 @@ def createServerBob(path: str, domain: str, port: int,
onionDomain = None onionDomain = None
i2pDomain = None i2pDomain = None
print('Server running: Bob') print('Server running: Bob')
runDaemon(False, True, False, False, True, 10, False, runDaemon(2048, False, True, False, False, True, 10, False,
0, 100, 1024, 5, False, 0, 0, 100, 1024, 5, False, 0,
False, 1, False, False, False, False, 1, False, False, False,
5, True, True, 'en', __version__, 5, True, True, 'en', __version__,
@ -395,7 +395,7 @@ def createServerEve(path: str, domain: str, port: int, federationList: [],
onionDomain = None onionDomain = None
i2pDomain = None i2pDomain = None
print('Server running: Eve') print('Server running: Eve')
runDaemon(False, True, False, False, True, 10, False, runDaemon(2048, False, True, False, False, True, 10, False,
0, 100, 1024, 5, False, 0, 0, 100, 1024, 5, False, 0,
False, 1, False, False, False, False, 1, False, False, False,
5, True, True, 'en', __version__, 5, True, True, 'en', __version__,