Limit the maximum number of hashtags imported from newswire feeds

main
Bob Mottram 2020-10-23 15:41:29 +01:00
parent d579d21e02
commit e521a95e18
3 changed files with 23 additions and 10 deletions

View File

@ -12124,6 +12124,10 @@ def runDaemon(maxNewsPosts: int,
# maximum number of posts in the news timeline/outbox # maximum number of posts in the news timeline/outbox
httpd.maxNewsPosts = maxNewsPosts httpd.maxNewsPosts = maxNewsPosts
# The maximum number of tags per post which can be
# attached to RSS feeds pulled in via the newswire
httpd.maxTags = 32
if registration == 'open': if registration == 'open':
httpd.registration = True httpd.registration = True
else: else:

View File

@ -685,7 +685,8 @@ def runNewswireDaemon(baseDir: str, httpd,
newNewswire = \ newNewswire = \
getDictFromNewswire(httpd.session, baseDir, domain, getDictFromNewswire(httpd.session, baseDir, domain,
httpd.maxNewswirePostsPerSource, httpd.maxNewswirePostsPerSource,
httpd.maxNewswireFeedSizeKb) httpd.maxNewswireFeedSizeKb,
httpd.maxTags)
except Exception as e: except Exception as e:
print('WARN: unable to update newswire ' + str(e)) print('WARN: unable to update newswire ' + str(e))
time.sleep(120) time.sleep(120)

View File

@ -54,7 +54,7 @@ def rss2Footer() -> str:
return rssStr return rssStr
def getNewswireTags(text: str) -> []: def getNewswireTags(text: str, maxTags: int) -> []:
"""Returns a list of hashtags found in the given text """Returns a list of hashtags found in the given text
""" """
if '#' not in text: if '#' not in text:
@ -73,6 +73,8 @@ def getNewswireTags(text: str) -> []:
if len(wrd) > 1: if len(wrd) > 1:
if wrd not in tags: if wrd not in tags:
tags.append(wrd) tags.append(wrd)
if len(tags) >= maxTags:
break
return tags return tags
@ -81,14 +83,15 @@ def addNewswireDictEntry(baseDir: str, domain: str,
title: str, link: str, title: str, link: str,
votesStatus: str, postFilename: str, votesStatus: str, postFilename: str,
description: str, moderated: bool, description: str, moderated: bool,
mirrored: bool, tags=[]) -> None: mirrored: bool,
tags=[], maxTags=32) -> None:
"""Update the newswire dictionary """Update the newswire dictionary
""" """
allText = title + ' ' + description allText = title + ' ' + description
if isFiltered(baseDir, 'news', domain, allText): if isFiltered(baseDir, 'news', domain, allText):
return return
if not tags: if not tags:
tags = getNewswireTags(allText) tags = getNewswireTags(allText, maxTags)
newswireItemBlocked = False newswireItemBlocked = False
if tags: if tags:
for tag in tags: for tag in tags:
@ -410,7 +413,8 @@ def getHashtagsFromPost(postJsonObject: {}) -> []:
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
newswire: {}, newswire: {},
maxBlogsPerAccount: int, maxBlogsPerAccount: int,
indexFilename: str) -> None: indexFilename: str,
maxTags: int) -> None:
"""Adds blogs for the given account to the newswire """Adds blogs for the given account to the newswire
""" """
if not os.path.isfile(indexFilename): if not os.path.isfile(indexFilename):
@ -470,7 +474,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
postJsonObject['object']['url'], postJsonObject['object']['url'],
votes, fullPostFilename, votes, fullPostFilename,
description, moderated, False, description, moderated, False,
getHashtagsFromPost(postJsonObject)) getHashtagsFromPost(postJsonObject),
maxTags)
ctr += 1 ctr += 1
if ctr >= maxBlogsPerAccount: if ctr >= maxBlogsPerAccount:
@ -478,7 +483,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
def addBlogsToNewswire(baseDir: str, domain: str, newswire: {}, def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
maxBlogsPerAccount: int) -> None: maxBlogsPerAccount: int,
maxTags: int) -> None:
"""Adds blogs from each user account into the newswire """Adds blogs from each user account into the newswire
""" """
moderationDict = {} moderationDict = {}
@ -508,7 +514,7 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
domain = handle.split('@')[1] domain = handle.split('@')[1]
addAccountBlogsToNewswire(baseDir, nickname, domain, addAccountBlogsToNewswire(baseDir, nickname, domain,
newswire, maxBlogsPerAccount, newswire, maxBlogsPerAccount,
blogsIndex) blogsIndex, maxTags)
# sort the moderation dict into chronological order, latest first # sort the moderation dict into chronological order, latest first
sortedModerationDict = \ sortedModerationDict = \
@ -524,7 +530,8 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
def getDictFromNewswire(session, baseDir: str, domain: str, def getDictFromNewswire(session, baseDir: str, domain: str,
maxPostsPerSource: int, maxFeedSizeKb: int) -> {}: maxPostsPerSource: int, maxFeedSizeKb: int,
maxTags: int) -> {}:
"""Gets rss feeds as a dictionary from newswire file """Gets rss feeds as a dictionary from newswire file
""" """
subscriptionsFilename = baseDir + '/accounts/newswire.txt' subscriptionsFilename = baseDir + '/accounts/newswire.txt'
@ -568,7 +575,8 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
result[dateStr] = item result[dateStr] = item
# add blogs from each user account # add blogs from each user account
addBlogsToNewswire(baseDir, domain, result, maxPostsPerSource) addBlogsToNewswire(baseDir, domain, result,
maxPostsPerSource, maxTags)
# sort into chronological order, latest first # sort into chronological order, latest first
sortedResult = OrderedDict(sorted(result.items(), reverse=True)) sortedResult = OrderedDict(sorted(result.items(), reverse=True))