forked from indymedia/epicyon
Limit the maximum number of hashtags imported from newswire feeds
parent
d579d21e02
commit
e521a95e18
|
@ -12124,6 +12124,10 @@ def runDaemon(maxNewsPosts: int,
|
||||||
# maximum number of posts in the news timeline/outbox
|
# maximum number of posts in the news timeline/outbox
|
||||||
httpd.maxNewsPosts = maxNewsPosts
|
httpd.maxNewsPosts = maxNewsPosts
|
||||||
|
|
||||||
|
# The maximum number of tags per post which can be
|
||||||
|
# attached to RSS feeds pulled in via the newswire
|
||||||
|
httpd.maxTags = 32
|
||||||
|
|
||||||
if registration == 'open':
|
if registration == 'open':
|
||||||
httpd.registration = True
|
httpd.registration = True
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -685,7 +685,8 @@ def runNewswireDaemon(baseDir: str, httpd,
|
||||||
newNewswire = \
|
newNewswire = \
|
||||||
getDictFromNewswire(httpd.session, baseDir, domain,
|
getDictFromNewswire(httpd.session, baseDir, domain,
|
||||||
httpd.maxNewswirePostsPerSource,
|
httpd.maxNewswirePostsPerSource,
|
||||||
httpd.maxNewswireFeedSizeKb)
|
httpd.maxNewswireFeedSizeKb,
|
||||||
|
httpd.maxTags)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('WARN: unable to update newswire ' + str(e))
|
print('WARN: unable to update newswire ' + str(e))
|
||||||
time.sleep(120)
|
time.sleep(120)
|
||||||
|
|
26
newswire.py
26
newswire.py
|
@ -54,7 +54,7 @@ def rss2Footer() -> str:
|
||||||
return rssStr
|
return rssStr
|
||||||
|
|
||||||
|
|
||||||
def getNewswireTags(text: str) -> []:
|
def getNewswireTags(text: str, maxTags: int) -> []:
|
||||||
"""Returns a list of hashtags found in the given text
|
"""Returns a list of hashtags found in the given text
|
||||||
"""
|
"""
|
||||||
if '#' not in text:
|
if '#' not in text:
|
||||||
|
@ -73,6 +73,8 @@ def getNewswireTags(text: str) -> []:
|
||||||
if len(wrd) > 1:
|
if len(wrd) > 1:
|
||||||
if wrd not in tags:
|
if wrd not in tags:
|
||||||
tags.append(wrd)
|
tags.append(wrd)
|
||||||
|
if len(tags) >= maxTags:
|
||||||
|
break
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,14 +83,15 @@ def addNewswireDictEntry(baseDir: str, domain: str,
|
||||||
title: str, link: str,
|
title: str, link: str,
|
||||||
votesStatus: str, postFilename: str,
|
votesStatus: str, postFilename: str,
|
||||||
description: str, moderated: bool,
|
description: str, moderated: bool,
|
||||||
mirrored: bool, tags=[]) -> None:
|
mirrored: bool,
|
||||||
|
tags=[], maxTags=32) -> None:
|
||||||
"""Update the newswire dictionary
|
"""Update the newswire dictionary
|
||||||
"""
|
"""
|
||||||
allText = title + ' ' + description
|
allText = title + ' ' + description
|
||||||
if isFiltered(baseDir, 'news', domain, allText):
|
if isFiltered(baseDir, 'news', domain, allText):
|
||||||
return
|
return
|
||||||
if not tags:
|
if not tags:
|
||||||
tags = getNewswireTags(allText)
|
tags = getNewswireTags(allText, maxTags)
|
||||||
newswireItemBlocked = False
|
newswireItemBlocked = False
|
||||||
if tags:
|
if tags:
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
|
@ -410,7 +413,8 @@ def getHashtagsFromPost(postJsonObject: {}) -> []:
|
||||||
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
newswire: {},
|
newswire: {},
|
||||||
maxBlogsPerAccount: int,
|
maxBlogsPerAccount: int,
|
||||||
indexFilename: str) -> None:
|
indexFilename: str,
|
||||||
|
maxTags: int) -> None:
|
||||||
"""Adds blogs for the given account to the newswire
|
"""Adds blogs for the given account to the newswire
|
||||||
"""
|
"""
|
||||||
if not os.path.isfile(indexFilename):
|
if not os.path.isfile(indexFilename):
|
||||||
|
@ -470,7 +474,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
postJsonObject['object']['url'],
|
postJsonObject['object']['url'],
|
||||||
votes, fullPostFilename,
|
votes, fullPostFilename,
|
||||||
description, moderated, False,
|
description, moderated, False,
|
||||||
getHashtagsFromPost(postJsonObject))
|
getHashtagsFromPost(postJsonObject),
|
||||||
|
maxTags)
|
||||||
|
|
||||||
ctr += 1
|
ctr += 1
|
||||||
if ctr >= maxBlogsPerAccount:
|
if ctr >= maxBlogsPerAccount:
|
||||||
|
@ -478,7 +483,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
|
|
||||||
|
|
||||||
def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
||||||
maxBlogsPerAccount: int) -> None:
|
maxBlogsPerAccount: int,
|
||||||
|
maxTags: int) -> None:
|
||||||
"""Adds blogs from each user account into the newswire
|
"""Adds blogs from each user account into the newswire
|
||||||
"""
|
"""
|
||||||
moderationDict = {}
|
moderationDict = {}
|
||||||
|
@ -508,7 +514,7 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
||||||
domain = handle.split('@')[1]
|
domain = handle.split('@')[1]
|
||||||
addAccountBlogsToNewswire(baseDir, nickname, domain,
|
addAccountBlogsToNewswire(baseDir, nickname, domain,
|
||||||
newswire, maxBlogsPerAccount,
|
newswire, maxBlogsPerAccount,
|
||||||
blogsIndex)
|
blogsIndex, maxTags)
|
||||||
|
|
||||||
# sort the moderation dict into chronological order, latest first
|
# sort the moderation dict into chronological order, latest first
|
||||||
sortedModerationDict = \
|
sortedModerationDict = \
|
||||||
|
@ -524,7 +530,8 @@ def addBlogsToNewswire(baseDir: str, domain: str, newswire: {},
|
||||||
|
|
||||||
|
|
||||||
def getDictFromNewswire(session, baseDir: str, domain: str,
|
def getDictFromNewswire(session, baseDir: str, domain: str,
|
||||||
maxPostsPerSource: int, maxFeedSizeKb: int) -> {}:
|
maxPostsPerSource: int, maxFeedSizeKb: int,
|
||||||
|
maxTags: int) -> {}:
|
||||||
"""Gets rss feeds as a dictionary from newswire file
|
"""Gets rss feeds as a dictionary from newswire file
|
||||||
"""
|
"""
|
||||||
subscriptionsFilename = baseDir + '/accounts/newswire.txt'
|
subscriptionsFilename = baseDir + '/accounts/newswire.txt'
|
||||||
|
@ -568,7 +575,8 @@ def getDictFromNewswire(session, baseDir: str, domain: str,
|
||||||
result[dateStr] = item
|
result[dateStr] = item
|
||||||
|
|
||||||
# add blogs from each user account
|
# add blogs from each user account
|
||||||
addBlogsToNewswire(baseDir, domain, result, maxPostsPerSource)
|
addBlogsToNewswire(baseDir, domain, result,
|
||||||
|
maxPostsPerSource, maxTags)
|
||||||
|
|
||||||
# sort into chronological order, latest first
|
# sort into chronological order, latest first
|
||||||
sortedResult = OrderedDict(sorted(result.items(), reverse=True))
|
sortedResult = OrderedDict(sorted(result.items(), reverse=True))
|
||||||
|
|
Loading…
Reference in New Issue