Limit word lengths within newswire titles

merge-requests/30/head
Bob Mottram 2021-07-07 15:00:53 +01:00
parent 19bd991fee
commit 0118168173
2 changed files with 48 additions and 10 deletions

View File

@ -93,22 +93,43 @@ def getNewswireTags(text: str, maxTags: int) -> []:
words = textSimplified.split(' ') words = textSimplified.split(' ')
tags = [] tags = []
for wrd in words: for wrd in words:
if wrd.startswith('#'): if not wrd.startswith('#'):
if len(wrd) > 1: continue
if wrd not in tags: if len(wrd) <= 1:
tags.append(wrd) continue
if len(tags) >= maxTags: if wrd in tags:
break continue
tags.append(wrd)
if len(tags) >= maxTags:
break
return tags return tags
def limitWordLengths(text: str, maxWordLength: int) -> str:
    """Limits the maximum length of words so that the newswire
    column cannot become too wide

    The text is split on single spaces and every word longer than
    maxWordLength is cut down to its first maxWordLength characters.
    This also applies when the text is one single word with no spaces,
    which previously was returned unmodified regardless of its length.

    Returns the (possibly shortened) words joined by single spaces.
    """
    limitedWords = []
    for wrd in text.split(' '):
        if len(wrd) > maxWordLength:
            wrd = wrd[:maxWordLength]
        limitedWords.append(wrd)
    # Empty words at the start (caused by leading spaces) never
    # contributed a separator in the original loop, so strip the
    # leading spaces to keep that behaviour
    return ' '.join(limitedWords).lstrip(' ')
def _addNewswireDictEntry(baseDir: str, domain: str, def _addNewswireDictEntry(baseDir: str, domain: str,
newswire: {}, dateStr: str, newswire: {}, dateStr: str,
title: str, link: str, title: str, link: str,
votesStatus: str, postFilename: str, votesStatus: str, postFilename: str,
description: str, moderated: bool, description: str, moderated: bool,
mirrored: bool, mirrored: bool,
tags=[], maxTags=32) -> None: tags: [] = [],
maxTags: int = 32) -> None:
"""Update the newswire dictionary """Update the newswire dictionary
""" """
# remove any markup # remove any markup
@ -121,6 +142,8 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
if isFiltered(baseDir, None, None, allText): if isFiltered(baseDir, None, None, allText):
return return
title = limitWordLengths(title, 13)
if tags is None: if tags is None:
tags = [] tags = []
@ -129,9 +152,10 @@ def _addNewswireDictEntry(baseDir: str, domain: str,
# combine the tags into a single list # combine the tags into a single list
for tag in tags: for tag in tags:
if tag not in postTags: if tag in postTags:
if len(postTags) < maxTags: continue
postTags.append(tag) if len(postTags) < maxTags:
postTags.append(tag)
# check that no tags are blocked # check that no tags are blocked
for tag in postTags: for tag in postTags:

View File

@ -115,6 +115,7 @@ from newsdaemon import hashtagRuleTree
from newsdaemon import hashtagRuleResolve from newsdaemon import hashtagRuleResolve
from newswire import getNewswireTags from newswire import getNewswireTags
from newswire import parseFeedDate from newswire import parseFeedDate
from newswire import limitWordLengths
from mastoapiv1 import getMastoApiV1IdFromNickname from mastoapiv1 import getMastoApiV1IdFromNickname
from mastoapiv1 import getNicknameFromMastoApiV1Id from mastoapiv1 import getNicknameFromMastoApiV1Id
from webapp_post import prepareHtmlPostNickname from webapp_post import prepareHtmlPostNickname
@ -4141,9 +4142,22 @@ def _testSwitchWords() -> None:
assert result == 'This is a test hamster' assert result == 'This is a test hamster'
def _testLimitWordLengths() -> None:
    """Checks that limitWordLengths leaves short words untouched and
    truncates a word which exceeds the maximum length
    """
    print('testLimitWordLengths')
    maxWordLength = 13
    # (input text, expected output) pairs, checked in order
    testCases = (
        ("This is a test",
         "This is a test"),
        ("This is an exceptionallylongword test",
         "This is an exceptionally test")
    )
    for text, expected in testCases:
        result = limitWordLengths(text, maxWordLength)
        assert result == expected
def runAllTests(): def runAllTests():
print('Running tests...') print('Running tests...')
updateDefaultThemesList(os.getcwd()) updateDefaultThemesList(os.getcwd())
_testLimitWordLengths()
_testSwitchWords() _testSwitchWords()
_testFunctions() _testFunctions()
_testUserAgentDomain() _testUserAgentDomain()