Remove long words from newswire titles

Long words in titles can cause column overflows
merge-requests/8/head
Bob Mottram 2020-10-31 22:47:43 +00:00
parent 8989d070c7
commit ade3c0560b
1 changed file with 5 additions and 0 deletions

View File

@@ -22,6 +22,7 @@ from utils import removeHtml
from blocking import isBlockedDomain
from blocking import isBlockedHashtag
from filters import isFiltered
from content import removeLongWords
def rss2Header(httpPrefix: str,
@@ -89,6 +90,10 @@ def addNewswireDictEntry(baseDir: str, domain: str,
tags=[], maxTags=32) -> None:
"""Update the newswire dictionary
"""
# remove any long words from the title, which can
# cause column overflows
title = removeLongWords(title, 30, [])
allText = removeHtml(title + ' ' + description)
# check that none of the text is filtered against