Tidy extraction of tags from RSS feeds

merge-requests/30/head
Bob Mottram 2020-10-25 10:17:12 +00:00
parent 3d30aa55ce
commit 361df8a2ae
1 changed file with 33 additions and 18 deletions

View File

@ -88,17 +88,32 @@ def addNewswireDictEntry(baseDir: str, domain: str,
"""Update the newswire dictionary """Update the newswire dictionary
""" """
allText = title + ' ' + description allText = title + ' ' + description
# check that none of the text is filtered against
if isFiltered(baseDir, 'news', domain, allText): if isFiltered(baseDir, 'news', domain, allText):
return return
if not tags:
tags = getNewswireTags(allText, maxTags) if tags is None:
tags = []
# extract hashtags from the text of the feed post
postTags = getNewswireTags(allText, maxTags)
# combine the tags into a single list
for tag in postTags:
if tag not in tags:
tags.append(tag)
# check that no tags are blocked
newswireItemBlocked = False newswireItemBlocked = False
if tags:
for tag in tags: for tag in tags:
if isBlockedHashtag(baseDir, tag.replace('#', '')): if isBlockedHashtag(baseDir, tag.replace('#', '')):
newswireItemBlocked = True newswireItemBlocked = True
break break
if not newswireItemBlocked:
if newswireItemBlocked:
return
newswire[dateStr] = [ newswire[dateStr] = [
title, title,
link, link,