Tidy extraction of tags from RSS feeds

main
Bob Mottram 2020-10-25 10:17:12 +00:00
parent 3d30aa55ce
commit 361df8a2ae
1 changed file with 33 additions and 18 deletions

View File

@@ -88,17 +88,32 @@ def addNewswireDictEntry(baseDir: str, domain: str,
"""Update the newswire dictionary
"""
allText = title + ' ' + description
# check that none of the text is filtered against
if isFiltered(baseDir, 'news', domain, allText):
return
if not tags:
tags = getNewswireTags(allText, maxTags)
if tags is None:
tags = []
# extract hashtags from the text of the feed post
postTags = getNewswireTags(allText, maxTags)
# combine the tags into a single list
for tag in postTags:
if tag not in tags:
tags.append(tag)
# check that no tags are blocked
newswireItemBlocked = False
if tags:
for tag in tags:
if isBlockedHashtag(baseDir, tag.replace('#', '')):
newswireItemBlocked = True
break
if not newswireItemBlocked:
if newswireItemBlocked:
return
newswire[dateStr] = [
title,
link,