mirror of https://gitlab.com/bashrc2/epicyon
Tidy extraction of tags from RSS feeds
parent
3d30aa55ce
commit
361df8a2ae
23
newswire.py
23
newswire.py
|
@@ -88,17 +88,32 @@ def addNewswireDictEntry(baseDir: str, domain: str,
|
||||||
"""Update the newswire dictionary
|
"""Update the newswire dictionary
|
||||||
"""
|
"""
|
||||||
allText = title + ' ' + description
|
allText = title + ' ' + description
|
||||||
|
|
||||||
|
# check that none of the text is filtered against
|
||||||
if isFiltered(baseDir, 'news', domain, allText):
|
if isFiltered(baseDir, 'news', domain, allText):
|
||||||
return
|
return
|
||||||
if not tags:
|
|
||||||
tags = getNewswireTags(allText, maxTags)
|
if tags is None:
|
||||||
|
tags = []
|
||||||
|
|
||||||
|
# extract hashtags from the text of the feed post
|
||||||
|
postTags = getNewswireTags(allText, maxTags)
|
||||||
|
|
||||||
|
# combine the tags into a single list
|
||||||
|
for tag in postTags:
|
||||||
|
if tag not in tags:
|
||||||
|
tags.append(tag)
|
||||||
|
|
||||||
|
# check that no tags are blocked
|
||||||
newswireItemBlocked = False
|
newswireItemBlocked = False
|
||||||
if tags:
|
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
if isBlockedHashtag(baseDir, tag.replace('#', '')):
|
if isBlockedHashtag(baseDir, tag.replace('#', '')):
|
||||||
newswireItemBlocked = True
|
newswireItemBlocked = True
|
||||||
break
|
break
|
||||||
if not newswireItemBlocked:
|
|
||||||
|
if newswireItemBlocked:
|
||||||
|
return
|
||||||
|
|
||||||
newswire[dateStr] = [
|
newswire[dateStr] = [
|
||||||
title,
|
title,
|
||||||
link,
|
link,
|
||||||
|
|
Loading…
Reference in New Issue