forked from indymedia/epicyon
Extract hashtags from feeds
parent
5bd133ffff
commit
a60491585e
|
@ -701,7 +701,12 @@ def addHtmlTags(baseDir: str, httpPrefix: str,
|
||||||
content = content.replace('\r', '')
|
content = content.replace('\r', '')
|
||||||
content = content.replace('\n', ' --linebreak-- ')
|
content = content.replace('\n', ' --linebreak-- ')
|
||||||
content = addMusicTag(content, 'nowplaying')
|
content = addMusicTag(content, 'nowplaying')
|
||||||
words = content.replace(',', ' ').replace(';', ' ').split(' ')
|
contentSimplified = \
|
||||||
|
content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
|
||||||
|
contentSimplified = contentSimplified.replace('. ', ' ').strip()
|
||||||
|
if contentSimplified.endswith('.'):
|
||||||
|
contentSimplified = contentSimplified[:len(contentSimplified)-1]
|
||||||
|
words = contentSimplified.split(' ')
|
||||||
|
|
||||||
# remove . for words which are not mentions
|
# remove . for words which are not mentions
|
||||||
newWords = []
|
newWords = []
|
||||||
|
|
22
newswire.py
22
newswire.py
|
@ -52,6 +52,25 @@ def rss2Footer() -> str:
|
||||||
return rssStr
|
return rssStr
|
||||||
|
|
||||||
|
|
||||||
|
def getNewswireTags(text: str) -> []:
    """Returns a list of hashtags found in the given text

    A hashtag is any whitespace-separated word which begins with '#'
    and has at least one character after it. Commas, semicolons,
    trailing hyphens and sentence-ending full stops are treated as
    separators, so that "#tag," or "#tag." both yield "#tag".
    Tags are returned in order of first appearance without duplicates.
    An empty text returns an empty list.
    """
    if not text:
        return []
    # Collapse newlines, tabs and repeated spaces into single spaces so
    # that the separator replacements below also work across line breaks
    textSimplified = ' '.join(text.split())
    textSimplified = \
        textSimplified.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
    textSimplified = textSimplified.replace('. ', ' ').strip()
    if textSimplified.endswith('.'):
        textSimplified = textSimplified[:-1]
    tags = []
    # split() without arguments never produces empty words
    for wrd in textSimplified.split():
        # a lone '#' is not a hashtag
        if not wrd.startswith('#') or len(wrd) < 2:
            continue
        if wrd not in tags:
            tags.append(wrd)
    return tags
|
||||||
|
|
||||||
|
|
||||||
def addNewswireDictEntry(newswire: {}, dateStr: str,
|
def addNewswireDictEntry(newswire: {}, dateStr: str,
|
||||||
title: str, link: str,
|
title: str, link: str,
|
||||||
votesStatus: str, postFilename: str,
|
votesStatus: str, postFilename: str,
|
||||||
|
@ -60,7 +79,8 @@ def addNewswireDictEntry(newswire: {}, dateStr: str,
|
||||||
"""
|
"""
|
||||||
newswire[dateStr] = [title, link,
|
newswire[dateStr] = [title, link,
|
||||||
votesStatus, postFilename,
|
votesStatus, postFilename,
|
||||||
description, moderated]
|
description, moderated,
|
||||||
|
getNewswireTags(title + ' ' + description)]
|
||||||
|
|
||||||
|
|
||||||
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
||||||
|
|
Loading…
Reference in New Issue