forked from indymedia/epicyon
Extract hashtags from feeds
parent
5bd133ffff
commit
a60491585e
|
@ -701,7 +701,12 @@ def addHtmlTags(baseDir: str, httpPrefix: str,
|
|||
content = content.replace('\r', '')
|
||||
content = content.replace('\n', ' --linebreak-- ')
|
||||
content = addMusicTag(content, 'nowplaying')
|
||||
words = content.replace(',', ' ').replace(';', ' ').split(' ')
|
||||
contentSimplified = \
|
||||
content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
|
||||
contentSimplified = contentSimplified.replace('. ', ' ').strip()
|
||||
if contentSimplified.endswith('.'):
|
||||
contentSimplified = contentSimplified[:len(contentSimplified)-1]
|
||||
words = contentSimplified.split(' ')
|
||||
|
||||
# remove . for words which are not mentions
|
||||
newWords = []
|
||||
|
|
22
newswire.py
22
newswire.py
|
@ -52,6 +52,25 @@ def rss2Footer() -> str:
|
|||
return rssStr
|
||||
|
||||
|
||||
def getNewswireTags(text: str) -> []:
    """Returns a list of hashtags found in the given text

    Commas, semicolons, '- ' and '. ' sequences, and a single trailing
    full stop are treated as separators, so that "#tag," or a closing
    "#tag." yields "#tag". Words are then split on any whitespace.
    Tags keep their leading '#', are returned in order of first
    appearance, and are not repeated. A bare '#' is not a hashtag.
    """
    # Cheap early exit: without a '#' there can be no hashtags.
    # (Testing for a space here would wrongly reject a lone "#tag")
    if '#' not in text:
        return []
    textSimplified = \
        text.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
    textSimplified = textSimplified.replace('. ', ' ').strip()
    if textSimplified.endswith('.'):
        textSimplified = textSimplified[:-1]
    tags = []
    # split() with no argument splits on any run of whitespace, so tags
    # followed by a newline or tab are not glued to the next word
    for wrd in textSimplified.split():
        if not wrd.startswith('#'):
            continue
        if len(wrd) == 1:
            # a '#' on its own (eg. "Issue # 5") carries no tag name
            continue
        if wrd not in tags:
            tags.append(wrd)
    return tags
|
||||
|
||||
|
||||
def addNewswireDictEntry(newswire: {}, dateStr: str,
|
||||
title: str, link: str,
|
||||
votesStatus: str, postFilename: str,
|
||||
|
@ -60,7 +79,8 @@ def addNewswireDictEntry(newswire: {}, dateStr: str,
|
|||
"""
|
||||
newswire[dateStr] = [title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated]
|
||||
description, moderated,
|
||||
getNewswireTags(title + ' ' + description)]
|
||||
|
||||
|
||||
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
||||
|
|
Loading…
Reference in New Issue