def getNewswireTags(text: str) -> list:
    """Returns a list of hashtags found in the given text

    Commas, semicolons and a hyphen followed by a space are treated as
    word separators, and words are split on any run of whitespace
    (spaces, tabs or newlines). Trailing full stops are removed from
    each hashtag, while dots inside a tag (eg. #v1.2) are kept.

    The returned list contains each hashtag once, including its
    leading '#', in order of first appearance. A '#' with nothing
    after it is not considered to be a hashtag.
    """
    # nothing to find in empty text or text without any hash character
    if not text or '#' not in text:
        return []

    # turn punctuation separators into spaces so that "#a,#b" is two words
    for separator in (',', ';', '- '):
        text = text.replace(separator, ' ')

    tags = []
    # split() without arguments also breaks on newlines and tabs and
    # never yields empty words
    for wrd in text.split():
        if not wrd.startswith('#'):
            continue
        # sentence-ending full stops are not part of the tag
        wrd = wrd.rstrip('.')
        # ignore a lone '#' and avoid duplicates
        if len(wrd) > 1 and wrd not in tags:
            tags.append(wrd)
    return tags