forked from indymedia/epicyon
Extract hashtags from newswire feeds
parent
a5d21852bb
commit
af3b7baf12
|
@ -374,7 +374,7 @@ def addHashTags(wordStr: str, httpPrefix: str, domain: str,
|
||||||
hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag
|
hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag
|
||||||
postHashtags[hashtag] = {
|
postHashtags[hashtag] = {
|
||||||
'href': hashtagUrl,
|
'href': hashtagUrl,
|
||||||
'name': '#'+hashtag,
|
'name': '#' + hashtag,
|
||||||
'type': 'Hashtag'
|
'type': 'Hashtag'
|
||||||
}
|
}
|
||||||
replaceHashTags[wordStr] = "<a href=\"" + hashtagUrl + \
|
replaceHashTags[wordStr] = "<a href=\"" + hashtagUrl + \
|
||||||
|
|
37
newswire.py
37
newswire.py
|
@ -75,13 +75,15 @@ def getNewswireTags(text: str) -> []:
|
||||||
def addNewswireDictEntry(newswire: {}, dateStr: str,
                         title: str, link: str,
                         votesStatus: str, postFilename: str,
                         description: str, moderated: bool,
                         tags=None) -> None:
    """Update the newswire dictionary

    Stores an entry in newswire under the key dateStr. The entry is the
    list:
        [title, link, votesStatus, postFilename,
         description, moderated, tags]

    tags is an optional list of hashtags for the entry. If it is not
    supplied (None) or is empty then the hashtags are extracted from the
    title and description using getNewswireTags.
    """
    # A None sentinel is used rather than a [] default so that no list
    # object is shared between calls
    if not tags:
        tags = getNewswireTags(title + ' ' + description)
    newswire[dateStr] = [title, link,
                         votesStatus, postFilename,
                         description, moderated, tags]
|
|
||||||
|
|
||||||
|
|
||||||
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
||||||
|
@ -340,6 +342,32 @@ def isaBlogPost(postJsonObject: {}) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def getHashtagsFromPost(postJsonObject: {}) -> []:
    """Returns a list of any hashtags within a post

    Looks at postJsonObject['object']['tag'] and collects the 'name' of
    every entry whose 'type' is 'Hashtag'. Names are returned in the order
    in which they first appear, without duplicates.

    The tag field may be:
      - a list of tag dicts
      - a single tag dict (one having a string 'type')
      - a dict whose values are tag dicts

    An empty list is returned if the post has no object dict, no tags,
    or the tag field is of some other type.
    """
    postObject = postJsonObject.get('object')
    if not isinstance(postObject, dict):
        return []
    tagField = postObject.get('tag')
    if not tagField:
        return []

    if isinstance(tagField, list):
        candidates = tagField
    elif isinstance(tagField, dict):
        if isinstance(tagField.get('type'), str):
            # a single tag object rather than a collection
            candidates = [tagField]
        else:
            # Iterate over the values. Using items() here would yield
            # (key, value) tuples, which can never be tag dicts
            candidates = tagField.values()
    else:
        return []

    tags = []
    for tg in candidates:
        if not isinstance(tg, dict):
            continue
        if not tg.get('name'):
            continue
        if not tg.get('type'):
            continue
        if tg['type'] != 'Hashtag':
            continue
        if tg['name'] not in tags:
            tags.append(tg['name'])
    return tags
|
||||||
|
|
||||||
|
|
||||||
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
newswire: {},
|
newswire: {},
|
||||||
maxBlogsPerAccount: int,
|
maxBlogsPerAccount: int,
|
||||||
|
@ -401,7 +429,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
postJsonObject['object']['summary'],
|
postJsonObject['object']['summary'],
|
||||||
postJsonObject['object']['url'],
|
postJsonObject['object']['url'],
|
||||||
votes, fullPostFilename,
|
votes, fullPostFilename,
|
||||||
description, moderated)
|
description, moderated,
|
||||||
|
getHashtagsFromPost(postJsonObject))
|
||||||
|
|
||||||
ctr += 1
|
ctr += 1
|
||||||
if ctr >= maxBlogsPerAccount:
|
if ctr >= maxBlogsPerAccount:
|
||||||
|
|
Loading…
Reference in New Issue