mirror of https://gitlab.com/bashrc2/epicyon
Extract hashtags from newswire feeds
parent
a5d21852bb
commit
af3b7baf12
37
newswire.py
37
newswire.py
|
@ -75,13 +75,15 @@ def getNewswireTags(text: str) -> []:
|
||||||
def addNewswireDictEntry(newswire: {}, dateStr: str,
                         title: str, link: str,
                         votesStatus: str, postFilename: str,
                         description: str, moderated: bool,
                         tags=None) -> None:
    """Update the newswire dictionary

    Stores an entry in newswire under the key dateStr. The entry is the list
    [title, link, votesStatus, postFilename, description, moderated, tags].

    tags is an optional list of hashtags for the entry. When it is omitted
    or empty the tags are extracted from the title and description text
    using getNewswireTags.
    """
    # None is used as the default rather than a list literal, so that no
    # list object is shared between calls
    if not tags:
        tags = getNewswireTags(title + ' ' + description)
    newswire[dateStr] = [title, link,
                         votesStatus, postFilename,
                         description, moderated, tags]
|
|
||||||
|
|
||||||
|
|
||||||
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
|
||||||
|
@ -340,6 +342,32 @@ def isaBlogPost(postJsonObject: {}) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def getHashtagsFromPost(postJsonObject: {}) -> []:
    """Returns a list of any hashtags within a post

    Reads postJsonObject['object']['tag'] and collects the 'name' of each
    entry whose 'type' is 'Hashtag'. Names are returned in the order first
    seen, without duplicates. An empty list is returned if the post has no
    object dict, has no tags, or the tag field is neither a list nor a dict.
    """
    postObject = postJsonObject.get('object')
    if not isinstance(postObject, dict):
        return []
    postTags = postObject.get('tag')
    if not postTags:
        return []
    if isinstance(postTags, dict):
        # a lone tag object rather than a list of them
        postTags = [postTags]
    elif not isinstance(postTags, list):
        return []
    tags = []
    # iterate over the tag objects themselves. Iterating over dict items
    # would yield (key, value) tuples, which never pass the dict check below
    for tg in postTags:
        if not isinstance(tg, dict):
            continue
        if tg.get('type') != 'Hashtag':
            continue
        tagName = tg.get('name')
        if not tagName:
            continue
        if tagName not in tags:
            tags.append(tagName)
    return tags
|
||||||
|
|
||||||
|
|
||||||
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
newswire: {},
|
newswire: {},
|
||||||
maxBlogsPerAccount: int,
|
maxBlogsPerAccount: int,
|
||||||
|
@ -401,7 +429,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
postJsonObject['object']['summary'],
|
postJsonObject['object']['summary'],
|
||||||
postJsonObject['object']['url'],
|
postJsonObject['object']['url'],
|
||||||
votes, fullPostFilename,
|
votes, fullPostFilename,
|
||||||
description, moderated)
|
description, moderated,
|
||||||
|
getHashtagsFromPost(postJsonObject))
|
||||||
|
|
||||||
ctr += 1
|
ctr += 1
|
||||||
if ctr >= maxBlogsPerAccount:
|
if ctr >= maxBlogsPerAccount:
|
||||||
|
|
Loading…
Reference in New Issue