Get newswire summary from blog posts

merge-requests/8/head
Bob Mottram 2020-11-08 09:47:01 +00:00
parent 654e844e07
commit a62641fec4
1 changed file with 16 additions and 2 deletions

View File

@ -82,7 +82,7 @@ def getNewswireTags(text: str, maxTags: int) -> []:
def addNewswireDictEntry(baseDir: str, domain: str, def addNewswireDictEntry(baseDir: str, domain: str,
newswire: {}, dateStr: str, newswire: {}, dateStr: str,
title: str, link: str, title: str, content: str, link: str,
votesStatus: str, postFilename: str, votesStatus: str, postFilename: str,
description: str, moderated: bool, description: str, moderated: bool,
mirrored: bool, mirrored: bool,
@ -412,6 +412,7 @@ def isNewswireBlogPost(postJsonObject: {}) -> bool:
return False return False
if postJsonObject['object'].get('summary') and \ if postJsonObject['object'].get('summary') and \
postJsonObject['object'].get('url') and \ postJsonObject['object'].get('url') and \
postJsonObject['object'].get('content') and \
postJsonObject['object'].get('published'): postJsonObject['object'].get('published'):
return isPublicPost(postJsonObject) return isPublicPost(postJsonObject)
return False return False
@ -443,6 +444,19 @@ def getHashtagsFromPost(postJsonObject: {}) -> []:
return tags return tags
def firstParagraph(postJsonObject: {}) -> str:
    """Return the first paragraph of a blog post, for use as the
    summary shown in the newswire feed.

    The post content is read from postJsonObject['object']['content'].
    If the content does not contain both a '<p>' and a '</p>' tag then
    the whole content is passed to removeHtml and returned. Otherwise
    only the text following the first '<p>' (up to the next '<p>' or
    '</p>', whichever comes first) is passed to removeHtml.

    NOTE(review): removeHtml is defined outside this block; presumably
    it strips markup from the given string - confirm.
    """
    html = postJsonObject['object']['content']

    hasParagraphTags = '<p>' in html and '</p>' in html
    if not hasParagraphTags:
        # No recognisable paragraph markup, so use all of the content
        return removeHtml(html)

    # Text between the first opening tag and the next opening tag
    afterFirstOpen = html.partition('<p>')[2]
    candidate = afterFirstOpen.partition('<p>')[0]

    # Cut at the closing tag if there is one within the candidate text,
    # otherwise partition leaves the candidate unchanged
    firstPara = candidate.partition('</p>')[0]
    return removeHtml(firstPara)
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
newswire: {}, newswire: {},
maxBlogsPerAccount: int, maxBlogsPerAccount: int,
@ -500,7 +514,7 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
votes = [] votes = []
if os.path.isfile(fullPostFilename + '.votes'): if os.path.isfile(fullPostFilename + '.votes'):
votes = loadJson(fullPostFilename + '.votes') votes = loadJson(fullPostFilename + '.votes')
description = '' description = firstParagraph(postJsonObject)
addNewswireDictEntry(baseDir, domain, addNewswireDictEntry(baseDir, domain,
newswire, published, newswire, published,
postJsonObject['object']['summary'], postJsonObject['object']['summary'],