mirror of https://gitlab.com/bashrc2/epicyon
Remove HTML from RSS feed descriptions
parent
ff15cea822
commit
3c1314d4b4
|
@ -23,7 +23,6 @@ from newswire import getDictFromNewswire
|
||||||
# from posts import sendSignedJson
|
# from posts import sendSignedJson
|
||||||
from posts import createNewsPost
|
from posts import createNewsPost
|
||||||
from posts import archivePostsForPerson
|
from posts import archivePostsForPerson
|
||||||
from content import removeHtmlTag
|
|
||||||
from content import dangerousMarkup
|
from content import dangerousMarkup
|
||||||
from content import validHashTag
|
from content import validHashTag
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
|
|
20
newswire.py
20
newswire.py
|
@ -304,13 +304,13 @@ def _xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
description = ''
|
description = ''
|
||||||
if '<description>' in rssItem and '</description>' in rssItem:
|
if '<description>' in rssItem and '</description>' in rssItem:
|
||||||
description = rssItem.split('<description>')[1]
|
description = rssItem.split('<description>')[1]
|
||||||
description = _removeCDATA(description.split('</description>')[0])
|
description = removeHtml(description.split('</description>')[0])
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in rssItem and \
|
if '<media:description>' in rssItem and \
|
||||||
'</media:description>' in rssItem:
|
'</media:description>' in rssItem:
|
||||||
description = rssItem.split('<media:description>')[1]
|
description = rssItem.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
link = rssItem.split('<link>')[1]
|
link = rssItem.split('<link>')[1]
|
||||||
link = link.split('</link>')[0]
|
link = link.split('</link>')[0]
|
||||||
if '://' not in link:
|
if '://' not in link:
|
||||||
|
@ -388,13 +388,13 @@ def _xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
description = ''
|
description = ''
|
||||||
if '<description>' in rssItem and '</description>' in rssItem:
|
if '<description>' in rssItem and '</description>' in rssItem:
|
||||||
description = rssItem.split('<description>')[1]
|
description = rssItem.split('<description>')[1]
|
||||||
description = _removeCDATA(description.split('</description>')[0])
|
description = removeHtml(description.split('</description>')[0])
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in rssItem and \
|
if '<media:description>' in rssItem and \
|
||||||
'</media:description>' in rssItem:
|
'</media:description>' in rssItem:
|
||||||
description = rssItem.split('<media:description>')[1]
|
description = rssItem.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
link = rssItem.split('<link>')[1]
|
link = rssItem.split('<link>')[1]
|
||||||
link = link.split('</link>')[0]
|
link = link.split('</link>')[0]
|
||||||
if '://' not in link:
|
if '://' not in link:
|
||||||
|
@ -460,13 +460,13 @@ def _atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
description = ''
|
description = ''
|
||||||
if '<summary>' in atomItem and '</summary>' in atomItem:
|
if '<summary>' in atomItem and '</summary>' in atomItem:
|
||||||
description = atomItem.split('<summary>')[1]
|
description = atomItem.split('<summary>')[1]
|
||||||
description = _removeCDATA(description.split('</summary>')[0])
|
description = removeHtml(description.split('</summary>')[0])
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in atomItem and \
|
if '<media:description>' in atomItem and \
|
||||||
'</media:description>' in atomItem:
|
'</media:description>' in atomItem:
|
||||||
description = atomItem.split('<media:description>')[1]
|
description = atomItem.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
link = atomItem.split('<link>')[1]
|
link = atomItem.split('<link>')[1]
|
||||||
link = link.split('</link>')[0]
|
link = link.split('</link>')[0]
|
||||||
if '://' not in link:
|
if '://' not in link:
|
||||||
|
@ -538,11 +538,11 @@ def _atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
'</media:description>' in atomItem:
|
'</media:description>' in atomItem:
|
||||||
description = atomItem.split('<media:description>')[1]
|
description = atomItem.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
elif '<summary>' in atomItem and '</summary>' in atomItem:
|
elif '<summary>' in atomItem and '</summary>' in atomItem:
|
||||||
description = atomItem.split('<summary>')[1]
|
description = atomItem.split('<summary>')[1]
|
||||||
description = description.split('</summary>')[0]
|
description = description.split('</summary>')[0]
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
link = atomItem.split('<yt:videoId>')[1]
|
link = atomItem.split('<yt:videoId>')[1]
|
||||||
link = link.split('</yt:videoId>')[0]
|
link = link.split('</yt:videoId>')[0]
|
||||||
link = 'https://www.youtube.com/watch?v=' + link.strip()
|
link = 'https://www.youtube.com/watch?v=' + link.strip()
|
||||||
|
@ -692,7 +692,7 @@ def getRSSfromDict(baseDir: str, newswire: {},
|
||||||
continue
|
continue
|
||||||
rssStr += '<item>\n'
|
rssStr += '<item>\n'
|
||||||
rssStr += ' <title>' + fields[0] + '</title>\n'
|
rssStr += ' <title>' + fields[0] + '</title>\n'
|
||||||
description = _removeCDATA(firstParagraphFromString(fields[4]))
|
description = removeHtml(firstParagraphFromString(fields[4]))
|
||||||
rssStr += ' <description>' + description + '</description>\n'
|
rssStr += ' <description>' + description + '</description>\n'
|
||||||
url = fields[1]
|
url = fields[1]
|
||||||
if '://' not in url:
|
if '://' not in url:
|
||||||
|
@ -812,7 +812,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
votes = loadJson(fullPostFilename + '.votes')
|
votes = loadJson(fullPostFilename + '.votes')
|
||||||
content = postJsonObject['object']['content']
|
content = postJsonObject['object']['content']
|
||||||
description = firstParagraphFromString(content)
|
description = firstParagraphFromString(content)
|
||||||
description = _removeCDATA(description)
|
description = removeHtml(description)
|
||||||
tagsFromPost = _getHashtagsFromPost(postJsonObject)
|
tagsFromPost = _getHashtagsFromPost(postJsonObject)
|
||||||
_addNewswireDictEntry(baseDir, domain,
|
_addNewswireDictEntry(baseDir, domain,
|
||||||
newswire, published,
|
newswire, published,
|
||||||
|
|
Loading…
Reference in New Issue