Remove HTML from news items imported from RSS

merge-requests/30/head
Bob Mottram 2021-01-11 21:38:31 +00:00
parent 24effb0929
commit ff15cea822
1 changed file with 5 additions and 22 deletions

View File

@@ -26,6 +26,7 @@ from posts import archivePostsForPerson
from content import removeHtmlTag
from content import dangerousMarkup
from content import validHashTag
from utils import removeHtml
from utils import getFullDomain
from utils import loadJson
from utils import saveJson
@@ -506,14 +507,7 @@ def _convertRSStoActivityPub(baseDir: str, httpPrefix: str,
rssDescription = ''
# get the rss description if it exists
rssDescription = _removeControlCharacters(item[4])
if rssDescription.startswith('<![CDATA['):
rssDescription = rssDescription.replace('<![CDATA[', '')
rssDescription = rssDescription.replace(']]>', '')
rssDescription = rssDescription.replace(']]', '')
if '&' in rssDescription:
rssDescription = html.unescape(rssDescription)
rssDescription = '<p>' + rssDescription + '<p>'
rssDescription = '<p>' + removeHtml(item[4]) + '<p>'
mirrored = item[7]
postUrl = url
@@ -526,20 +520,9 @@ def _convertRSStoActivityPub(baseDir: str, httpPrefix: str,
postUrl += '/index.html'
# add the off-site link to the description
if rssDescription and \
not dangerousMarkup(rssDescription, allowLocalNetworkAccess):
rssDescription += \
'<br><a href="' + postUrl + '">' + \
translate['Read more...'] + '</a>'
else:
rssDescription = \
'<a href="' + postUrl + '">' + \
translate['Read more...'] + '</a>'
# remove image dimensions
if '<img' in rssDescription:
rssDescription = removeHtmlTag(rssDescription, 'width')
rssDescription = removeHtmlTag(rssDescription, 'height')
rssDescription += \
'<br><a href="' + postUrl + '">' + \
translate['Read more...'] + '</a>'
followersOnly = False
# NOTE: the id when the post is created will not be