forked from indymedia/epicyon
Remove dangerous markup from rss feeds
parent
051b361c79
commit
13c067bfa4
17
content.py
17
content.py
|
@ -14,6 +14,23 @@ from utils import fileLastModified
|
|||
from utils import getLinkPrefixes
|
||||
|
||||
|
||||
def removeHtmlTag(htmlStr: str, tag: str) -> str:
    """Removes every occurrence of a given attribute from a html string

    Despite the name this strips an attribute, such as width="864",
    rather than a whole element. An attribute is only matched when it
    is preceded by a single space and its value is double quoted.

    htmlStr: the html to be filtered
    tag: name of the attribute to remove, eg. 'width'

    Returns the html with each matching attribute and its value removed.
    If an attribute value has no closing quote then that attribute and
    everything after it is returned unaltered.
    """
    # the text being searched for never changes, so only build it once
    matchStr = ' ' + tag + '="'
    while matchStr in htmlStr:
        sections = htmlStr.split(matchStr, 1)
        if '"' not in sections[1]:
            # unterminated attribute value, so there is nothing
            # which can safely be cut out
            break
        # keep what came before the attribute and what follows the
        # closing quote of its value
        htmlStr = sections[0] + sections[1].split('"', 1)[1]
    return htmlStr
|
||||
|
||||
|
||||
def removeQuotesWithinQuotes(content: str) -> str:
|
||||
"""Removes any blockquote inside blockquote
|
||||
"""
|
||||
|
|
|
@ -12,6 +12,8 @@ import datetime
|
|||
from collections import OrderedDict
|
||||
from newswire import getDictFromNewswire
|
||||
from posts import createNewsPost
|
||||
from content import removeHtmlTag
|
||||
from content import dangerousMarkup
|
||||
from utils import loadJson
|
||||
from utils import saveJson
|
||||
from utils import getStatusNumber
|
||||
|
@ -51,8 +53,21 @@ def saveArrivedTime(baseDir: str, postFilename: str, arrived: str) -> None:
|
|||
|
||||
|
||||
def removeControlCharacters(content: str) -> str:
    """TODO this is hacky and a better solution is needed
    the unicode is messing up somehow

    Replaces numeric character references for common typographic
    punctuation with plain ASCII equivalents. Both the well formed
    '&#8211;' style and the damaged '&8211;' style (missing '#') are
    handled, together with literal en dash and ellipsis characters.

    content: the text to be cleaned up

    Returns the text with the known references substituted.
    """
    # decimal code point -> ASCII replacement
    lookups = {
        "8211": "-",
        "8230": "...",
        "8216": "'",
        "8217": "'",
        "8220": '"',
        "8221": '"'
    }
    for code, ch in lookups.items():
        content = content.replace('&' + code + ';', ch)
        content = content.replace('&#' + code + ';', ch)
    # literal en dash (U+2013) and horizontal ellipsis (U+2026)
    content = content.replace('\u2013', '-').replace('\u2026', '...')
    return content
|
||||
|
||||
|
||||
def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||
|
@ -96,6 +111,8 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
|||
|
||||
rssTitle = removeControlCharacters(item[0])
|
||||
url = item[1]
|
||||
if dangerousMarkup(url) or dangerousMarkup(rssTitle):
|
||||
continue
|
||||
rssDescription = ''
|
||||
|
||||
# get the rss description if it exists
|
||||
|
@ -106,7 +123,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
|||
rssDescription = '<p>' + rssDescription + '<p>'
|
||||
|
||||
# add the off-site link to the description
|
||||
if rssDescription:
|
||||
if rssDescription and not dangerousMarkup(rssDescription):
|
||||
rssDescription += \
|
||||
'<br><a href="' + url + '">' + \
|
||||
translate['Read more...'] + '</a>'
|
||||
|
@ -115,6 +132,10 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
|||
'<a href="' + url + '">' + \
|
||||
translate['Read more...'] + '</a>'
|
||||
|
||||
# remove image dimensions
|
||||
rssDescription = removeHtmlTag(rssDescription, 'width')
|
||||
rssDescription = removeHtmlTag(rssDescription, 'height')
|
||||
|
||||
followersOnly = False
|
||||
useBlurhash = False
|
||||
# NOTE: the id when the post is created will not be
|
||||
|
|
11
tests.py
11
tests.py
|
@ -78,6 +78,7 @@ from content import addHtmlTags
|
|||
from content import removeLongWords
|
||||
from content import replaceContentDuplicates
|
||||
from content import removeTextFormatting
|
||||
from content import removeHtmlTag
|
||||
from theme import setCSSparam
|
||||
from jsonldsig import testSignJsonld
|
||||
from jsonldsig import jsonldVerify
|
||||
|
@ -2162,8 +2163,18 @@ def testReplaceEmailQuote():
|
|||
assert resultStr == expectedStr
|
||||
|
||||
|
||||
def testRemoveHtmlTag():
    """Checks that removeHtmlTag strips a named attribute from an
    image element while leaving the other attributes in place
    """
    print('testRemoveHtmlTag')
    testStr = "<p><img width=\"864\" height=\"486\" " + \
        "src=\"https://somesiteorother.com/image.jpg\"></p>"

    # removing the width leaves the height and src intact
    resultStr = removeHtmlTag(testStr, 'width')
    assert resultStr == "<p><img height=\"486\" " + \
        "src=\"https://somesiteorother.com/image.jpg\"></p>"

    # removing the height leaves the width and src intact
    resultStr = removeHtmlTag(testStr, 'height')
    assert resultStr == "<p><img width=\"864\" " + \
        "src=\"https://somesiteorother.com/image.jpg\"></p>"
|
||||
|
||||
|
||||
def runAllTests():
|
||||
print('Running tests...')
|
||||
testRemoveHtmlTag()
|
||||
testReplaceEmailQuote()
|
||||
testConstantTimeStringCheck()
|
||||
testTranslations()
|
||||
|
|
Loading…
Reference in New Issue