mirror of https://gitlab.com/bashrc2/epicyon
Remove escaped html from feeds
parent
ee76750305
commit
77fd759adf
|
@ -15,6 +15,7 @@ __status__ = "Production"
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
|
import html
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from subprocess import Popen
|
from subprocess import Popen
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
@ -65,20 +66,10 @@ def saveArrivedTime(baseDir: str, postFilename: str, arrived: str) -> None:
|
||||||
|
|
||||||
|
|
||||||
def removeControlCharacters(content: str) -> str:
|
def removeControlCharacters(content: str) -> str:
|
||||||
"""TODO this is hacky and a better solution is needed
|
"""Remove escaped html
|
||||||
the unicode is messing up somehow
|
|
||||||
"""
|
"""
|
||||||
lookups = {
|
if '&' in content:
|
||||||
"8211": "-",
|
return html.unescape(content)
|
||||||
"8230": "...",
|
|
||||||
"8216": "'",
|
|
||||||
"8217": "'",
|
|
||||||
"8220": '"',
|
|
||||||
"8221": '"'
|
|
||||||
}
|
|
||||||
for code, ch in lookups.items():
|
|
||||||
content = content.replace('&' + code + ';', ch)
|
|
||||||
content = content.replace('&#' + code + ';', ch)
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
@ -513,6 +504,8 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
if rssDescription.startswith('<![CDATA['):
|
if rssDescription.startswith('<![CDATA['):
|
||||||
rssDescription = rssDescription.replace('<![CDATA[', '')
|
rssDescription = rssDescription.replace('<![CDATA[', '')
|
||||||
rssDescription = rssDescription.replace(']]>', '')
|
rssDescription = rssDescription.replace(']]>', '')
|
||||||
|
if '&' in rssDescription:
|
||||||
|
rssDescription = html.unescape(rssDescription)
|
||||||
rssDescription = '<p>' + rssDescription + '<p>'
|
rssDescription = '<p>' + rssDescription + '<p>'
|
||||||
|
|
||||||
mirrored = item[7]
|
mirrored = item[7]
|
||||||
|
@ -578,6 +571,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
blog['object']['url'] = \
|
blog['object']['url'] = \
|
||||||
httpPrefix + '://' + domain + '/@news/' + statusNumber
|
httpPrefix + '://' + domain + '/@news/' + statusNumber
|
||||||
blog['object']['published'] = dateStr
|
blog['object']['published'] = dateStr
|
||||||
|
|
||||||
blog['object']['content'] = rssDescription
|
blog['object']['content'] = rssDescription
|
||||||
blog['object']['contentMap']['en'] = rssDescription
|
blog['object']['contentMap']['en'] = rssDescription
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue