mirror of https://gitlab.com/bashrc2/epicyon
Remove control characters
parent
acc76cdcd6
commit
81cc189755
|
@ -9,7 +9,7 @@ __status__ = "Production"
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import urllib.parse
|
import unicodedata
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from newswire import getDictFromNewswire
|
from newswire import getDictFromNewswire
|
||||||
from posts import createNewsPost
|
from posts import createNewsPost
|
||||||
|
@ -51,6 +51,9 @@ def saveArrivedTime(baseDir: str, postFilename: str, arrived: str) -> None:
|
||||||
arrivedFile.close()
|
arrivedFile.close()
|
||||||
|
|
||||||
|
|
||||||
|
def removeControlCharacters(content: str) -> str:
    """Return content with every Unicode "Other" (C*) character removed.

    A character is dropped when its general category, as reported by
    unicodedata.category, starts with "C". That covers control (Cc),
    format (Cf), surrogate (Cs), private use (Co) and unassigned (Cn)
    code points, so tabs and newlines are stripped along with the rest.
    All remaining characters are kept in their original order.
    """
    return "".join(
        ch for ch in content
        if not unicodedata.category(ch).startswith("C")
    )
|
||||||
|
|
||||||
def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
domain: str, port: int,
|
domain: str, port: int,
|
||||||
newswire: {},
|
newswire: {},
|
||||||
|
@ -90,8 +93,8 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
newswire[originalDateStr][3] = filename
|
newswire[originalDateStr][3] = filename
|
||||||
continue
|
continue
|
||||||
|
|
||||||
rssTitle = urllib.parse.unquote_plus(item[0])
|
rssTitle = removeControlCharacters(item[0])
|
||||||
url = urllib.parse.unquote_plus(item[1])
|
url = removeControlCharacters(item[1])
|
||||||
rssDescription = ''
|
rssDescription = ''
|
||||||
|
|
||||||
# get the rss description if it exists
|
# get the rss description if it exists
|
||||||
|
|
Loading…
Reference in New Issue