Reject blocked domains within news feeds

merge-requests/8/head
Bob Mottram 2020-10-16 12:58:31 +01:00
parent c3dbec6181
commit e5950d4363
1 changed file with 18 additions and 2 deletions

View File

@@ -16,7 +16,8 @@ from utils import locatePost
from utils import loadJson
from utils import saveJson
from utils import isSuspended
from utils import containsInvalidChars
from blocking import isBlockedDomain
def rss2Header(httpPrefix: str,
nickname: str, domainFull: str,
@@ -80,6 +81,13 @@ def xml2StrToDict(xmlStr: str, moderated: bool,
description = description.split('</description>')[0]
link = rssItem.split('<link>')[1]
link = link.split('</link>')[0]
if '://' not in link:
continue
domain = link.split('://')[1]
if '/' in domain:
domain = domain.split('/')[0]
if isBlockedDomain(baseDir, domain):
continue
pubDate = rssItem.split('<pubDate>')[1]
pubDate = pubDate.split('</pubDate>')[0]
parsed = False
@@ -147,6 +155,13 @@ def atomFeedToDict(xmlStr: str, moderated: bool,
description = description.split('</summary>')[0]
link = rssItem.split('<link>')[1]
link = link.split('</link>')[0]
if '://' not in link:
continue
domain = link.split('://')[1]
if '/' in domain:
domain = domain.split('/')[0]
if isBlockedDomain(baseDir, domain):
continue
pubDate = rssItem.split('<updated>')[1]
pubDate = pubDate.split('</updated>')[0]
parsed = False
@@ -221,7 +236,8 @@ def getRSS(session, url: str, moderated: bool,
try:
result = session.get(url, headers=sessionHeaders, params=sessionParams)
if result:
if int(len(result) / 1024) < maxFeedSizeKb:
if int(len(result) / 1024) < maxFeedSizeKb and \
not containsInvalidChars(result):
return xmlStrToDict(result.text, moderated, maxPostsPerSource)
else:
print('WARN: feed is too large: ' + url)