From e5950d43633f1562fed8f80d65a5dbcb4f87ba4d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 12:58:31 +0100 Subject: [PATCH] Reject blocked domains within news feeds --- newswire.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index 3e6735f4..0d854c82 100644 --- a/newswire.py +++ b/newswire.py @@ -16,7 +16,8 @@ from utils import locatePost from utils import loadJson from utils import saveJson from utils import isSuspended - +from utils import containsInvalidChars +from blocking import isBlockedDomain def rss2Header(httpPrefix: str, nickname: str, domainFull: str, @@ -80,6 +81,13 @@ def xml2StrToDict(xmlStr: str, moderated: bool, description = description.split('')[0] link = rssItem.split('')[1] link = link.split('')[0] + if '://' not in link: + continue + domain = link.split('://')[1] + if '/' in domain: + domain = domain.split('/')[0] + if isBlockedDomain(baseDir, domain): + continue pubDate = rssItem.split('')[1] pubDate = pubDate.split('')[0] parsed = False @@ -147,6 +155,13 @@ def atomFeedToDict(xmlStr: str, moderated: bool, description = description.split('')[0] link = rssItem.split('')[1] link = link.split('')[0] + if '://' not in link: + continue + domain = link.split('://')[1] + if '/' in domain: + domain = domain.split('/')[0] + if isBlockedDomain(baseDir, domain): + continue pubDate = rssItem.split('')[1] pubDate = pubDate.split('')[0] parsed = False @@ -221,7 +236,8 @@ def getRSS(session, url: str, moderated: bool, try: result = session.get(url, headers=sessionHeaders, params=sessionParams) if result: - if int(len(result) / 1024) < maxFeedSizeKb: + if int(len(result) / 1024) < maxFeedSizeKb and \ + not containsInvalidChars(result): return xmlStrToDict(result.text, moderated, maxPostsPerSource) else: print('WARN: feed is too large: ' + url)