diff --git a/newswire.py b/newswire.py index a71ee2bff..95d357482 100644 --- a/newswire.py +++ b/newswire.py @@ -258,7 +258,7 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, maxPostsPerSource: int, maxFeedItemSizeKb: int, maxCategoriesFeedItemSizeKb: int) -> {}: - """Converts an xml 2.0 string to a dictionary + """Converts an xml RSS 2.0 string to a dictionary """ if '' not in xmlStr: return {} @@ -332,6 +332,86 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, return result +def xml1StrToDict(baseDir: str, domain: str, xmlStr: str, + moderated: bool, mirrored: bool, + maxPostsPerSource: int, + maxFeedItemSizeKb: int, + maxCategoriesFeedItemSizeKb: int) -> {}: + """Converts an xml RSS 1.0 string to a dictionary + https://validator.w3.org/feed/docs/rss1.html + """ + if '#categories' in xmlStr: + xml2StrToHashtagCategories(baseDir, xmlStr, + maxCategoriesFeedItemSizeKb) + return {} + + rssItems = xmlStr.split(' maxBytes: + print('WARN: rss feed item is too big') + continue + if '' not in rssItem: + continue + if '' not in rssItem: + continue + if '' not in rssItem: + continue + if '' not in rssItem: + continue + if '' not in rssItem: + continue + if '' not in rssItem: + continue + title = rssItem.split('')[1] + title = removeCDATA(title.split('')[0]) + description = '' + if '' in rssItem and '' in rssItem: + description = rssItem.split('')[1] + description = removeCDATA(description.split('')[0]) + else: + if '' in rssItem and \ + '' in rssItem: + description = rssItem.split('')[1] + description = description.split('')[0] + description = removeCDATA(description) + link = rssItem.split('')[1] + link = link.split('')[0] + if '://' not in link: + continue + itemDomain = link.split('://')[1] + if '/' in itemDomain: + itemDomain = itemDomain.split('/')[0] + if isBlockedDomain(baseDir, itemDomain): + continue + pubDate = rssItem.split('')[1] + pubDate = pubDate.split('')[0] + + pubDateStr = parseFeedDate(pubDate) + if pubDateStr: + postFilename = '' + votesStatus = [] + addNewswireDictEntry(baseDir, domain, + result, pubDateStr, + title, link, + votesStatus, postFilename, + description, moderated, mirrored) + postCtr += 1 + if postCtr >= maxPostsPerSource: + break + if postCtr > 0: + print('Added ' + str(postCtr) + ' rss 1.0 feed items to newswire') + return result + + def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, moderated: bool, mirrored: bool, maxPostsPerSource: int, @@ -489,6 +569,11 @@ def xmlStrToDict(baseDir: str, domain: str, xmlStr: str, xmlStr, moderated, mirrored, maxPostsPerSource, maxFeedItemSizeKb, maxCategoriesFeedItemSizeKb) + elif '' in xmlStr: + return xml1StrToDict(baseDir, domain, + xmlStr, moderated, mirrored, + maxPostsPerSource, maxFeedItemSizeKb, + maxCategoriesFeedItemSizeKb) elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr: return atomFeedToDict(baseDir, domain, xmlStr, moderated, mirrored,