diff --git a/newswire.py b/newswire.py index f1d33aee..761446f0 100644 --- a/newswire.py +++ b/newswire.py @@ -234,38 +234,38 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, if '' not in xmlStr: return {} result = {} - rssItems = xmlStr.split('') + atomItems = xmlStr.split('') postCtr = 0 maxBytes = maxFeedItemSizeKb * 1024 - for rssItem in rssItems: - if len(rssItem) > maxBytes: + for atomItem in atomItems: + if len(atomItem) > maxBytes: print('WARN: atom feed item is too big') continue - if '' not in rssItem: + if '<title>' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - title = rssItem.split('')[1] + title = atomItem.split('<title>')[1] title = removeCDATA(title.split('')[0]) description = '' - if '' in rssItem and '' in rssItem: - description = rssItem.split('')[1] + if '' in atomItem and '' in atomItem: + description = atomItem.split('')[1] description = removeCDATA(description.split('')[0]) else: - if '' in rssItem and \ - '' in rssItem: - description = rssItem.split('')[1] + if '' in atomItem and \ + '' in atomItem: + description = atomItem.split('')[1] description = description.split('')[0] description = removeCDATA(description) - link = rssItem.split('')[1] + link = atomItem.split('')[1] link = link.split('')[0] if '://' not in link: continue @@ -274,7 +274,7 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, itemDomain = itemDomain.split('/')[0] if isBlockedDomain(baseDir, itemDomain): continue - pubDate = rssItem.split('')[1] + pubDate = atomItem.split('')[1] pubDate = pubDate.split('')[0] parsed = False try: @@ -325,42 +325,42 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str, if isBlockedDomain(baseDir, 'www.youtube.com'): return {} result = {} - rssItems = xmlStr.split('') + atomItems = xmlStr.split('') postCtr = 0 maxBytes = maxFeedItemSizeKb * 1024 - for rssItem in rssItems: - print('YouTube feed item: ' + rssItem) - if len(rssItem) > maxBytes: + for atomItem in atomItems: + print('YouTube feed item: ' + atomItem) + if len(atomItem) > maxBytes: print('WARN: atom feed item is too big') continue - if '' not in rssItem: + if '<title>' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - if '' not in rssItem: + if '' not in atomItem: continue - title = rssItem.split('')[1] + title = atomItem.split('<title>')[1] title = removeCDATA(title.split('')[0]) description = '' - if '' in rssItem and \ - '' in rssItem: - description = rssItem.split('')[1] + if '' in atomItem and \ + '' in atomItem: + description = atomItem.split('')[1] description = description.split('')[0] description = removeCDATA(description) - elif '' in rssItem and '' in rssItem: - description = rssItem.split('')[1] + elif '' in atomItem and '' in atomItem: + description = atomItem.split('')[1] description = description.split('')[0] description = removeCDATA(description) - link = rssItem.split('')[1] + link = atomItem.split('')[1] link = link.split('')[0] link = 'https://www.youtube.com/watch?v=' + link.strip() - pubDate = rssItem.split('')[1] + pubDate = atomItem.split('')[1] pubDate = pubDate.split('')[0] parsed = False try: @@ -417,7 +417,7 @@ def xmlStrToDict(baseDir: str, domain: str, xmlStr: str, xmlStr, moderated, mirrored, maxPostsPerSource, maxFeedItemSizeKb) elif '' in xmlStr and '' in xmlStr: - print ('YouTube feed: reading') + print('YouTube feed: reading') return atomFeedYTToDict(baseDir, domain, xmlStr, moderated, mirrored, maxPostsPerSource, maxFeedItemSizeKb)