def _ytFeedTagText(feedItem: str, tag: str) -> str:
    """Returns the text between the first <tag> and the next </tag>
    within feedItem, or None if that pair is not present
    """
    openTag = '<' + tag + '>'
    closeTag = '</' + tag + '>'
    startIdx = feedItem.find(openTag)
    if startIdx == -1:
        return None
    startIdx += len(openTag)
    # only accept a closing tag which follows the opening one
    endIdx = feedItem.find(closeTag, startIdx)
    if endIdx == -1:
        return None
    return feedItem[startIdx:endIdx]


def _ytFeedDateStr(pubDate: str) -> str:
    """Returns the newswire date string for a feed timestamp, or None
    if the timestamp matches none of the recognized formats
    """
    # (strptime format, suffix appended to str(datetime)).
    # The suffixes reproduce the original behaviour: the ISO style date
    # is stored without an offset, the RSS style date with '+00:00'
    dateFormats = (
        ("%Y-%m-%dT%H:%M:%SZ", ''),
        ("%a, %d %b %Y %H:%M:%S UT", '+00:00')
    )
    for dateFormat, suffix in dateFormats:
        try:
            publishedDate = datetime.strptime(pubDate, dateFormat)
        except ValueError:
            # not this format, try the next one
            continue
        return str(publishedDate) + suffix
    return None


def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
                     moderated: bool, mirrored: bool,
                     maxPostsPerSource: int,
                     maxFeedItemSizeKb: int) -> {}:
    """Converts an atom-style YouTube feed string to a dictionary

    The feed is split into entries. Each entry which has a title, a
    date and a video id is passed to addNewswireDictEntry together
    with a watch link built from the video id. Entries bigger than
    maxFeedItemSizeKb, or with an unrecognized date, are skipped with
    a warning. At most maxPostsPerSource entries are added.

    Returns the dictionary populated by addNewswireDictEntry, or an
    empty dictionary if the feed contains no entries or
    www.youtube.com is blocked.

    NOTE(review): the tag literals in this function were empty strings
    in the reviewed text (the markup appears to have been stripped),
    which makes str.split raise ValueError. They have been
    reconstructed from the way each value is used - confirm
    'entry', 'title', 'published', 'yt:videoId', 'media:description'
    and 'summary' against the upstream source.
    """
    if '<entry>' not in xmlStr:
        return {}
    if isBlockedDomain(baseDir, 'www.youtube.com'):
        return {}
    result = {}
    postCtr = 0
    maxBytes = maxFeedItemSizeKb * 1024
    for rssItem in xmlStr.split('<entry>'):
        if len(rssItem) > maxBytes:
            print('WARN: atom feed item is too big')
            continue

        # mandatory fields
        title = _ytFeedTagText(rssItem, 'title')
        pubDate = _ytFeedTagText(rssItem, 'published')
        videoId = _ytFeedTagText(rssItem, 'yt:videoId')
        if title is None or pubDate is None or videoId is None:
            continue

        # optional description, with a fallback tag
        description = _ytFeedTagText(rssItem, 'media:description')
        if description is None:
            description = _ytFeedTagText(rssItem, 'summary')
        if description is None:
            description = ''

        link = 'https://www.youtube.com/watch?v=' + videoId.strip()

        # NOTE(review): only the two formats below are recognized, as
        # in the original. If the feed supplies dates with a numeric
        # offset (eg. +00:00) they will be skipped here - confirm
        dateStr = _ytFeedDateStr(pubDate)
        if dateStr is None:
            print('WARN: unrecognized atom feed date format: ' + pubDate)
            continue

        postFilename = ''
        votesStatus = []
        try:
            addNewswireDictEntry(baseDir, domain,
                                 result, dateStr,
                                 title, link,
                                 votesStatus, postFilename,
                                 description, moderated, mirrored)
        except Exception as ex:
            # stay best-effort for a single bad item, but don't
            # misreport the failure as a date format problem
            print('WARN: unable to add atom feed item: ' + str(ex))
            continue
        postCtr += 1
        if postCtr >= maxPostsPerSource:
            break
    return result