Support for youtube feeds

main
Bob Mottram 2020-11-22 10:34:42 +00:00
parent fad926c7ec
commit 0b9e4c5350
1 changed files with 87 additions and 0 deletions

View File

@ -302,6 +302,89 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
return result return result
def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
"""Converts an atom-style YouTube feed string to a dictionary
"""
if '<entry>' not in xmlStr:
return {}
if isBlockedDomain(baseDir, 'www.youtube.com'):
return {}
result = {}
rssItems = xmlStr.split('<entry>')
postCtr = 0
maxBytes = maxFeedItemSizeKb * 1024
for rssItem in rssItems:
if len(rssItem) > maxBytes:
print('WARN: atom feed item is too big')
continue
if '<title>' not in rssItem:
continue
if '</title>' not in rssItem:
continue
if '<updated>' not in rssItem:
continue
if '</updated>' not in rssItem:
continue
if '<yt:videoId>' not in rssItem:
continue
if '</yt:videoId>' not in rssItem:
continue
title = rssItem.split('<title>')[1]
title = title.split('</title>')[0]
description = ''
if '<media:description>' in rssItem and \
'</media:description>' in rssItem:
description = rssItem.split('<media:description>')[1]
description = description.split('</media:description>')[0]
elif '<summary>' in rssItem and '</summary>' in rssItem:
description = rssItem.split('<summary>')[1]
description = description.split('</summary>')[0]
link = rssItem.split('<yt:videoId>')[1]
link = link.split('</yt:videoId>')[0]
link = 'https://www.youtube.com/watch?v=' + link.strip()
pubDate = rssItem.split('<updated>')[1]
pubDate = pubDate.split('</updated>')[0]
parsed = False
try:
publishedDate = \
datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%SZ")
postFilename = ''
votesStatus = []
addNewswireDictEntry(baseDir, domain,
result, str(publishedDate),
title, link,
votesStatus, postFilename,
description, moderated, mirrored)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
parsed = True
except BaseException:
pass
if not parsed:
try:
publishedDate = \
datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT")
postFilename = ''
votesStatus = []
addNewswireDictEntry(baseDir, domain, result,
str(publishedDate) + '+00:00',
title, link,
votesStatus, postFilename,
description, moderated, mirrored)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
parsed = True
except BaseException:
print('WARN: unrecognized atom feed date format: ' + pubDate)
pass
return result
def xmlStrToDict(baseDir: str, domain: str, xmlStr: str, def xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int, maxPostsPerSource: int,
@ -316,6 +399,10 @@ def xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
return atomFeedToDict(baseDir, domain, return atomFeedToDict(baseDir, domain,
xmlStr, moderated, mirrored, xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb) maxPostsPerSource, maxFeedItemSizeKb)
elif '<yt:videoId>' in xmlStr and '<yt:channelId>' in xmlStr:
return atomFeedYTToDict(baseDir, domain,
xmlStr, moderated, mirrored,
maxPostsPerSource, maxFeedItemSizeKb)
return {} return {}