Handling of dates within feeds

merge-requests/30/head
Bob Mottram 2020-11-22 14:08:29 +00:00
parent 64b4e7fbd8
commit 7be81054ba
1 changed file with 68 additions and 2 deletions

View File

@ -203,6 +203,29 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
parsed = True parsed = True
except BaseException: except BaseException:
pass pass
if not parsed:
try:
publishedDate = \
datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S EST")
hoursAdded = datetime.timedelta(hours=5)
publishedDate = publishedDate + hoursAdded
postFilename = ''
votesStatus = []
addNewswireDictEntry(baseDir, domain,
result,
str(publishedDate) + '00:00',
title, link,
votesStatus, postFilename,
description, moderated, mirrored)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
parsed = True
except BaseException:
print('WARN: unrecognized RSS date format: ' + pubDate)
pass
if not parsed: if not parsed:
try: try:
publishedDate = \ publishedDate = \
@ -293,14 +316,15 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
parsed = True parsed = True
except BaseException: except BaseException:
pass pass
if not parsed: if not parsed:
try: try:
publishedDate = \ publishedDate = \
datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%S%z")
postFilename = '' postFilename = ''
votesStatus = [] votesStatus = []
addNewswireDictEntry(baseDir, domain, result, addNewswireDictEntry(baseDir, domain, result,
str(publishedDate) + '+00:00', str(publishedDate),
title, link, title, link,
votesStatus, postFilename, votesStatus, postFilename,
description, moderated, mirrored) description, moderated, mirrored)
@ -311,6 +335,28 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
except BaseException: except BaseException:
print('WARN: unrecognized atom feed date format: ' + pubDate) print('WARN: unrecognized atom feed date format: ' + pubDate)
pass pass
if not parsed:
try:
publishedDate = \
datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S EST")
hoursAdded = datetime.timedelta(hours=5)
publishedDate = publishedDate + hoursAdded
postFilename = ''
votesStatus = []
addNewswireDictEntry(baseDir, domain,
result,
str(publishedDate) + '00:00',
title, link,
votesStatus, postFilename,
description, moderated, mirrored)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
parsed = True
except BaseException:
print('WARN: unrecognized RSS date format: ' + pubDate)
pass
return result return result
@ -380,6 +426,26 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
except BaseException: except BaseException:
print('YouTube feed: failed to parse published date ' + pubDate) print('YouTube feed: failed to parse published date ' + pubDate)
pass pass
if not parsed:
try:
publishedDate = \
datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%S%z")
postFilename = ''
votesStatus = []
addNewswireDictEntry(baseDir, domain, result,
str(publishedDate),
title, link,
votesStatus, postFilename,
description, moderated, mirrored)
postCtr += 1
if postCtr >= maxPostsPerSource:
break
parsed = True
except BaseException:
print('WARN: unrecognized atom feed date format: ' + pubDate)
pass
if not parsed: if not parsed:
try: try:
publishedDate = \ publishedDate = \