From fb29da5f7a2379b3391aa4785aa9cb9c05184de9 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 22 Nov 2020 19:01:18 +0000 Subject: [PATCH] Date parser returns string --- newswire.py | 34 ++++++++++++++++------------------ tests.py | 6 +++++- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/newswire.py b/newswire.py index b9676af8b..825009eb1 100644 --- a/newswire.py +++ b/newswire.py @@ -136,8 +136,8 @@ def addNewswireDictEntry(baseDir: str, domain: str, ] -def parseFeedDate(pubDate: str): - """Returns a date object based on the given date string +def parseFeedDate(pubDate: str) -> str: + """Returns a UTC date string based on the given date string This tries a number of formats to see which work """ formats = ("%a, %d %b %Y %H:%M:%S %z", @@ -186,7 +186,14 @@ def parseFeedDate(pubDate: str): hoursAdded = timedelta(hours=5) publishedDate = publishedDate + hoursAdded break - return publishedDate + + pubDateStr = None + if publishedDate: + pubDateStr = str(publishedDate) + if not pubDateStr.endswith('+00:00'): + pubDateStr += '+00:00' + + return pubDateStr def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, @@ -241,11 +248,8 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, pubDate = rssItem.split('')[1] pubDate = pubDate.split('')[0] - publishedDate = parseFeedDate(pubDate) - if publishedDate: - pubDateStr = str(publishedDate) - if not pubDateStr.endswith('+00:00'): - pubDateStr += '+00:00' + pubDateStr = parseFeedDate(pubDate) + if pubDateStr: postFilename = '' votesStatus = [] addNewswireDictEntry(baseDir, domain, @@ -311,11 +315,8 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str, pubDate = atomItem.split('')[1] pubDate = pubDate.split('')[0] - publishedDate = parseFeedDate(pubDate) - if publishedDate: - pubDateStr = str(publishedDate) - if not pubDateStr.endswith('+00:00'): - pubDateStr += '+00:00' + pubDateStr = parseFeedDate(pubDate) + if pubDateStr: postFilename = '' votesStatus = [] addNewswireDictEntry(baseDir, domain, @@ -378,11 +379,8 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str, pubDate = atomItem.split('')[1] pubDate = pubDate.split('')[0] - publishedDate = parseFeedDate(pubDate) - if publishedDate: - pubDateStr = str(publishedDate) - if not pubDateStr.endswith('+00:00'): - pubDateStr += '+00:00' + pubDateStr = parseFeedDate(pubDate) + if pubDateStr: postFilename = '' votesStatus = [] addNewswireDictEntry(baseDir, domain, diff --git a/tests.py b/tests.py index 59c0259e2..23efc6c13 100644 --- a/tests.py +++ b/tests.py @@ -2391,7 +2391,11 @@ def testParseFeedDate(): pubDate = "2020-08-27T16:12:34+00:00" publishedDate = parseFeedDate(pubDate) assert publishedDate - print(str(publishedDate)) + + pubDate = "Sun, 22 Nov 2020 19:51:33 +0100" + publishedDate = parseFeedDate(pubDate) + # print(str(publishedDate)) + assert publishedDate def runAllTests():