diff --git a/newswire.py b/newswire.py
index 11867ba2d..3d06bea12 100644
--- a/newswire.py
+++ b/newswire.py
@@ -10,6 +10,7 @@ __module_group__ = "Web Interface Columns"
import os
import json
import requests
+import random
from socket import error as SocketError
import errno
from datetime import datetime
@@ -268,10 +269,21 @@ def _valid_feed_date(pub_date: str, debug: bool = False) -> bool:
return valid_post_date(post_date, 90, debug)
-def parse_feed_date(pub_date: str) -> str:
+def parse_feed_date(pub_date: str, unique_string_identifier: str) -> str:
"""Returns a UTC date string based on the given date string
This tries a number of formats to see which work
"""
+
+ if ':00:00' in pub_date:
+ # If published exactly on the hour, assign a pseudo-random minute and
+ # second (seeded by the identifier) so the item is relatively unique
+ randgen = random.Random(unique_string_identifier)
+ rand_min = randgen.randint(0, 59)
+ rand_sec = randgen.randint(0, 59)
+ replace_time_str = \
+ ':' + str(rand_min).zfill(2) + ':' + str(rand_sec).zfill(2)
+ pub_date = pub_date.replace(':00:00', replace_time_str)
+
formats = ("%a, %d %b %Y %H:%M:%S %z",
"%a, %d %b %Y %H:%M:%S Z",
"%a, %d %b %Y %H:%M:%S GMT",
@@ -668,7 +680,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
pub_date = rss_item.split('')[1]
pub_date = pub_date.split('')[0]
- pub_date_str = parse_feed_date(pub_date)
+ unique_string_identifier = title + ' ' + link
+ pub_date_str = parse_feed_date(pub_date, unique_string_identifier)
if pub_date_str:
if _valid_feed_date(pub_date_str):
post_filename = ''
@@ -763,7 +776,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
pub_date = rss_item.split('')[1]
pub_date = pub_date.split('')[0]
- pub_date_str = parse_feed_date(pub_date)
+ unique_string_identifier = title + ' ' + link
+ pub_date_str = parse_feed_date(pub_date, unique_string_identifier)
if pub_date_str:
if _valid_feed_date(pub_date_str):
post_filename = ''
@@ -846,7 +860,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
pub_date = atom_item.split('')[1]
pub_date = pub_date.split('')[0]
- pub_date_str = parse_feed_date(pub_date)
+ unique_string_identifier = title + ' ' + link
+ pub_date_str = parse_feed_date(pub_date, unique_string_identifier)
if pub_date_str:
if _valid_feed_date(pub_date_str):
post_filename = ''
@@ -961,7 +976,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
continue
pub_date = json_feed_item['date_modified']
- pub_date_str = parse_feed_date(pub_date)
+ unique_string_identifier = title + ' ' + link
+ pub_date_str = parse_feed_date(pub_date, unique_string_identifier)
if pub_date_str:
if _valid_feed_date(pub_date_str):
post_filename = ''
@@ -1045,7 +1061,8 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
pub_date = atom_item.split('')[1]
pub_date = pub_date.split('')[0]
- pub_date_str = parse_feed_date(pub_date)
+ unique_string_identifier = title + ' ' + link
+ pub_date_str = parse_feed_date(pub_date, unique_string_identifier)
if pub_date_str:
if _valid_feed_date(pub_date_str):
post_filename = ''
diff --git a/tests.py b/tests.py
index 635ef73cc..3df4f5c56 100644
--- a/tests.py
+++ b/tests.py
@@ -4233,25 +4233,32 @@ def _test_first_paragraph_from_string():
assert result_str == test_str
-def _test_parse_feed_date():
+def _test_parse_newswire_feed_date():
print('test_parse_feed_date')
+ unique_string_identifier = 'some string abcd'
+
pub_date = "2020-12-14T00:08:06+00:00"
- published_date = parse_feed_date(pub_date)
+ published_date = parse_feed_date(pub_date, unique_string_identifier)
assert published_date == "2020-12-14 00:08:06+00:00"
pub_date = "Tue, 08 Dec 2020 06:24:38 -0600"
- published_date = parse_feed_date(pub_date)
+ published_date = parse_feed_date(pub_date, unique_string_identifier)
assert published_date == "2020-12-08 12:24:38+00:00"
pub_date = "2020-08-27T16:12:34+00:00"
- published_date = parse_feed_date(pub_date)
+ published_date = parse_feed_date(pub_date, unique_string_identifier)
assert published_date == "2020-08-27 16:12:34+00:00"
pub_date = "Sun, 22 Nov 2020 19:51:33 +0100"
- published_date = parse_feed_date(pub_date)
+ published_date = parse_feed_date(pub_date, unique_string_identifier)
assert published_date == "2020-11-22 18:51:33+00:00"
+ pub_date = "Sun, 22 Nov 2020 00:00:00 +0000"
+ published_date = parse_feed_date(pub_date, unique_string_identifier)
+ assert published_date != "2020-11-22 00:00:00+00:00"
+ assert "2020-11-22 00:" in published_date
+
def _test_valid_nick():
print('test_valid_nickname')
@@ -6576,7 +6583,7 @@ def run_all_tests():
_test_mentioned_people(base_dir)
_test_guess_tag_category()
_test_valid_nick()
- _test_parse_feed_date()
+ _test_parse_newswire_feed_date()
_test_first_paragraph_from_string()
_test_newswire_tags()
_test_hashtag_rules()