Handle fractional seconds within post published date

main
Bob Mottram 2022-03-30 19:13:40 +01:00
parent 8befe357ab
commit b6c2067c71
3 changed files with 29 additions and 3 deletions

View File

@ -2306,11 +2306,16 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
if not message_json['object'].get('published'): if not message_json['object'].get('published'):
return False return False
if 'T' not in message_json['object']['published']: published = message_json['object']['published']
if 'T' not in published:
return False return False
if 'Z' not in message_json['object']['published']: if 'Z' not in published:
return False return False
if not valid_post_date(message_json['object']['published'], 90, debug): if '.' in published:
# converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
published = published.split('.')[0] + 'Z'
message_json['object']['published'] = published
if not valid_post_date(published, 90, debug):
return False return False
summary = None summary = None

View File

@ -268,6 +268,14 @@ def _valid_feed_date(pub_date: str, debug: bool = False) -> bool:
# convert from YY-MM-DD HH:MM:SS+00:00 to # convert from YY-MM-DD HH:MM:SS+00:00 to
# YY-MM-DDTHH:MM:SSZ # YY-MM-DDTHH:MM:SSZ
post_date = pub_date.replace(' ', 'T').replace('+00:00', 'Z') post_date = pub_date.replace(' ', 'T').replace('+00:00', 'Z')
if '.' in post_date:
ending = post_date.split('.')[1]
timezone_str = ''
for ending_char in ending:
if not ending_char.isdigit():
timezone_str += ending_char
if timezone_str:
post_date = post_date.split('.')[0] + timezone_str
return valid_post_date(post_date, 90, debug) return valid_post_date(post_date, 90, debug)
@ -320,6 +328,15 @@ def parse_feed_date(pub_date: str, unique_string_identifier: str) -> str:
if 'UT' in pub_date and 'UT' not in date_format: if 'UT' in pub_date and 'UT' not in date_format:
continue continue
# remove any fraction of a second
if '.' in pub_date:
ending = pub_date.split('.')[1]
timezone_str = ''
for ending_char in ending:
if not ending_char.isdigit():
timezone_str += ending_char
if timezone_str:
pub_date = pub_date.split('.')[0] + timezone_str
try: try:
published_date = datetime.strptime(pub_date, date_format) published_date = datetime.strptime(pub_date, date_format)
except BaseException: except BaseException:

View File

@ -4926,6 +4926,10 @@ def download_announce(session, base_dir: str, http_prefix: str,
base_dir, nickname, domain, post_id, base_dir, nickname, domain, post_id,
recent_posts_cache) recent_posts_cache)
return None return None
if '.' in announced_json['published'] and \
'Z' in announced_json['published']:
announced_json['published'] = \
announced_json['published'].split('.')[0] + 'Z'
if not valid_post_date(announced_json['published'], 90, debug): if not valid_post_date(announced_json['published'], 90, debug):
print('WARN: announced post is not recently published ' + print('WARN: announced post is not recently published ' +
str(announced_json)) str(announced_json))