mirror of https://gitlab.com/bashrc2/epicyon
Validate newswire item dates
So they can't be in the future or too far in the past
main
parent
31a48db4a4
commit
23301b35a5
24
inbox.py
24
inbox.py
|
@ -10,6 +10,7 @@ import json
|
|||
import os
|
||||
import datetime
|
||||
import time
|
||||
from utils import validPostDate
|
||||
from utils import getFullDomain
|
||||
from utils import isEventPost
|
||||
from utils import removeIdEnding
|
||||
|
@ -71,29 +72,6 @@ from delete import removeOldHashtags
|
|||
from follow import isFollowingActor
|
||||
|
||||
|
||||
def validPostDate(published: str) -> bool:
    """Returns true if the published date is recent and is not in the future

    published is expected in the form YYYY-MM-DDTHH:MM:SSZ and is compared
    with the current UTC time at whole-day granularity. A post dated on a
    later day than today, or more than 3 days before today, is rejected.
    A value which can't be parsed in that format is also rejected, rather
    than raising an exception from within the caller.
    """
    try:
        postTimeObject = \
            datetime.datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        # malformed or missing date: treat as not valid
        print("Inbox post has an invalid published date")
        return False

    # naive UTC "now", so that it is comparable with the naive parsed time
    currTime = \
        datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # difference in calendar days, which is the same as the difference
    # between the whole days since epoch of each time
    postAgeDays = (currTime.date() - postTimeObject.date()).days

    if postAgeDays < 0:
        print("Inbox post has a published date in the future!")
        return False

    if postAgeDays > 3:
        print("Inbox post is not recent enough")
        return False
    return True
|
||||
|
||||
|
||||
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
|
||||
"""Tries to guess a category for the given hashtag.
|
||||
This works by trying to find the longest similar hashtag
|
||||
|
|
92
newswire.py
92
newswire.py
|
@ -14,6 +14,7 @@ from datetime import datetime
|
|||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from collections import OrderedDict
|
||||
from utils import validPostDate
|
||||
from utils import setHashtagCategory
|
||||
from utils import firstParagraphFromString
|
||||
from utils import isPublicPost
|
||||
|
@ -142,6 +143,13 @@ def addNewswireDictEntry(baseDir: str, domain: str,
|
|||
]
|
||||
|
||||
|
||||
def validFeedDate(pubDate: str) -> bool:
    """Returns true if the given feed item date is acceptable
    This converts the date string into the format expected by
    validPostDate and then checks it with a maximum age of 30 days
    """
    # A missing date can't be validated. Callers pass the result of
    # parseFeedDate straight in, so guard against None or an empty string
    # here rather than failing on the replace below
    if not pubDate:
        return False
    # convert from YY-MM-DD HH:MM:SS+00:00 to
    # YY-MM-DDTHH:MM:SSZ
    postDate = pubDate.replace(' ', 'T').replace('+00:00', 'Z')
    return validPostDate(postDate, 30)
|
||||
|
||||
|
||||
def parseFeedDate(pubDate: str) -> str:
|
||||
"""Returns a UTC date string based on the given date string
|
||||
This tries a number of formats to see which work
|
||||
|
@ -317,16 +325,17 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
|
||||
pubDateStr = parseFeedDate(pubDate)
|
||||
if pubDateStr:
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if validFeedDate(pubDateStr):
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if postCtr > 0:
|
||||
print('Added ' + str(postCtr) + ' rss 2.0 feed items to newswire')
|
||||
return result
|
||||
|
@ -400,16 +409,17 @@ def xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
|
||||
pubDateStr = parseFeedDate(pubDate)
|
||||
if pubDateStr:
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if validFeedDate(pubDateStr):
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if postCtr > 0:
|
||||
print('Added ' + str(postCtr) + ' rss 1.0 feed items to newswire')
|
||||
return result
|
||||
|
@ -471,16 +481,17 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
|
||||
pubDateStr = parseFeedDate(pubDate)
|
||||
if pubDateStr:
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if validFeedDate(pubDateStr):
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if postCtr > 0:
|
||||
print('Added ' + str(postCtr) + ' atom feed items to newswire')
|
||||
return result
|
||||
|
@ -540,16 +551,17 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
|
|||
|
||||
pubDateStr = parseFeedDate(pubDate)
|
||||
if pubDateStr:
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if validFeedDate(pubDateStr):
|
||||
postFilename = ''
|
||||
votesStatus = []
|
||||
addNewswireDictEntry(baseDir, domain,
|
||||
result, pubDateStr,
|
||||
title, link,
|
||||
votesStatus, postFilename,
|
||||
description, moderated, mirrored)
|
||||
postCtr += 1
|
||||
if postCtr >= maxPostsPerSource:
|
||||
break
|
||||
if postCtr > 0:
|
||||
print('Added ' + str(postCtr) + ' YouTube feed items to newswire')
|
||||
return result
|
||||
|
|
23
utils.py
23
utils.py
|
@ -19,6 +19,29 @@ from calendar import monthrange
|
|||
from followingCalendar import addPersonToCalendar
|
||||
|
||||
|
||||
def validPostDate(published: str, maxAgeDays=7) -> bool:
    """Returns true if the published date is recent and is not in the future

    published is expected in the form YYYY-MM-DDTHH:MM:SSZ and is compared
    with the current UTC time at whole-day granularity. A date on a later
    day than today is rejected, as is one which is maxAgeDays or more days
    before today. A value which can't be parsed in the expected format is
    also rejected, rather than raising an exception from within the caller.
    """
    try:
        postTimeObject = \
            datetime.datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        # malformed or missing date: treat as not valid
        print("Inbox post has an invalid published date")
        return False

    # naive UTC "now", so that it is comparable with the naive parsed time
    currTime = \
        datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # difference in calendar days, which is the same as the difference
    # between the whole days since epoch of each time
    postAgeDays = (currTime.date() - postTimeObject.date()).days

    if postAgeDays < 0:
        print("Inbox post has a published date in the future!")
        return False

    if postAgeDays >= maxAgeDays:
        print("Inbox post is not recent enough")
        return False
    return True
|
||||
|
||||
|
||||
def getFullDomain(domain: str, port: int) -> str:
|
||||
"""Returns the full domain name, including port number
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue