mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Validate newswire item dates
So they can't be in the future or too far in the past
merge-requests/30/head
							parent
							
								
									31a48db4a4
								
							
						
					
					
						commit
						23301b35a5
					
				
							
								
								
									
										24
									
								
								inbox.py
								
								
								
								
							
							
						
						
									
										24
									
								
								inbox.py
								
								
								
								
							| 
						 | 
				
			
			@ -10,6 +10,7 @@ import json
 | 
			
		|||
import os
 | 
			
		||||
import datetime
 | 
			
		||||
import time
 | 
			
		||||
from utils import validPostDate
 | 
			
		||||
from utils import getFullDomain
 | 
			
		||||
from utils import isEventPost
 | 
			
		||||
from utils import removeIdEnding
 | 
			
		||||
| 
						 | 
				
			
			@ -71,29 +72,6 @@ from delete import removeOldHashtags
 | 
			
		|||
from follow import isFollowingActor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validPostDate(published: str) -> bool:
    """Returns true if the published date is recent and is not in the future

    published is expected in the form YYYY-MM-DDTHH:MM:SSZ
    Dates are compared as whole days since 1970-01-01, so a post is
    rejected if it is dated on a later day than today or if it is
    more than 3 days old.
    Returns False, rather than raising an exception, if the published
    date can't be parsed.
    """
    baselineTime = datetime.datetime(1970, 1, 1)

    try:
        postTimeObject = \
            datetime.datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        # the date string is external input, so a malformed or missing
        # value is treated as invalid instead of crashing the caller
        print("Inbox post has an invalid published date: " + str(published))
        return False

    # both values are day counts, so the time of day is ignored
    nowDaysSinceEpoch = (datetime.datetime.utcnow() - baselineTime).days
    postDaysSinceEpoch = (postTimeObject - baselineTime).days

    if postDaysSinceEpoch > nowDaysSinceEpoch:
        print("Inbox post has a published date in the future!")
        return False

    if nowDaysSinceEpoch - postDaysSinceEpoch > 3:
        print("Inbox post is not recent enough")
        return False
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
 | 
			
		||||
    """Tries to guess a category for the given hashtag.
 | 
			
		||||
    This works by trying to find the longest similar hashtag
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										92
									
								
								newswire.py
								
								
								
								
							
							
						
						
									
										92
									
								
								newswire.py
								
								
								
								
							| 
						 | 
				
			
			@ -14,6 +14,7 @@ from datetime import datetime
 | 
			
		|||
from datetime import timedelta
 | 
			
		||||
from datetime import timezone
 | 
			
		||||
from collections import OrderedDict
 | 
			
		||||
from utils import validPostDate
 | 
			
		||||
from utils import setHashtagCategory
 | 
			
		||||
from utils import firstParagraphFromString
 | 
			
		||||
from utils import isPublicPost
 | 
			
		||||
| 
						 | 
				
			
			@ -142,6 +143,13 @@ def addNewswireDictEntry(baseDir: str, domain: str,
 | 
			
		|||
    ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validFeedDate(pubDate: str) -> bool:
    """Returns true if the given feed item date is accepted by
    validPostDate with a maximum age of 30 days

    Returns False if the date is not in the expected format
    """
    # convert from YYYY-MM-DD HH:MM:SS+00:00 to
    # YYYY-MM-DDTHH:MM:SSZ
    postDate = pubDate.replace(' ', 'T').replace('+00:00', 'Z')
    try:
        return validPostDate(postDate, 30)
    except ValueError:
        # a date which can't be parsed shouldn't abort the processing
        # of the rest of the feed, so the item is just treated as invalid
        return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parseFeedDate(pubDate: str) -> str:
 | 
			
		||||
    """Returns a UTC date string based on the given date string
 | 
			
		||||
    This tries a number of formats to see which work
 | 
			
		||||
| 
						 | 
				
			
			@ -317,16 +325,17 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
 | 
			
		|||
 | 
			
		||||
        pubDateStr = parseFeedDate(pubDate)
 | 
			
		||||
        if pubDateStr:
 | 
			
		||||
            postFilename = ''
 | 
			
		||||
            votesStatus = []
 | 
			
		||||
            addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                 result, pubDateStr,
 | 
			
		||||
                                 title, link,
 | 
			
		||||
                                 votesStatus, postFilename,
 | 
			
		||||
                                 description, moderated, mirrored)
 | 
			
		||||
            postCtr += 1
 | 
			
		||||
            if postCtr >= maxPostsPerSource:
 | 
			
		||||
                break
 | 
			
		||||
            if validFeedDate(pubDateStr):
 | 
			
		||||
                postFilename = ''
 | 
			
		||||
                votesStatus = []
 | 
			
		||||
                addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                     result, pubDateStr,
 | 
			
		||||
                                     title, link,
 | 
			
		||||
                                     votesStatus, postFilename,
 | 
			
		||||
                                     description, moderated, mirrored)
 | 
			
		||||
                postCtr += 1
 | 
			
		||||
                if postCtr >= maxPostsPerSource:
 | 
			
		||||
                    break
 | 
			
		||||
    if postCtr > 0:
 | 
			
		||||
        print('Added ' + str(postCtr) + ' rss 2.0 feed items to newswire')
 | 
			
		||||
    return result
 | 
			
		||||
| 
						 | 
				
			
			@ -400,16 +409,17 @@ def xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
 | 
			
		|||
 | 
			
		||||
        pubDateStr = parseFeedDate(pubDate)
 | 
			
		||||
        if pubDateStr:
 | 
			
		||||
            postFilename = ''
 | 
			
		||||
            votesStatus = []
 | 
			
		||||
            addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                 result, pubDateStr,
 | 
			
		||||
                                 title, link,
 | 
			
		||||
                                 votesStatus, postFilename,
 | 
			
		||||
                                 description, moderated, mirrored)
 | 
			
		||||
            postCtr += 1
 | 
			
		||||
            if postCtr >= maxPostsPerSource:
 | 
			
		||||
                break
 | 
			
		||||
            if validFeedDate(pubDateStr):
 | 
			
		||||
                postFilename = ''
 | 
			
		||||
                votesStatus = []
 | 
			
		||||
                addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                     result, pubDateStr,
 | 
			
		||||
                                     title, link,
 | 
			
		||||
                                     votesStatus, postFilename,
 | 
			
		||||
                                     description, moderated, mirrored)
 | 
			
		||||
                postCtr += 1
 | 
			
		||||
                if postCtr >= maxPostsPerSource:
 | 
			
		||||
                    break
 | 
			
		||||
    if postCtr > 0:
 | 
			
		||||
        print('Added ' + str(postCtr) + ' rss 1.0 feed items to newswire')
 | 
			
		||||
    return result
 | 
			
		||||
| 
						 | 
				
			
			@ -471,16 +481,17 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
 | 
			
		|||
 | 
			
		||||
        pubDateStr = parseFeedDate(pubDate)
 | 
			
		||||
        if pubDateStr:
 | 
			
		||||
            postFilename = ''
 | 
			
		||||
            votesStatus = []
 | 
			
		||||
            addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                 result, pubDateStr,
 | 
			
		||||
                                 title, link,
 | 
			
		||||
                                 votesStatus, postFilename,
 | 
			
		||||
                                 description, moderated, mirrored)
 | 
			
		||||
            postCtr += 1
 | 
			
		||||
            if postCtr >= maxPostsPerSource:
 | 
			
		||||
                break
 | 
			
		||||
            if validFeedDate(pubDateStr):
 | 
			
		||||
                postFilename = ''
 | 
			
		||||
                votesStatus = []
 | 
			
		||||
                addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                     result, pubDateStr,
 | 
			
		||||
                                     title, link,
 | 
			
		||||
                                     votesStatus, postFilename,
 | 
			
		||||
                                     description, moderated, mirrored)
 | 
			
		||||
                postCtr += 1
 | 
			
		||||
                if postCtr >= maxPostsPerSource:
 | 
			
		||||
                    break
 | 
			
		||||
    if postCtr > 0:
 | 
			
		||||
        print('Added ' + str(postCtr) + ' atom feed items to newswire')
 | 
			
		||||
    return result
 | 
			
		||||
| 
						 | 
				
			
			@ -540,16 +551,17 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
 | 
			
		|||
 | 
			
		||||
        pubDateStr = parseFeedDate(pubDate)
 | 
			
		||||
        if pubDateStr:
 | 
			
		||||
            postFilename = ''
 | 
			
		||||
            votesStatus = []
 | 
			
		||||
            addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                 result, pubDateStr,
 | 
			
		||||
                                 title, link,
 | 
			
		||||
                                 votesStatus, postFilename,
 | 
			
		||||
                                 description, moderated, mirrored)
 | 
			
		||||
            postCtr += 1
 | 
			
		||||
            if postCtr >= maxPostsPerSource:
 | 
			
		||||
                break
 | 
			
		||||
            if validFeedDate(pubDateStr):
 | 
			
		||||
                postFilename = ''
 | 
			
		||||
                votesStatus = []
 | 
			
		||||
                addNewswireDictEntry(baseDir, domain,
 | 
			
		||||
                                     result, pubDateStr,
 | 
			
		||||
                                     title, link,
 | 
			
		||||
                                     votesStatus, postFilename,
 | 
			
		||||
                                     description, moderated, mirrored)
 | 
			
		||||
                postCtr += 1
 | 
			
		||||
                if postCtr >= maxPostsPerSource:
 | 
			
		||||
                    break
 | 
			
		||||
    if postCtr > 0:
 | 
			
		||||
        print('Added ' + str(postCtr) + ' YouTube feed items to newswire')
 | 
			
		||||
    return result
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										23
									
								
								utils.py
								
								
								
								
							
							
						
						
									
										23
									
								
								utils.py
								
								
								
								
							| 
						 | 
				
			
			@ -19,6 +19,29 @@ from calendar import monthrange
 | 
			
		|||
from followingCalendar import addPersonToCalendar
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validPostDate(published: str, maxAgeDays=7) -> bool:
    """Returns true if the published date is recent and is not in the future

    published is expected in the form YYYY-MM-DDTHH:MM:SSZ
    maxAgeDays is the age in whole days at which a post is rejected,
    so a post which is exactly maxAgeDays old is not valid.
    Dates are compared as whole days since 1970-01-01, so a post is
    only considered to be in the future if it is dated on a later day
    than today.
    Returns False, rather than raising an exception, if the published
    date can't be parsed.
    """
    baselineTime = datetime.datetime(1970, 1, 1)

    try:
        postTimeObject = \
            datetime.datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        # the date string is external input, so a malformed or missing
        # value is treated as invalid instead of crashing the caller
        print("Inbox post has an invalid published date: " + str(published))
        return False

    # both values are day counts, so the time of day is ignored
    nowDaysSinceEpoch = (datetime.datetime.utcnow() - baselineTime).days
    postDaysSinceEpoch = (postTimeObject - baselineTime).days

    if postDaysSinceEpoch > nowDaysSinceEpoch:
        print("Inbox post has a published date in the future!")
        return False

    if nowDaysSinceEpoch - postDaysSinceEpoch >= maxAgeDays:
        print("Inbox post is not recent enough")
        return False
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def getFullDomain(domain: str, port: int) -> str:
 | 
			
		||||
    """Returns the full domain name, including port number
 | 
			
		||||
    """
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue