mirror of https://gitlab.com/bashrc2/epicyon
Check for edited posts
parent
054f4bde8e
commit
67bc0d61f4
40
content.py
40
content.py
|
@ -24,6 +24,7 @@ from utils import containsPGPPublicKey
|
|||
from utils import acctDir
|
||||
from utils import isfloat
|
||||
from utils import getCurrencies
|
||||
from utils import removeHtml
|
||||
from petnames import getPetName
|
||||
|
||||
|
||||
|
@ -1133,3 +1134,42 @@ def getPriceFromString(priceStr: str) -> (str, str):
|
|||
if isfloat(priceStr):
|
||||
return priceStr, "EUR"
|
||||
return "0.00", "EUR"
|
||||
|
||||
|
||||
def _wordPairCounts(words: []) -> {}:
    """Returns a dict mapping each adjacent pair of words (concatenated
    without a separator) to the number of times that pair occurs
    """
    counts = {}
    for index in range(1, len(words)):
        combinedWords = words[index - 1] + words[index]
        counts[combinedWords] = counts.get(combinedWords, 0) + 1
    return counts


def wordsSimilarity(content1: str, content2: str, minWords: int) -> int:
    """Returns percentage similarity

    Identical strings score 100. Otherwise each text is passed through
    removeHtml, lowercased and split on single spaces. If either text
    has fewer than minWords words then 0 is returned. The adjacent word
    pairs of content1 are then compared with those of content2: a pair
    missing from content2 adds one to the difference, and a pair present
    in both adds the absolute difference between the two counts.
    The result is always within the range 0 to 100.
    """
    if content1 == content2:
        return 100

    content1 = removeHtml(content1).lower()
    words1 = content1.split(' ')
    if len(words1) < minWords:
        return 0

    content2 = removeHtml(content2).lower()
    words2 = content2.split(' ')
    if len(words2) < minWords:
        return 0

    histogram1 = _wordPairCounts(words1)
    if not histogram1:
        # a single word gives no pairs to compare, and would otherwise
        # cause a division by zero below
        return 0
    histogram2 = _wordPairCounts(words2)

    diff = 0
    for combinedWords, hits in histogram1.items():
        otherHits = histogram2.get(combinedWords)
        if not otherHits:
            diff += 1
        else:
            diff += abs(otherHits - hits)

    # large count differences can push the raw score below zero,
    # so clamp to keep the result a valid percentage
    return max(0, 100 - int(diff * 100 / len(histogram1)))
|
||||
|
|
|
@ -13,23 +13,50 @@ from utils import acctDir
|
|||
from utils import removeIdEnding
|
||||
|
||||
|
||||
def updateConversation(baseDir: str, nickname: str, domain: str,
|
||||
postJsonObject: {}) -> bool:
|
||||
"""Ads a post to a conversation index in the /conversation subdirectory
|
||||
def _getConversationFilename(baseDir: str, nickname: str, domain: str,
                             postJsonObject: {}) -> str:
    """Returns the conversation filename

    The filename is the account directory for nickname@domain, followed
    by /conversation/ and the conversation id of the post with every '/'
    replaced by '#'. The /conversation subdirectory is created if it
    does not already exist.
    Returns None if the post fails hasObjectDict or if its object has
    no conversation or no id.
    """
    if not hasObjectDict(postJsonObject):
        return None
    postObject = postJsonObject['object']
    if not postObject.get('conversation'):
        return None
    if not postObject.get('id'):
        return None
    conversationDir = acctDir(baseDir, nickname, domain) + '/conversation'
    if not os.path.isdir(conversationDir):
        try:
            os.mkdir(conversationDir)
        except FileExistsError:
            # something else created the directory between the isdir
            # check and the mkdir call
            pass
    conversationId = postObject['conversation']
    conversationId = conversationId.replace('/', '#')
    return conversationDir + '/' + conversationId
|
||||
|
||||
|
||||
def previousConversationPostId(baseDir: str, nickname: str, domain: str,
                               postJsonObject: {}) -> str:
    """Returns the previous conversation post id

    This is the last line of the conversation index file for the
    conversation which the given post belongs to, with any newline
    removed.
    Returns False if there is no conversation filename for the post,
    if the index file does not exist or can't be read, or if it
    contains no lines.
    """
    conversationFilename = \
        _getConversationFilename(baseDir, nickname, domain, postJsonObject)
    if not conversationFilename:
        return False
    if not os.path.isfile(conversationFilename):
        return False
    try:
        with open(conversationFilename, 'r') as fp:
            lines = fp.readlines()
    except OSError:
        # the file can disappear or become unreadable after the isfile
        # check, which is treated the same as there being no index
        return False
    if lines:
        return lines[-1].replace('\n', '')
    return False
|
||||
|
||||
|
||||
def updateConversation(baseDir: str, nickname: str, domain: str,
|
||||
postJsonObject: {}) -> bool:
|
||||
"""Ads a post to a conversation index in the /conversation subdirectory
|
||||
"""
|
||||
conversationFilename = \
|
||||
_getConversationFilename(baseDir, nickname, domain, postJsonObject)
|
||||
if not conversationFilename:
|
||||
return False
|
||||
postId = removeIdEnding(postJsonObject['object']['id'])
|
||||
conversationFilename = conversationDir + '/' + conversationId
|
||||
if not os.path.isfile(conversationFilename):
|
||||
try:
|
||||
with open(conversationFilename, 'w+') as fp:
|
||||
|
|
15
inbox.py
15
inbox.py
|
@ -78,6 +78,7 @@ from utils import isDM
|
|||
from utils import isReply
|
||||
from utils import hasActor
|
||||
from httpsig import messageContentDigest
|
||||
from posts import editedPostFilename
|
||||
from posts import savePostToBox
|
||||
from posts import isCreateInsideAnnounce
|
||||
from posts import createDirectMessagePost
|
||||
|
@ -2836,8 +2837,22 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
|
|||
timeDiff + ' mS')
|
||||
|
||||
handleName = handle.split('@')[0]
|
||||
|
||||
# is this an edit of a previous post?
|
||||
# in Mastodon "delete and redraft"
|
||||
# NOTE: this must be done before updateConversation is called
|
||||
editedFilename = \
|
||||
editedPostFilename(baseDir, handleName, domain,
|
||||
postJsonObject, debug, 300)
|
||||
|
||||
updateConversation(baseDir, handleName, domain, postJsonObject)
|
||||
|
||||
# If this was an edit then delete the previous version of the post
|
||||
if editedFilename:
|
||||
deletePost(baseDir, httpPrefix,
|
||||
nickname, domain, editedFilename,
|
||||
debug, recentPostsCache)
|
||||
|
||||
_inboxUpdateCalendar(baseDir, handle, postJsonObject)
|
||||
|
||||
storeHashTags(baseDir, handleName, postJsonObject)
|
||||
|
|
81
posts.py
81
posts.py
|
@ -70,6 +70,7 @@ from utils import localActorUrl
|
|||
from media import attachMedia
|
||||
from media import replaceYouTube
|
||||
from media import replaceTwitter
|
||||
from content import wordsSimilarity
|
||||
from content import limitRepeatedWords
|
||||
from content import tagExists
|
||||
from content import removeLongWords
|
||||
|
@ -85,6 +86,7 @@ from linked_data_sig import generateJsonSignature
|
|||
from petnames import resolvePetnames
|
||||
from video import convertVideoToNote
|
||||
from context import getIndividualPostContext
|
||||
from conversation import previousConversationPostId
|
||||
|
||||
|
||||
def isModerator(baseDir: str, nickname: str) -> bool:
|
||||
|
@ -4959,3 +4961,82 @@ def c2sBoxJson(baseDir: str, session,
|
|||
print('DEBUG: GET c2sBoxJson success')
|
||||
|
||||
return boxJson
|
||||
|
||||
|
||||
def secondsBetweenPublished(published1: str, published2: str) -> int:
    """Returns the number of seconds between two published dates

    Both dates are expected in the format YYYY-MM-DDTHH:MM:SSZ.
    The result is the absolute difference in whole seconds, regardless
    of which date is the earlier one, so that a negative value is never
    returned for valid dates.
    Returns -1 if either date can't be parsed.
    """
    dateFormat = '%Y-%m-%dT%H:%M:%SZ'
    try:
        published1Time = \
            datetime.datetime.strptime(published1, dateFormat)
        published2Time = \
            datetime.datetime.strptime(published2, dateFormat)
    except (ValueError, TypeError):
        # ValueError for a string in the wrong format,
        # TypeError if something other than a string was supplied
        return -1
    # total_seconds includes whole days, whereas the seconds attribute
    # of a timedelta only holds the remainder within the final day
    return int(abs((published2Time - published1Time).total_seconds()))
|
||||
|
||||
|
||||
def _hasEditComparisonFields(postJsonObject: {}) -> bool:
    """Returns true if the post passes hasObjectDict and its object
    has non-empty published, id and content fields
    """
    if not hasObjectDict(postJsonObject):
        return False
    postObject = postJsonObject['object']
    for fieldName in ('published', 'id', 'content'):
        if not postObject.get(fieldName):
            return False
    return True


def editedPostFilename(baseDir: str, nickname: str, domain: str,
                       postJsonObject: {}, debug: bool,
                       maxTimeDiffSeconds: int) -> str:
    """Returns the filename of the edited post

    The given post is compared against the previous post within the
    same conversation. It is considered to be an edit of that previous
    post if all of the following are true:
      - the two posts have different ids
      - the ids are the same apart from their final path segment
      - the published dates are both valid and are no more than
        maxTimeDiffSeconds apart
      - the content similarity is at least 75 percent
    Returns the filename of the previous post if the given post is an
    edit of it, otherwise an empty string.
    """
    if not _hasEditComparisonFields(postJsonObject):
        return ''
    prevConvPostId = \
        previousConversationPostId(baseDir, nickname, domain,
                                   postJsonObject)
    if not prevConvPostId:
        return ''
    prevConvPostFilename = \
        locatePost(baseDir, nickname, domain, prevConvPostId, False)
    if not prevConvPostFilename:
        return ''
    prevPostJsonObject = loadJson(prevConvPostFilename, 0)
    if not prevPostJsonObject:
        return ''
    if not _hasEditComparisonFields(prevPostJsonObject):
        return ''
    prevObject = prevPostJsonObject['object']
    postObject = postJsonObject['object']
    if prevObject['id'] == postObject['id']:
        return ''

    id1 = removeIdEnding(prevObject['id'])
    if '/' not in id1:
        return ''
    id2 = removeIdEnding(postObject['id'])
    if '/' not in id2:
        return ''
    # split only on the last slash, so that an ending which also
    # occurs earlier within the id does not alter the prefix
    idPrefix1, ending1 = id1.rsplit('/', 1)
    if not ending1:
        return ''
    idPrefix2, ending2 = id2.rsplit('/', 1)
    if not ending2:
        return ''
    if idPrefix1 != idPrefix2:
        return ''

    timeDiffSeconds = \
        secondsBetweenPublished(prevObject['published'],
                                postObject['published'])
    # a negative value means that a published date could not be parsed
    if timeDiffSeconds < 0 or timeDiffSeconds > maxTimeDiffSeconds:
        return ''
    if debug:
        print(id2 + ' might be an edit of ' + id1)
    if wordsSimilarity(prevObject['content'],
                       postObject['content'], 10) < 75:
        return ''
    print(id2 + ' is an edit of ' + id1)
    return prevConvPostFilename
|
||||
|
|
31
tests.py
31
tests.py
|
@ -45,6 +45,7 @@ from posts import noOfFollowersOnDomain
|
|||
from posts import groupFollowersByDomain
|
||||
from posts import archivePostsForPerson
|
||||
from posts import sendPostViaServer
|
||||
from posts import secondsBetweenPublished
|
||||
from follow import clearFollows
|
||||
from follow import clearFollowers
|
||||
from follow import sendFollowRequestViaServer
|
||||
|
@ -119,6 +120,7 @@ from inbox import jsonPostAllowsComments
|
|||
from inbox import validInbox
|
||||
from inbox import validInboxFilenames
|
||||
from categories import guessHashtagCategory
|
||||
from content import wordsSimilarity
|
||||
from content import getPriceFromString
|
||||
from content import limitRepeatedWords
|
||||
from content import switchWords
|
||||
|
@ -5716,6 +5718,33 @@ def _testCanReplyTo(baseDir: str) -> None:
|
|||
postJsonObject)
|
||||
|
||||
|
||||
def _testSecondsBetweenPublished() -> None:
    """Checks the elapsed seconds between two valid published dates
    and the -1 result for a date which can't be parsed
    """
    print('testSecondsBetweenPublished')
    startPublished = "2021-10-14T09:39:27Z"

    # two minutes and one second later
    endPublished = "2021-10-14T09:41:28Z"
    assert secondsBetweenPublished(startPublished, endPublished) == 121

    # invalid date
    endPublished = "2021-10-14N09:41:28Z"
    assert secondsBetweenPublished(startPublished, endPublished) == -1
|
||||
|
||||
|
||||
def _testWordsSimilarity() -> None:
    """Checks that identical text is 100 percent similar and that
    text with small differences scores above 70 percent
    """
    print('testWordsSimilarity')
    minWords = 10

    sameText = "This is the same"
    assert wordsSimilarity(sameText, "This is the same", minWords) == 100

    original = \
        "This is our world now... " \
        "the world of the electron and the switch, the beauty of the baud"
    altered = \
        "This is our world now. " \
        "The world of the electron and the webkit, the beauty of the baud"
    assert wordsSimilarity(original, altered, minWords) > 70
|
||||
|
||||
|
||||
def runAllTests():
|
||||
baseDir = os.getcwd()
|
||||
print('Running tests...')
|
||||
|
@ -5723,6 +5752,8 @@ def runAllTests():
|
|||
_translateOntology(baseDir)
|
||||
_testGetPriceFromString()
|
||||
_testFunctions()
|
||||
_testWordsSimilarity()
|
||||
_testSecondsBetweenPublished()
|
||||
_testSignAndVerify()
|
||||
_testDangerousSVG(baseDir)
|
||||
_testCanReplyTo(baseDir)
|
||||
|
|
Loading…
Reference in New Issue