mirror of https://gitlab.com/bashrc2/epicyon
Check for edited posts
parent
054f4bde8e
commit
67bc0d61f4
40
content.py
40
content.py
|
@ -24,6 +24,7 @@ from utils import containsPGPPublicKey
|
||||||
from utils import acctDir
|
from utils import acctDir
|
||||||
from utils import isfloat
|
from utils import isfloat
|
||||||
from utils import getCurrencies
|
from utils import getCurrencies
|
||||||
|
from utils import removeHtml
|
||||||
from petnames import getPetName
|
from petnames import getPetName
|
||||||
|
|
||||||
|
|
||||||
|
@ -1133,3 +1134,42 @@ def getPriceFromString(priceStr: str) -> (str, str):
|
||||||
if isfloat(priceStr):
|
if isfloat(priceStr):
|
||||||
return priceStr, "EUR"
|
return priceStr, "EUR"
|
||||||
return "0.00", "EUR"
|
return "0.00", "EUR"
|
||||||
|
|
||||||
|
|
||||||
|
def wordsSimilarity(content1: str, content2: str, minWords: int) -> int:
    """Returns the percentage similarity between two texts, in the
    range 0 to 100

    Each text is passed through removeHtml, lowercased and split on
    spaces, then reduced to a histogram of adjacent word pairs.
    Every pair from content1 which is missing from content2 adds one
    to the difference, and every pair which is present in both adds the
    difference between the two hit counts. The difference is then scaled
    by the number of distinct pairs within content1.

    Returns 100 if the two texts are identical and 0 if either of them
    contains fewer than minWords words.
    """
    if content1 == content2:
        return 100
    words1 = removeHtml(content1).lower().split(' ')
    if len(words1) < minWords:
        return 0
    words2 = removeHtml(content2).lower().split(' ')
    if len(words2) < minWords:
        return 0

    def _wordPairsHistogram(words: []) -> {}:
        """Returns the number of hits for each pair of adjacent words
        """
        histogram = {}
        # pairs are stored as tuples rather than as concatenated strings
        # so that ("ab", "c") can't be confused with ("a", "bc")
        for wordPair in zip(words, words[1:]):
            histogram[wordPair] = histogram.get(wordPair, 0) + 1
        return histogram

    histogram1 = _wordPairsHistogram(words1)
    if not histogram1:
        # content1 is a single word, so there are no pairs to compare
        # and the division below would fail
        if words1 == words2:
            return 100
        return 0
    histogram2 = _wordPairsHistogram(words2)

    diff = 0
    for wordPair, hits in histogram1.items():
        otherHits = histogram2.get(wordPair)
        if not otherHits:
            diff += 1
        else:
            diff += abs(otherHits - hits)
    similarity = 100 - int(diff * 100 / len(histogram1))
    # large differences in hit counts could otherwise push the result
    # below zero
    return max(similarity, 0)
|
||||||
|
|
|
@ -13,23 +13,50 @@ from utils import acctDir
|
||||||
from utils import removeIdEnding
|
from utils import removeIdEnding
|
||||||
|
|
||||||
|
|
||||||
def updateConversation(baseDir: str, nickname: str, domain: str,
|
def _getConversationFilename(baseDir: str, nickname: str, domain: str,
                             postJsonObject: {}) -> str:
    """Returns the filename of the conversation index which the given
    post belongs to, creating the account's /conversation subdirectory
    if it does not already exist.
    Returns None if the post has no object dict, or if the object has
    no conversation or id
    """
    if not hasObjectDict(postJsonObject):
        return None
    postObject = postJsonObject['object']
    for requiredField in ('conversation', 'id'):
        if not postObject.get(requiredField):
            return None

    conversationDir = acctDir(baseDir, nickname, domain) + '/conversation'
    if not os.path.isdir(conversationDir):
        os.mkdir(conversationDir)

    # the conversation id contains slashes, which can't be used
    # within a filename
    indexName = postObject['conversation'].replace('/', '#')
    return conversationDir + '/' + indexName
|
||||||
|
|
||||||
|
|
||||||
|
def previousConversationPostId(baseDir: str, nickname: str, domain: str,
                               postJsonObject: {}) -> str:
    """Returns the previous conversation post id

    This is the last line of the conversation index file for the
    conversation which the given post belongs to.
    Returns False if no conversation filename could be obtained for
    the post, if the index file does not exist, if it is empty or if
    it could not be read
    """
    conversationFilename = \
        _getConversationFilename(baseDir, nickname, domain, postJsonObject)
    if not conversationFilename:
        return False
    if not os.path.isfile(conversationFilename):
        return False
    try:
        with open(conversationFilename, 'r') as fp:
            lines = fp.readlines()
    except OSError as ex:
        # the file may have been removed or become unreadable since the
        # check above. Don't let that stop the caller
        print('EX: previousConversationPostId unable to read ' +
              conversationFilename + ' ' + str(ex))
        return False
    if lines:
        return lines[-1].replace('\n', '')
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def updateConversation(baseDir: str, nickname: str, domain: str,
|
||||||
|
postJsonObject: {}) -> bool:
|
||||||
|
"""Ads a post to a conversation index in the /conversation subdirectory
|
||||||
|
"""
|
||||||
|
conversationFilename = \
|
||||||
|
_getConversationFilename(baseDir, nickname, domain, postJsonObject)
|
||||||
|
if not conversationFilename:
|
||||||
|
return False
|
||||||
postId = removeIdEnding(postJsonObject['object']['id'])
|
postId = removeIdEnding(postJsonObject['object']['id'])
|
||||||
conversationFilename = conversationDir + '/' + conversationId
|
|
||||||
if not os.path.isfile(conversationFilename):
|
if not os.path.isfile(conversationFilename):
|
||||||
try:
|
try:
|
||||||
with open(conversationFilename, 'w+') as fp:
|
with open(conversationFilename, 'w+') as fp:
|
||||||
|
|
15
inbox.py
15
inbox.py
|
@ -78,6 +78,7 @@ from utils import isDM
|
||||||
from utils import isReply
|
from utils import isReply
|
||||||
from utils import hasActor
|
from utils import hasActor
|
||||||
from httpsig import messageContentDigest
|
from httpsig import messageContentDigest
|
||||||
|
from posts import editedPostFilename
|
||||||
from posts import savePostToBox
|
from posts import savePostToBox
|
||||||
from posts import isCreateInsideAnnounce
|
from posts import isCreateInsideAnnounce
|
||||||
from posts import createDirectMessagePost
|
from posts import createDirectMessagePost
|
||||||
|
@ -2836,8 +2837,22 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
|
||||||
timeDiff + ' mS')
|
timeDiff + ' mS')
|
||||||
|
|
||||||
handleName = handle.split('@')[0]
|
handleName = handle.split('@')[0]
|
||||||
|
|
||||||
|
# is this an edit of a previous post?
|
||||||
|
# in Mastodon "delete and redraft"
|
||||||
|
# NOTE: this must be done before updateConversation is called
|
||||||
|
editedFilename = \
|
||||||
|
editedPostFilename(baseDir, handleName, domain,
|
||||||
|
postJsonObject, debug, 300)
|
||||||
|
|
||||||
updateConversation(baseDir, handleName, domain, postJsonObject)
|
updateConversation(baseDir, handleName, domain, postJsonObject)
|
||||||
|
|
||||||
|
# If this was an edit then delete the previous version of the post
|
||||||
|
if editedFilename:
|
||||||
|
deletePost(baseDir, httpPrefix,
|
||||||
|
nickname, domain, editedFilename,
|
||||||
|
debug, recentPostsCache)
|
||||||
|
|
||||||
_inboxUpdateCalendar(baseDir, handle, postJsonObject)
|
_inboxUpdateCalendar(baseDir, handle, postJsonObject)
|
||||||
|
|
||||||
storeHashTags(baseDir, handleName, postJsonObject)
|
storeHashTags(baseDir, handleName, postJsonObject)
|
||||||
|
|
81
posts.py
81
posts.py
|
@ -70,6 +70,7 @@ from utils import localActorUrl
|
||||||
from media import attachMedia
|
from media import attachMedia
|
||||||
from media import replaceYouTube
|
from media import replaceYouTube
|
||||||
from media import replaceTwitter
|
from media import replaceTwitter
|
||||||
|
from content import wordsSimilarity
|
||||||
from content import limitRepeatedWords
|
from content import limitRepeatedWords
|
||||||
from content import tagExists
|
from content import tagExists
|
||||||
from content import removeLongWords
|
from content import removeLongWords
|
||||||
|
@ -85,6 +86,7 @@ from linked_data_sig import generateJsonSignature
|
||||||
from petnames import resolvePetnames
|
from petnames import resolvePetnames
|
||||||
from video import convertVideoToNote
|
from video import convertVideoToNote
|
||||||
from context import getIndividualPostContext
|
from context import getIndividualPostContext
|
||||||
|
from conversation import previousConversationPostId
|
||||||
|
|
||||||
|
|
||||||
def isModerator(baseDir: str, nickname: str) -> bool:
|
def isModerator(baseDir: str, nickname: str) -> bool:
|
||||||
|
@ -4959,3 +4961,82 @@ def c2sBoxJson(baseDir: str, session,
|
||||||
print('DEBUG: GET c2sBoxJson success')
|
print('DEBUG: GET c2sBoxJson success')
|
||||||
|
|
||||||
return boxJson
|
return boxJson
|
||||||
|
|
||||||
|
|
||||||
|
def secondsBetweenPublished(published1: str, published2: str) -> int:
    """Returns the number of seconds between two published dates

    Both dates are expected to be in the format YYYY-MM-DDTHH:MM:SSZ
    The result is the absolute difference, so it does not depend upon
    the order of the two dates.
    Returns -1 if either of the dates could not be parsed
    """
    dateFormat = '%Y-%m-%dT%H:%M:%SZ'
    try:
        published1Time = \
            datetime.datetime.strptime(published1, dateFormat)
        published2Time = \
            datetime.datetime.strptime(published2, dateFormat)
    except (ValueError, TypeError):
        # ValueError: the date doesn't match the format
        # TypeError: the date isn't a string
        return -1
    # NOTE: timedelta.seconds only contains the remainder after whole
    # days have been removed, so total_seconds is needed in order to
    # get the full difference
    timeDiff = published2Time - published1Time
    return abs(int(timeDiff.total_seconds()))
|
||||||
|
|
||||||
|
|
||||||
|
def editedPostFilename(baseDir: str, nickname: str, domain: str,
                       postJsonObject: {}, debug: bool,
                       maxTimeDiffSeconds: int) -> str:
    """Returns the filename of the edited post

    The previous post within the same conversation is considered to
    have been edited by the given post if all of the following apply:
      - the two posts have different ids which only differ in their
        final path segment
      - their published dates are no more than maxTimeDiffSeconds apart
      - the similarity of their content is at least 75 percent
    Returns an empty string if the given post is not an edit, or if
    either post lacks a published date, id or content
    """
    requiredFields = ('published', 'id', 'content')

    if not hasObjectDict(postJsonObject):
        return ''
    postObject = postJsonObject['object']
    for fieldName in requiredFields:
        if not postObject.get(fieldName):
            return ''

    # load the previous post within the conversation
    prevConvPostId = \
        previousConversationPostId(baseDir, nickname, domain,
                                   postJsonObject)
    if not prevConvPostId:
        return ''
    prevConvPostFilename = \
        locatePost(baseDir, nickname, domain, prevConvPostId, False)
    if not prevConvPostFilename:
        return ''
    prevPostJsonObject = loadJson(prevConvPostFilename, 0)
    if not prevPostJsonObject:
        return ''
    if not hasObjectDict(prevPostJsonObject):
        return ''
    prevObject = prevPostJsonObject['object']
    for fieldName in requiredFields:
        if not prevObject.get(fieldName):
            return ''

    # a post can't be an edit of itself
    if prevObject['id'] == postObject['id']:
        return ''

    # everything before the final path segment of the id should match
    id1 = removeIdEnding(prevObject['id'])
    if '/' not in id1:
        return ''
    id2 = removeIdEnding(postObject['id'])
    if '/' not in id2:
        return ''
    # NOTE: split from the right, so that only the final segment is
    # removed even if the same text appears earlier within the id
    idPrefix1, ending1 = id1.rsplit('/', 1)
    if not ending1:
        return ''
    idPrefix2, ending2 = id2.rsplit('/', 1)
    if not ending2:
        return ''
    if idPrefix1 != idPrefix2:
        return ''

    timeDiffSeconds = \
        secondsBetweenPublished(prevObject['published'],
                                postObject['published'])
    # a negative value means that one of the published dates
    # could not be parsed
    if timeDiffSeconds < 0 or timeDiffSeconds > maxTimeDiffSeconds:
        return ''
    if debug:
        print(id2 + ' might be an edit of ' + id1)

    if wordsSimilarity(prevObject['content'],
                       postObject['content'], 10) < 75:
        return ''
    print(id2 + ' is an edit of ' + id1)
    return prevConvPostFilename
|
||||||
|
|
31
tests.py
31
tests.py
|
@ -45,6 +45,7 @@ from posts import noOfFollowersOnDomain
|
||||||
from posts import groupFollowersByDomain
|
from posts import groupFollowersByDomain
|
||||||
from posts import archivePostsForPerson
|
from posts import archivePostsForPerson
|
||||||
from posts import sendPostViaServer
|
from posts import sendPostViaServer
|
||||||
|
from posts import secondsBetweenPublished
|
||||||
from follow import clearFollows
|
from follow import clearFollows
|
||||||
from follow import clearFollowers
|
from follow import clearFollowers
|
||||||
from follow import sendFollowRequestViaServer
|
from follow import sendFollowRequestViaServer
|
||||||
|
@ -119,6 +120,7 @@ from inbox import jsonPostAllowsComments
|
||||||
from inbox import validInbox
|
from inbox import validInbox
|
||||||
from inbox import validInboxFilenames
|
from inbox import validInboxFilenames
|
||||||
from categories import guessHashtagCategory
|
from categories import guessHashtagCategory
|
||||||
|
from content import wordsSimilarity
|
||||||
from content import getPriceFromString
|
from content import getPriceFromString
|
||||||
from content import limitRepeatedWords
|
from content import limitRepeatedWords
|
||||||
from content import switchWords
|
from content import switchWords
|
||||||
|
@ -5716,6 +5718,33 @@ def _testCanReplyTo(baseDir: str) -> None:
|
||||||
postJsonObject)
|
postJsonObject)
|
||||||
|
|
||||||
|
|
||||||
|
def _testSecondsBetweenPublished() -> None:
    """Tests secondsBetweenPublished with a pair of valid dates
    and with a date which can't be parsed
    """
    print('testSecondsBetweenPublished')
    startDate = "2021-10-14T09:39:27Z"

    # two minutes and one second after the start date
    validEndDate = "2021-10-14T09:41:28Z"
    assert secondsBetweenPublished(startDate, validEndDate) == 121

    # invalid date, having N rather than T as the separator
    invalidEndDate = "2021-10-14N09:41:28Z"
    assert secondsBetweenPublished(startDate, invalidEndDate) == -1
|
||||||
|
|
||||||
|
|
||||||
|
def _testWordsSimilarity() -> None:
    """Tests wordsSimilarity with identical texts and with
    a pair of texts which differ by a few words
    """
    print('testWordsSimilarity')
    minWords = 10

    # identical texts match fully, even when shorter than minWords
    sameText = "This is the same"
    assert wordsSimilarity(sameText, "This is the same", minWords) == 100

    # differing punctuation, capitalization and a single changed word
    originalText = (
        "This is our world now... "
        "the world of the electron and the switch, the beauty of the baud"
    )
    editedText = (
        "This is our world now. "
        "The world of the electron and the webkit, the beauty of the baud"
    )
    assert wordsSimilarity(originalText, editedText, minWords) > 70
|
||||||
|
|
||||||
|
|
||||||
def runAllTests():
|
def runAllTests():
|
||||||
baseDir = os.getcwd()
|
baseDir = os.getcwd()
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
|
@ -5723,6 +5752,8 @@ def runAllTests():
|
||||||
_translateOntology(baseDir)
|
_translateOntology(baseDir)
|
||||||
_testGetPriceFromString()
|
_testGetPriceFromString()
|
||||||
_testFunctions()
|
_testFunctions()
|
||||||
|
_testWordsSimilarity()
|
||||||
|
_testSecondsBetweenPublished()
|
||||||
_testSignAndVerify()
|
_testSignAndVerify()
|
||||||
_testDangerousSVG(baseDir)
|
_testDangerousSVG(baseDir)
|
||||||
_testCanReplyTo(baseDir)
|
_testCanReplyTo(baseDir)
|
||||||
|
|
Loading…
Reference in New Issue