Change the way that edited items are compared

2021-10-18 16:20:22 +01:00 · 2021-10-18 16:20:22 +01:00 · acc70ae594
parent a35c809668
commit acc70ae594
4 changed files with 80 additions and 55 deletions
--- a/conversation.py
+++ b/conversation.py
@ -31,23 +31,6 @@ def _getConversationFilename(baseDir: str, nickname: str, domain: str,
    return conversationDir + '/' + conversationId
 def previousConversationPostId(baseDir: str, nickname: str, domain: str,
                               postJsonObject: {}) -> str:
    """Returns the previous conversation post id
    """
    conversationFilename = \
        _getConversationFilename(baseDir, nickname, domain, postJsonObject)
    if not conversationFilename:
        return None
    if not os.path.isfile(conversationFilename):
        return None
    with open(conversationFilename, 'r') as fp:
        lines = fp.readlines()
        if lines:
            return lines[-1].replace('\n', '')
    return None
 def updateConversation(baseDir: str, nickname: str, domain: str,
                       postJsonObject: {}) -> bool:
    """Ads a post to a conversation index in the /conversation subdirectory
--- a/inbox.py
+++ b/inbox.py
@ -107,6 +107,33 @@ from conversation import updateConversation
 from content import validHashTag
 def _storeLastPostId(baseDir: str, nickname: str, domain: str,
                     postJsonObject: {}) -> None:
    """Stores the id of the last post made by an actor
    """
    actor = postId = None
    if hasObjectDict(postJsonObject):
        if postJsonObject['object'].get('attributedTo'):
            if isinstance(postJsonObject['object']['attributedTo'], str):
                actor = postJsonObject['object']['attributedTo']
                postId = removeIdEnding(postJsonObject['object']['id'])
    if not actor:
        actor = postJsonObject['actor']
        postId = postJsonObject['id']
    if not actor:
        return
    lastpostDir = acctDir(baseDir, nickname, domain) + '/lastpost'
    if not os.path.isdir(lastpostDir):
        os.mkdir(lastpostDir)
    actorFilename = lastpostDir + '/' + actor.replace('/', '#')
    try:
        with open(actorFilename, 'w+') as fp:
            fp.write(postId)
    except BaseException:
        print('Unable to write last post id to ' + actorFilename)
        pass
 def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
    """Extracts hashtags from an incoming post and updates the
    relevant tags files.
@ -2889,6 +2916,9 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
                           nickname, domain, editedFilename,
                           debug, recentPostsCache)
            # store the id of the last post made by this actor
            _storeLastPostId(baseDir, nickname, domain, postJsonObject)
            _inboxUpdateCalendar(baseDir, handle, postJsonObject)
            storeHashTags(baseDir, handleName, postJsonObject)
--- a/posts.py
+++ b/posts.py
@ -86,7 +86,6 @@ from linked_data_sig import generateJsonSignature
 from petnames import resolvePetnames
 from video import convertVideoToNote
 from context import getIndividualPostContext
 from conversation import previousConversationPostId
 def isModerator(baseDir: str, nickname: str) -> bool:
@ -5002,54 +5001,56 @@ def editedPostFilename(baseDir: str, nickname: str, domain: str,
        return ''
    if not postJsonObject['object'].get('content'):
        return ''
-    prevConvPostId = \
+    if not postJsonObject['object'].get('attributedTo'):
        previousConversationPostId(baseDir, nickname, domain,
                                   postJsonObject)
    if not prevConvPostId:
        return ''
-    prevConvPostFilename = \
+    if not isinstance(postJsonObject['object']['attributedTo'], str):
        locatePost(baseDir, nickname, domain, prevConvPostId, False)
    if not prevConvPostFilename:
        return ''
-    prevPostJsonObject = loadJson(prevConvPostFilename, 0)
+    actor = postJsonObject['object']['attributedTo']
-    if not prevPostJsonObject:
+    actorFilename = \
        acctDir(baseDir, nickname, domain) + '/lastpost/' + \
        actor.replace('/', '#')
    if not os.path.isfile(actorFilename):
        return ''
-    if not hasObjectDict(prevPostJsonObject):
+    postId = removeIdEnding(postJsonObject['object']['id'])
    lastpostId = None
    try:
        with open(actorFilename, 'r') as fp:
            lastpostId = fp.read()
    except BaseException:
        return ''
-    if not prevPostJsonObject['object'].get('published'):
+    if not lastpostId:
        return ''
-    if not prevPostJsonObject['object'].get('id'):
+    if lastpostId == postId:
        return ''
-    if not prevPostJsonObject['object'].get('content'):
+    lastpostFilename = \
        locatePost(baseDir, nickname, domain, lastpostId, False)
    if not lastpostFilename:
        return ''
-    if prevPostJsonObject['object']['id'] == postJsonObject['object']['id']:
+    lastpostJson = loadJson(lastpostFilename, 0)
    if not lastpostJson:
        return ''
-    id1 = removeIdEnding(prevPostJsonObject['object']['id'])
+    if not lastpostJson['object'].get('published'):
    if '/' not in id1:
        return ''
-    id2 = removeIdEnding(postJsonObject['object']['id'])
+    if not lastpostJson['object'].get('id'):
    if '/' not in id2:
        return ''
-    ending1 = id1.split('/')[-1]
+    if not lastpostJson['object'].get('content'):
    if not ending1:
        return ''
-    ending2 = id2.split('/')[-1]
+    if not lastpostJson['object'].get('attributedTo'):
    if not ending2:
        return ''
-    if id1.replace(ending1, '') != id2.replace(ending2, ''):
+    if not isinstance(lastpostJson['object']['attributedTo'], str):
        return ''
    timeDiffSeconds = \
-        secondsBetweenPublished(prevPostJsonObject['object']['published'],
+        secondsBetweenPublished(lastpostJson['object']['published'],
                                postJsonObject['object']['published'])
    if timeDiffSeconds > maxTimeDiffSeconds:
        return ''
    if debug:
-        print(id2 + ' might be an edit of ' + id1)
+        print(postId + ' might be an edit of ' + lastpostId)
-    if wordsSimilarity(prevPostJsonObject['object']['content'],
+    if wordsSimilarity(lastpostJson['object']['content'],
                       postJsonObject['object']['content'], 10) < 70:
        return ''
-    print(id2 + ' is an edit of ' + id1)
+    print(postId + ' is an edit of ' + lastpostId)
-    return prevConvPostFilename
+    return lastpostFilename
 def getOriginalPostFromAnnounceUrl(announceUrl: str, baseDir: str,
--- a/tests.py
+++ b/tests.py
@ -1976,6 +1976,18 @@ def testSharedItemsFederation(baseDir: str) -> None:
    assert 'DFC:supplies' in catalogJson
    assert len(catalogJson.get('DFC:supplies')) == 3
    # queue item removed
    ctr = 0
    while len([name for name in os.listdir(queuePath)
               if os.path.isfile(os.path.join(queuePath, name))]) > 0:
        ctr += 1
        if ctr > 10:
            break
        time.sleep(1)
 #    assert len([name for name in os.listdir(queuePath)
 #                if os.path.isfile(os.path.join(queuePath, name))]) == 0
    # stop the servers
    thrAlice.kill()
    thrAlice.join()
@ -1985,11 +1997,6 @@ def testSharedItemsFederation(baseDir: str) -> None:
    thrBob.join()
    assert thrBob.is_alive() is False
    # queue item removed
    time.sleep(4)
    assert len([name for name in os.listdir(queuePath)
                if os.path.isfile(os.path.join(queuePath, name))]) == 0
    os.chdir(baseDir)
    shutil.rmtree(baseDir + '/.tests')
    print('Testing federation of shared items between ' +
@ -5743,16 +5750,20 @@ def _testWordsSimilarity() -> None:
        "The world of the electron and the webkit, the beauty of the baud"
    similarity = wordsSimilarity(content1, content2, minWords)
    assert similarity > 70
-    content1 = "<p>We&apos;re growing! </p><p>A new writer and developer is joining TuxPhones. You probably know him already from his open-source work - but let&apos;s not spoil too much \ud83d\udd2e</p>"
+    content1 = "<p>We&apos;re growing! </p><p>A new denizen " + \
-    content2 = "<p>We&apos;re growing! </p><p>A new writer and developer is joining TuxPhones. You probably know them already from their open-source work - but let&apos;s not spoil too much \ud83d\udd2e</p>"
+        "is frequenting HackBucket. You probably know him already " + \
        "from his epic typos - but let&apos;s not spoil too much " + \
        "\ud83d\udd2e</p>"
    content2 = "<p>We&apos;re growing! </p><p>A new denizen " + \
        "is frequenting HackBucket. You probably know them already " + \
        "from their epic typos - but let&apos;s not spoil too much " + \
        "\ud83d\udd2e</p>"
    similarity = wordsSimilarity(content1, content2, minWords)
    assert similarity > 85
 def runAllTests():
    baseDir = os.getcwd()
    _testWordsSimilarity()
    return
    print('Running tests...')
    updateDefaultThemesList(os.getcwd())
    _translateOntology(baseDir)