Add hashtag interpreter

main
Bob Mottram 2020-10-17 13:05:41 +01:00
parent 18034d20fe
commit c691cff82d
3 changed files with 238 additions and 0 deletions

View File

@ -353,6 +353,7 @@ def validHashTag(hashtag: str) -> bool:
# long hashtags are not valid
if len(hashtag) >= 32:
return False
# TODO: this may need to be an international character set
validChars = set('0123456789' +
'abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

View File

@ -15,6 +15,7 @@ from newswire import getDictFromNewswire
from posts import createNewsPost
from content import removeHtmlTag
from content import dangerousMarkup
from content import validHashTag
from utils import loadJson
from utils import saveJson
from utils import getStatusNumber
@ -71,6 +72,97 @@ def removeControlCharacters(content: str) -> str:
return content
def hasttagRuleResolve(tree: [], hashtags: []) -> bool:
"""Returns whether the tree for a hashtag rule evaluates to true or false
"""
if not tree:
return False
if tree[0] == 'not':
if len(tree) == 2:
if isinstance(tree[1], str):
return tree[1] not in hashtags
elif isinstance(tree[1], list):
return not hasttagRuleResolve(tree[1], hashtags)
elif tree[0] == 'and':
if len(tree) == 3:
firstArg = False
if isinstance(tree[1], str):
firstArg = (tree[1] in hashtags)
elif isinstance(tree[1], list):
firstArg = (hasttagRuleResolve(tree[1], hashtags))
secondArg = False
if isinstance(tree[2], str):
secondArg = (tree[2] in hashtags)
elif isinstance(tree[2], list):
secondArg = (hasttagRuleResolve(tree[2], hashtags))
return firstArg and secondArg
elif tree[0] == 'or':
if len(tree) == 3:
firstArg = False
if isinstance(tree[1], str):
firstArg = (tree[1] in hashtags)
elif isinstance(tree[1], list):
firstArg = (hasttagRuleResolve(tree[1], hashtags))
secondArg = False
if isinstance(tree[2], str):
secondArg = (tree[2] in hashtags)
elif isinstance(tree[2], list):
secondArg = (hasttagRuleResolve(tree[2], hashtags))
return firstArg or secondArg
elif tree[0].startswith('#') and len(tree) == 1:
return tree[0] in hashtags
return False
def hashtagRuleTree(operators: [],
conditionsStr: str,
tagsInConditions: []) -> []:
"""Walks the tree
"""
if not operators and conditionsStr:
conditionsStr = conditionsStr.strip()
if conditionsStr.startswith('#') or conditionsStr in operators:
if conditionsStr.startswith('#'):
if conditionsStr not in tagsInConditions:
if ' ' not in conditionsStr:
tagsInConditions.append(conditionsStr)
return [conditionsStr.strip()]
else:
return None
if not operators or not conditionsStr:
return None
tree = None
conditionsStr = conditionsStr.strip()
if conditionsStr.startswith('#') or conditionsStr in operators:
if conditionsStr.startswith('#'):
if conditionsStr not in tagsInConditions:
if ' ' not in conditionsStr:
tagsInConditions.append(conditionsStr)
tree = [conditionsStr.strip()]
ctr = 0
while ctr < len(operators):
op = operators[ctr]
if op not in conditionsStr:
ctr += 1
continue
else:
tree = [op]
sections = conditionsStr.split(op)
for subConditionStr in sections:
result = hashtagRuleTree(operators[ctr + 1:], subConditionStr,
tagsInConditions)
if result:
tree.append(result)
break
return tree
def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
hashtags: str, httpPrefix: str,
domain: str, port: int,
@ -82,6 +174,90 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
Returns true if the post should be saved to the news timeline
of this instance
"""
rulesFilename = baseDir + '/accounts/hashtagrules.txt'
if not os.path.isfile(rulesFilename):
return True
rules = []
with open(rulesFilename, "r") as f:
rules = f.readlines()
domainFull = domain
if port:
if port != 80 and port != 443:
domainFull = domain + ':' + str(port)
actionOccurred = False
operators = ('not', 'and', 'or')
for ruleStr in rules:
if not ruleStr:
continue
if not ruleStr.startswith('if '):
continue
if ' then ' not in ruleStr:
continue
conditionsStr = ruleStr.split('if ', 1)[1]
conditionsStr = conditionsStr.split(' then ')[0]
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
# does the rule contain any hashtags?
if not tagsInConditions:
continue
if not hasttagRuleResolve(tree, hashtags):
continue
# the condition matches, so do something
actionStr = ruleStr.split(' then ')[1].strip()
# add a hashtag
if actionStr.startswith('add '):
addHashtag = actionStr.split('add ', 1)[1].strip()
if addHashtag.startswith('#'):
if addHashtag not in hashtags:
hashtags.append(addHashtag)
htId = addHashtag.replace('#', '')
if validHashTag(htId):
hashtagUrl = \
httpPrefix + "://" + domainFull + "/tags/" + htId
postJsonObject['object']['tag'][htId] = {
'href': hashtagUrl,
'name': addHashtag,
'type': 'Hashtag'
}
hashtagHtml = \
"<a href=\"" + hashtagUrl + \
"\" class=\"mention hashtag\" " + \
"rel=\"tag\">#<span>" + \
htId + "</span></a>"
content = postJsonObject['object']['content']
if content.endswith('</p>'):
content = \
content[:len(content) - len('</p>')] + \
hashtagHtml + '</p>'
else:
content += hashtagHtml
postJsonObject['object']['content'] = content
actionOccurred = True
# remove a hashtag
if actionStr.startswith('remove '):
rmHashtag = actionStr.split('remove ', 1)[1].strip()
if rmHashtag.startswith('#'):
if rmHashtag in hashtags:
hashtags.remove(rmHashtag)
htId = addHashtag.replace('#', '')
hashtagUrl = \
httpPrefix + "://" + domainFull + "/tags/" + htId
hashtagHtml = \
"<a href=\"" + hashtagUrl + \
"\" class=\"mention hashtag\" " + \
"rel=\"tag\">#<span>" + \
htId + "</span></a>"
content = postJsonObject['object']['content']
if hashtagHtml in content:
postJsonObject['object']['content'] = \
content.replace(hashtagHtml, '')
del postJsonObject['object']['tag'][htId]
actionOccurred = True
# TODO
# If routing to another instance
# sendSignedJson(postJsonObject: {}, session, baseDir: str,
@ -91,6 +267,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
# federationList: [],
# sendThreads: [], postLog: [], cachedWebfingers: {},
# personCache: {}, False, __version__) -> int:
if actionOccurred:
return True
return True
@ -210,6 +388,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
# save the post and update the index
if savePost:
newswire[originalDateStr][6] = hashtags
if saveJson(blog, filename):
updateFeedsOutboxIndex(baseDir, domain, postId + '.json')

View File

@ -82,6 +82,8 @@ from content import removeHtmlTag
from theme import setCSSparam
from jsonldsig import testSignJsonld
from jsonldsig import jsonldVerify
from newsdaemon import hashtagRuleTree
from newsdaemon import hasttagRuleResolve
testServerAliceRunning = False
testServerBobRunning = False
@ -2173,8 +2175,64 @@ def testRemoveHtmlTag():
"src=\"https://somesiteorother.com/image.jpg\"></p>"
def testHashtagRuleTree():
print('testHashtagRuleTree')
operators = ('not', 'and', 'or')
conditionsStr = '#foo or #bar'
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
assert str(tree) == str(['or', ['#foo'], ['#bar']])
assert str(tagsInConditions) == str(['#foo', '#bar'])
hashtags = ['#foo']
assert hasttagRuleResolve(tree, hashtags)
hashtags = ['#carrot', '#stick']
assert not hasttagRuleResolve(tree, hashtags)
conditionsStr = 'x'
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
assert tree is None
assert tagsInConditions == []
hashtags = ['#foo']
assert not hasttagRuleResolve(tree, hashtags)
conditionsStr = '#x'
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
assert str(tree) == str(['#x'])
assert str(tagsInConditions) == str(['#x'])
hashtags = ['#x']
assert hasttagRuleResolve(tree, hashtags)
hashtags = ['#y', '#z']
assert not hasttagRuleResolve(tree, hashtags)
conditionsStr = 'not #b'
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
assert str(tree) == str(['not', ['#b']])
assert str(tagsInConditions) == str(['#b'])
hashtags = ['#y', '#z']
assert hasttagRuleResolve(tree, hashtags)
hashtags = ['#a', '#b', '#c']
assert not hasttagRuleResolve(tree, hashtags)
conditionsStr = '#foo or #bar and #a'
tagsInConditions = []
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']])
assert str(tagsInConditions) == str(['#foo', '#bar', '#a'])
hashtags = ['#bar', '#a']
assert hasttagRuleResolve(tree, hashtags)
hashtags = ['#foo', '#a']
assert hasttagRuleResolve(tree, hashtags)
hashtags = ['#x', '#a']
assert not hasttagRuleResolve(tree, hashtags)
def runAllTests():
print('Running tests...')
testHashtagRuleTree()
testRemoveHtmlTag()
testReplaceEmailQuote()
testConstantTimeStringCheck()