forked from indymedia/epicyon
Add hashtag interpreter
parent
18034d20fe
commit
c691cff82d
|
@ -353,6 +353,7 @@ def validHashTag(hashtag: str) -> bool:
|
||||||
# long hashtags are not valid
|
# long hashtags are not valid
|
||||||
if len(hashtag) >= 32:
|
if len(hashtag) >= 32:
|
||||||
return False
|
return False
|
||||||
|
# TODO: this may need to be an international character set
|
||||||
validChars = set('0123456789' +
|
validChars = set('0123456789' +
|
||||||
'abcdefghijklmnopqrstuvwxyz' +
|
'abcdefghijklmnopqrstuvwxyz' +
|
||||||
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
||||||
|
|
179
newsdaemon.py
179
newsdaemon.py
|
@ -15,6 +15,7 @@ from newswire import getDictFromNewswire
|
||||||
from posts import createNewsPost
|
from posts import createNewsPost
|
||||||
from content import removeHtmlTag
|
from content import removeHtmlTag
|
||||||
from content import dangerousMarkup
|
from content import dangerousMarkup
|
||||||
|
from content import validHashTag
|
||||||
from utils import loadJson
|
from utils import loadJson
|
||||||
from utils import saveJson
|
from utils import saveJson
|
||||||
from utils import getStatusNumber
|
from utils import getStatusNumber
|
||||||
|
@ -71,6 +72,97 @@ def removeControlCharacters(content: str) -> str:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def hasttagRuleResolve(tree: [], hashtags: []) -> bool:
    """Returns whether the tree for a hashtag rule evaluates to true or false

    tree is a nested list whose first element is either an operator
    ('not', 'and', 'or') followed by its operands, or a single hashtag
    such as ['#foo']. Each operand is either a hashtag string or another
    tree (list).

    hashtags is the list of hashtags (each including the leading '#')
    that the rule is evaluated against.

    'not' takes exactly one operand. 'and' and 'or' take two or more
    operands, because a condition such as "#a or #b or #c" is parsed
    into a single node with three operands.
    An empty, missing or malformed tree evaluates to False.
    """
    if not tree:
        return False

    operator = tree[0]
    if not isinstance(operator, str):
        # malformed node: the first element must be an operator or hashtag
        return False

    def operandIsTrue(operand) -> bool:
        # a bare string is a hashtag, a list is a sub-tree
        if isinstance(operand, str):
            return operand in hashtags
        if isinstance(operand, list):
            return hasttagRuleResolve(operand, hashtags)
        return False

    operands = tree[1:]

    if operator == 'not':
        if len(operands) == 1 and isinstance(operands[0], (str, list)):
            return not operandIsTrue(operands[0])
        return False

    if operator == 'and':
        # a lone operand is treated as a malformed rule
        return len(operands) >= 2 and \
            all(operandIsTrue(operand) for operand in operands)

    if operator == 'or':
        return len(operands) >= 2 and \
            any(operandIsTrue(operand) for operand in operands)

    if operator.startswith('#') and not operands:
        return operator in hashtags

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def hashtagRuleTree(operators: [],
                    conditionsStr: str,
                    tagsInConditions: []) -> []:
    """Converts the conditions of a hashtag rule into a tree

    operators lists the supported operators. The condition is split on
    the first listed operator that it contains, and each resulting
    section is then parsed recursively using only the operators which
    come later in the list.

    conditionsStr is the text of the condition, eg. "#foo or #bar".

    tagsInConditions is updated in place with each hashtag found within
    the conditions, without duplicates.

    Returns a nested list such as ['or', ['#foo'], ['#bar']], or None
    if the conditions contain neither a hashtag nor an operator.

    Operators only match whole whitespace separated words, so that
    hashtags which contain an operator name (#forest, #android, #notes)
    are not broken apart.
    """
    if not conditionsStr:
        return None
    conditionsStr = conditionsStr.strip()
    words = conditionsStr.split()
    if not words:
        return None

    for index, op in enumerate(operators or ()):
        if op not in words:
            continue
        tree = [op]
        remainingOperators = operators[index + 1:]
        section = []
        # the trailing operator is a sentinel which flushes the last section
        for word in words + [op]:
            if word != op:
                section.append(word)
                continue
            result = hashtagRuleTree(remainingOperators,
                                     ' '.join(section),
                                     tagsInConditions)
            if result:
                tree.append(result)
            section = []
        return tree

    # no operator is present, so this is either a hashtag or invalid
    if not conditionsStr.startswith('#'):
        return None
    if len(words) == 1 and conditionsStr not in tagsInConditions:
        tagsInConditions.append(conditionsStr)
    return [conditionsStr]
|
||||||
|
|
||||||
|
|
||||||
def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
||||||
hashtags: str, httpPrefix: str,
|
hashtags: str, httpPrefix: str,
|
||||||
domain: str, port: int,
|
domain: str, port: int,
|
||||||
|
@ -82,6 +174,90 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
||||||
Returns true if the post should be saved to the news timeline
|
Returns true if the post should be saved to the news timeline
|
||||||
of this instance
|
of this instance
|
||||||
"""
|
"""
|
||||||
|
rulesFilename = baseDir + '/accounts/hashtagrules.txt'
|
||||||
|
if not os.path.isfile(rulesFilename):
|
||||||
|
return True
|
||||||
|
rules = []
|
||||||
|
with open(rulesFilename, "r") as f:
|
||||||
|
rules = f.readlines()
|
||||||
|
|
||||||
|
domainFull = domain
|
||||||
|
if port:
|
||||||
|
if port != 80 and port != 443:
|
||||||
|
domainFull = domain + ':' + str(port)
|
||||||
|
|
||||||
|
actionOccurred = False
|
||||||
|
operators = ('not', 'and', 'or')
|
||||||
|
for ruleStr in rules:
|
||||||
|
if not ruleStr:
|
||||||
|
continue
|
||||||
|
if not ruleStr.startswith('if '):
|
||||||
|
continue
|
||||||
|
if ' then ' not in ruleStr:
|
||||||
|
continue
|
||||||
|
conditionsStr = ruleStr.split('if ', 1)[1]
|
||||||
|
conditionsStr = conditionsStr.split(' then ')[0]
|
||||||
|
tagsInConditions = []
|
||||||
|
tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions)
|
||||||
|
# does the rule contain any hashtags?
|
||||||
|
if not tagsInConditions:
|
||||||
|
continue
|
||||||
|
if not hasttagRuleResolve(tree, hashtags):
|
||||||
|
continue
|
||||||
|
# the condition matches, so do something
|
||||||
|
actionStr = ruleStr.split(' then ')[1].strip()
|
||||||
|
|
||||||
|
# add a hashtag
|
||||||
|
if actionStr.startswith('add '):
|
||||||
|
addHashtag = actionStr.split('add ', 1)[1].strip()
|
||||||
|
if addHashtag.startswith('#'):
|
||||||
|
if addHashtag not in hashtags:
|
||||||
|
hashtags.append(addHashtag)
|
||||||
|
htId = addHashtag.replace('#', '')
|
||||||
|
if validHashTag(htId):
|
||||||
|
hashtagUrl = \
|
||||||
|
httpPrefix + "://" + domainFull + "/tags/" + htId
|
||||||
|
postJsonObject['object']['tag'][htId] = {
|
||||||
|
'href': hashtagUrl,
|
||||||
|
'name': addHashtag,
|
||||||
|
'type': 'Hashtag'
|
||||||
|
}
|
||||||
|
hashtagHtml = \
|
||||||
|
"<a href=\"" + hashtagUrl + \
|
||||||
|
"\" class=\"mention hashtag\" " + \
|
||||||
|
"rel=\"tag\">#<span>" + \
|
||||||
|
htId + "</span></a>"
|
||||||
|
content = postJsonObject['object']['content']
|
||||||
|
if content.endswith('</p>'):
|
||||||
|
content = \
|
||||||
|
content[:len(content) - len('</p>')] + \
|
||||||
|
hashtagHtml + '</p>'
|
||||||
|
else:
|
||||||
|
content += hashtagHtml
|
||||||
|
postJsonObject['object']['content'] = content
|
||||||
|
actionOccurred = True
|
||||||
|
|
||||||
|
# remove a hashtag
|
||||||
|
if actionStr.startswith('remove '):
|
||||||
|
rmHashtag = actionStr.split('remove ', 1)[1].strip()
|
||||||
|
if rmHashtag.startswith('#'):
|
||||||
|
if rmHashtag in hashtags:
|
||||||
|
hashtags.remove(rmHashtag)
|
||||||
|
htId = addHashtag.replace('#', '')
|
||||||
|
hashtagUrl = \
|
||||||
|
httpPrefix + "://" + domainFull + "/tags/" + htId
|
||||||
|
hashtagHtml = \
|
||||||
|
"<a href=\"" + hashtagUrl + \
|
||||||
|
"\" class=\"mention hashtag\" " + \
|
||||||
|
"rel=\"tag\">#<span>" + \
|
||||||
|
htId + "</span></a>"
|
||||||
|
content = postJsonObject['object']['content']
|
||||||
|
if hashtagHtml in content:
|
||||||
|
postJsonObject['object']['content'] = \
|
||||||
|
content.replace(hashtagHtml, '')
|
||||||
|
del postJsonObject['object']['tag'][htId]
|
||||||
|
actionOccurred = True
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# If routing to another instance
|
# If routing to another instance
|
||||||
# sendSignedJson(postJsonObject: {}, session, baseDir: str,
|
# sendSignedJson(postJsonObject: {}, session, baseDir: str,
|
||||||
|
@ -91,6 +267,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
||||||
# federationList: [],
|
# federationList: [],
|
||||||
# sendThreads: [], postLog: [], cachedWebfingers: {},
|
# sendThreads: [], postLog: [], cachedWebfingers: {},
|
||||||
# personCache: {}, False, __version__) -> int:
|
# personCache: {}, False, __version__) -> int:
|
||||||
|
if actionOccurred:
|
||||||
|
return True
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@ -210,6 +388,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
|
|
||||||
# save the post and update the index
|
# save the post and update the index
|
||||||
if savePost:
|
if savePost:
|
||||||
|
newswire[originalDateStr][6] = hashtags
|
||||||
if saveJson(blog, filename):
|
if saveJson(blog, filename):
|
||||||
updateFeedsOutboxIndex(baseDir, domain, postId + '.json')
|
updateFeedsOutboxIndex(baseDir, domain, postId + '.json')
|
||||||
|
|
||||||
|
|
58
tests.py
58
tests.py
|
@ -82,6 +82,8 @@ from content import removeHtmlTag
|
||||||
from theme import setCSSparam
|
from theme import setCSSparam
|
||||||
from jsonldsig import testSignJsonld
|
from jsonldsig import testSignJsonld
|
||||||
from jsonldsig import jsonldVerify
|
from jsonldsig import jsonldVerify
|
||||||
|
from newsdaemon import hashtagRuleTree
|
||||||
|
from newsdaemon import hasttagRuleResolve
|
||||||
|
|
||||||
testServerAliceRunning = False
|
testServerAliceRunning = False
|
||||||
testServerBobRunning = False
|
testServerBobRunning = False
|
||||||
|
@ -2173,8 +2175,64 @@ def testRemoveHtmlTag():
|
||||||
"src=\"https://somesiteorother.com/image.jpg\"></p>"
|
"src=\"https://somesiteorother.com/image.jpg\"></p>"
|
||||||
|
|
||||||
|
|
||||||
|
def testHashtagRuleTree():
    """Checks that hashtag rule conditions are parsed into the expected
    tree, that the hashtags within the conditions are collected, and that
    the resulting tree resolves correctly against lists of hashtags
    """
    print('testHashtagRuleTree')
    ruleOperators = ('not', 'and', 'or')

    def parseConditions(conditions: str):
        # returns the parsed tree together with the hashtags found
        foundTags = []
        parsedTree = hashtagRuleTree(ruleOperators, conditions, foundTags)
        return parsedTree, foundTags

    # two hashtags joined by a single operator
    ruleTree, ruleTags = parseConditions('#foo or #bar')
    assert str(ruleTree) == str(['or', ['#foo'], ['#bar']])
    assert str(ruleTags) == str(['#foo', '#bar'])
    assert hasttagRuleResolve(ruleTree, ['#foo'])
    assert not hasttagRuleResolve(ruleTree, ['#carrot', '#stick'])

    # text which is neither a hashtag nor an operator
    ruleTree, ruleTags = parseConditions('x')
    assert ruleTree is None
    assert ruleTags == []
    assert not hasttagRuleResolve(ruleTree, ['#foo'])

    # a single hashtag
    ruleTree, ruleTags = parseConditions('#x')
    assert str(ruleTree) == str(['#x'])
    assert str(ruleTags) == str(['#x'])
    assert hasttagRuleResolve(ruleTree, ['#x'])
    assert not hasttagRuleResolve(ruleTree, ['#y', '#z'])

    # negation
    ruleTree, ruleTags = parseConditions('not #b')
    assert str(ruleTree) == str(['not', ['#b']])
    assert str(ruleTags) == str(['#b'])
    assert hasttagRuleResolve(ruleTree, ['#y', '#z'])
    assert not hasttagRuleResolve(ruleTree, ['#a', '#b', '#c'])

    # mixed operators produce a nested tree
    ruleTree, ruleTags = parseConditions('#foo or #bar and #a')
    assert str(ruleTree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']])
    assert str(ruleTags) == str(['#foo', '#bar', '#a'])
    assert hasttagRuleResolve(ruleTree, ['#bar', '#a'])
    assert hasttagRuleResolve(ruleTree, ['#foo', '#a'])
    assert not hasttagRuleResolve(ruleTree, ['#x', '#a'])
|
||||||
|
|
||||||
|
|
||||||
def runAllTests():
|
def runAllTests():
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
|
testHashtagRuleTree()
|
||||||
testRemoveHtmlTag()
|
testRemoveHtmlTag()
|
||||||
testReplaceEmailQuote()
|
testReplaceEmailQuote()
|
||||||
testConstantTimeStringCheck()
|
testConstantTimeStringCheck()
|
||||||
|
|
Loading…
Reference in New Issue