More efficient checking for blocked hashtags

main
Bob Mottram 2020-11-25 11:02:40 +00:00
parent f8aabec732
commit a8365bfaea
1 changed file with 8 additions and 2 deletions

View File

@ -7,7 +7,6 @@ __email__ = "bob@freedombone.net"
__status__ = "Production" __status__ = "Production"
import os import os
from blocking import isBlockedHashtag
from datetime import datetime from datetime import datetime
@ -81,6 +80,12 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str:
tagSwarm = [] tagSwarm = []
domainHistogram = {} domainHistogram = {}
blockedStr = ''
globalBlockingFilename = baseDir + '/accounts/blocking.txt'
if os.path.isfile(globalBlockingFilename):
with open(globalBlockingFilename, 'r') as fp:
blockedStr = fp.read()
for subdir, dirs, files in os.walk(baseDir + '/tags'): for subdir, dirs, files in os.walk(baseDir + '/tags'):
for f in files: for f in files:
tagsFilename = os.path.join(baseDir + '/tags', f) tagsFilename = os.path.join(baseDir + '/tags', f)
@ -98,7 +103,7 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str:
continue continue
hashTagName = f.split('.')[0] hashTagName = f.split('.')[0]
if isBlockedHashtag(baseDir, hashTagName): if '#' + hashTagName + '\n' in blockedStr:
continue continue
with open(tagsFilename, 'r') as fp: with open(tagsFilename, 'r') as fp:
# only read one line, which saves time and memory # only read one line, which saves time and memory
@ -129,6 +134,7 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str:
postDomain = postUrl.split('##')[1] postDomain = postUrl.split('##')[1]
if '#' in postDomain: if '#' in postDomain:
postDomain = postDomain.split('#')[0] postDomain = postDomain.split('#')[0]
if domainHistogram.get(postDomain): if domainHistogram.get(postDomain):
domainHistogram[postDomain] = \ domainHistogram[postDomain] = \
domainHistogram[postDomain] + 1 domainHistogram[postDomain] + 1