From a8365bfaea1beb72367788245c9789473e430922 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 25 Nov 2020 11:02:40 +0000 Subject: [PATCH] More efficient checking for blocked hashtags --- webapp_hashtagswarm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py index e26a8bd3..e5150666 100644 --- a/webapp_hashtagswarm.py +++ b/webapp_hashtagswarm.py @@ -7,7 +7,6 @@ __email__ = "bob@freedombone.net" __status__ = "Production" import os -from blocking import isBlockedHashtag from datetime import datetime @@ -81,6 +80,12 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str: tagSwarm = [] domainHistogram = {} + blockedStr = '' + globalBlockingFilename = baseDir + '/accounts/blocking.txt' + if os.path.isfile(globalBlockingFilename): + with open(globalBlockingFilename, 'r') as fp: + blockedStr = fp.read() + for subdir, dirs, files in os.walk(baseDir + '/tags'): for f in files: tagsFilename = os.path.join(baseDir + '/tags', f) @@ -98,7 +103,7 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str: continue hashTagName = f.split('.')[0] - if isBlockedHashtag(baseDir, hashTagName): + if '#' + hashTagName + '\n' in blockedStr: continue with open(tagsFilename, 'r') as fp: # only read one line, which saves time and memory @@ -129,6 +134,7 @@ def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str: postDomain = postUrl.split('##')[1] if '#' in postDomain: postDomain = postDomain.split('#')[0] + if domainHistogram.get(postDomain): domainHistogram[postDomain] = \ domainHistogram[postDomain] + 1