From 6353d80872197f9e7eae1d7593b3f73063c88579 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 19 Nov 2020 16:41:28 +0000 Subject: [PATCH] Hashtag domain histogram --- epicyon-search.css | 10 ++++++ webapp_hashtagswarm.py | 75 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/epicyon-search.css b/epicyon-search.css index 0b04e045..15664a03 100644 --- a/epicyon-search.css +++ b/epicyon-search.css @@ -90,6 +90,16 @@ a:focus { border: 2px solid var(--focus-color); } +.domainHistogram { + border: 0; +} +.domainHistogramLeft { + float: right; +} +.domainHistogramRight { + float: left; +} + .follow { background-image: url("follow-background.jpg"); background-size: cover; diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py index 7f9e0b3d..cb31d8ab 100644 --- a/webapp_hashtagswarm.py +++ b/webapp_hashtagswarm.py @@ -11,6 +11,64 @@ from blocking import isBlockedHashtag from datetime import datetime +def getHashtagDomainMax(domainHistogram: {}) -> str: + """Returns the domain with the maximum number of hashtags + """ + maxCount = 1 + maxDomain = None + for domain, count in domainHistogram.items(): + if count > maxCount: + maxDomain = domain + maxCount = count + return maxDomain + + +def getHashtagDomainHistogram(domainHistogram: {}) -> str: + """Returns the html for a histogram of domains + from which hashtags are coming + """ + totalCount = 0 + for domain, count in domainHistogram.items(): + totalCount += count + if totalCount == 0: + return '' + + htmlStr = '' + histogramHeaderStr = '
\n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + histogramHeaderStr += ' \n' + + leftColStr = '' + rightColStr = '' + + for i in range(len(domainHistogram)): + domain = getHashtagDomainMax(domainHistogram) + if not domain: + break + percent = int(domainHistogram[domain] * 100 / totalCount) + if histogramHeaderStr: + htmlStr += histogramHeaderStr + histogramHeaderStr = None + leftColStr += str(percent) + '%
' + rightColStr += domain + '
' + del domainHistogram[domain] + + if htmlStr: + htmlStr += ' \n' + htmlStr += ' \n' + htmlStr += ' \n' + htmlStr += ' \n' + htmlStr += '
' + leftColStr + '' + rightColStr + '
\n' + htmlStr += '
\n' + + return htmlStr + + def htmlHashTagSwarm(baseDir: str, actor: str) -> str: """Returns a tag swarm of today's hashtags """ @@ -18,6 +76,7 @@ def htmlHashTagSwarm(baseDir: str, actor: str) -> str: daysSinceEpoch = (currTime - datetime(1970, 1, 1)).days daysSinceEpochStr = str(daysSinceEpoch) + ' ' tagSwarm = [] + domainHistogram = {} for subdir, dirs, files in os.walk(baseDir + '/tags'): for f in files: @@ -44,13 +103,26 @@ def htmlHashTagSwarm(baseDir: str, actor: str) -> str: break elif ' ' not in line: break - postDaysSinceEpochStr = line.split(' ')[0] + sections = line.split(' ') + if len(sections) != 3: + break + postDaysSinceEpochStr = sections[0] if not postDaysSinceEpochStr.isdigit(): break postDaysSinceEpoch = int(postDaysSinceEpochStr) if postDaysSinceEpoch < daysSinceEpoch: break elif postDaysSinceEpoch == daysSinceEpoch: + postUrl = sections[2] + if '##' in postUrl: + postDomain = postUrl.split('##')[1] + if '#' in postDomain: + postDomain = postDomain.split('#')[0] + if domainHistogram.get(postDomain): + domainHistogram[postDomain] = \ + domainHistogram[postDomain] + 1 + else: + domainHistogram[postDomain] = 1 tagSwarm.append(hashTagName) break @@ -65,4 +137,5 @@ def htmlHashTagSwarm(baseDir: str, actor: str) -> str: '" class="hashtagswarm">' + tagName + '\n' ctr += 1 tagSwarmHtml = tagSwarmStr.strip() + '\n' + tagSwarmHtml += getHashtagDomainHistogram(domainHistogram) return tagSwarmHtml