epicyon/webapp_hashtagswarm.py

295 lines
11 KiB
Python
Raw Permalink Normal View History

2020-11-19 14:02:16 +00:00
__filename__ = "webapp_hashtagswarm.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.1.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
import os
from shutil import copyfile
2020-11-19 14:02:16 +00:00
from datetime import datetime
2020-12-02 22:21:56 +00:00
from utils import getConfigParam
from utils import getNicknameFromActor
2020-12-02 16:18:36 +00:00
from utils import getHashtagCategories
2020-12-02 19:43:17 +00:00
from utils import getHashtagCategory
2020-12-02 22:21:56 +00:00
from webapp_utils import getSearchBannerFile
from webapp_utils import getImageFile
2020-12-02 20:15:01 +00:00
from webapp_utils import getContentWarningButton
from webapp_utils import htmlHeaderWithExternalStyle
from webapp_utils import htmlFooter
2020-12-02 16:18:36 +00:00
def getHashtagCategoriesFeed(baseDir: str,
                             hashtagCategories=None) -> str:
    """Returns an rss feed for hashtag categories

    baseDir: passed to getHashtagCategories when no categories are supplied
    hashtagCategories: optional dict mapping each category name to a list
        of hashtags. If it is empty or None then the categories are
        obtained from getHashtagCategories

    Returns the RSS 2.0 document as a string, with one item per category
    whose description is the space separated list of its hashtags.
    Returns None if there are no categories
    """
    # imported here so that the function carries its own dependency
    from xml.sax.saxutils import escape

    if not hashtagCategories:
        hashtagCategories = getHashtagCategories(baseDir)
        if not hashtagCategories:
            return None

    # every item carries the same publication date: the time at which
    # the feed was generated
    rssDateStr = \
        datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")

    rssParts = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n",
        "<rss version=\"2.0\">\n",
        '<channel>\n',
        ' <title>#categories</title>\n'
    ]
    for categoryStr, hashtagList in hashtagCategories.items():
        listStr = ' '.join(hashtagList).strip()
        # category names and hashtags are escaped, since a stray
        # & or < would otherwise make the feed invalid XML
        rssParts.append('<item>\n')
        rssParts.append(' <title>' + escape(categoryStr) + '</title>\n')
        rssParts.append(' <description>' + escape(listStr) +
                        '</description>\n')
        rssParts.append(' <link/>\n')
        rssParts.append(' <pubDate>' + rssDateStr + '</pubDate>\n')
        rssParts.append('</item>\n')
    rssParts.append('</channel>\n')
    rssParts.append('</rss>\n')
    return ''.join(rssParts)
2020-11-19 14:02:16 +00:00
2020-11-19 16:41:28 +00:00
def getHashtagDomainMax(domainHistogram: {}) -> str:
    """Returns the domain having the highest hashtag count

    Only a count greater than one qualifies, so None is returned when the
    histogram is empty or when no domain has more than a single hashtag.
    When several domains share the highest count, the one which appears
    first within the histogram is returned
    """
    if not domainHistogram:
        return None
    # max() returns the first of any equally ranked keys
    topDomain = max(domainHistogram, key=domainHistogram.get)
    if domainHistogram[topDomain] > 1:
        return topDomain
    return None
2020-11-19 17:09:50 +00:00
def getHashtagDomainHistogram(domainHistogram: {}, translate: {}) -> str:
    """Returns the html for a histogram of domains
    from which hashtags are coming

    domainHistogram: dict mapping a domain name to its number of hashtags.
        It is only read, never modified
    translate: dict of translated strings, from which 'Hashtag origins'
        is used as the heading

    Each listed domain is shown with its percentage of the total number
    of hashtags, with the largest contributors first. Domains having only
    a single hashtag are included within the total but are not listed,
    which is the same threshold that getHashtagDomainMax applies.
    Returns an empty string if there is nothing to list
    """
    totalCount = sum(domainHistogram.values())
    if totalCount == 0:
        return ''

    # Rank on a filtered view rather than repeatedly extracting and
    # deleting the maximum from the dict which the caller passed in.
    # sorted() is stable, including with reverse=True, so that domains
    # having equal counts remain in their original order
    rankedDomains = sorted(
        (item for item in domainHistogram.items() if item[1] > 1),
        key=lambda item: item[1], reverse=True)
    if not rankedDomains:
        return ''

    # all percentages go into one table cell and all domain names into
    # the cell beside it, each separated by line breaks
    leftColStr = ''
    rightColStr = ''
    for domain, count in rankedDomains:
        percent = int(count * 100 / totalCount)
        leftColStr += str(percent) + '%<br>'
        rightColStr += domain + '<br>'

    htmlStr = '<br><br><center>\n'
    htmlStr += ' <h1>' + translate['Hashtag origins'] + '</h1>\n'
    htmlStr += ' <table class="domainHistogram">\n'
    htmlStr += ' <colgroup>\n'
    htmlStr += ' <col span="1" class="domainHistogramLeft">\n'
    htmlStr += ' <col span="1" class="domainHistogramRight">\n'
    htmlStr += ' </colgroup>\n'
    htmlStr += ' <tbody>\n'
    htmlStr += ' <tr>\n'
    htmlStr += ' <td>' + leftColStr + '</td>\n'
    htmlStr += ' <td>' + rightColStr + '</td>\n'
    htmlStr += ' </tr>\n'
    htmlStr += ' </tbody>\n'
    htmlStr += ' </table>\n'
    htmlStr += '</center>\n'
    return htmlStr
2020-11-19 17:09:50 +00:00
def _hashtagPostDomain(tagLine: str, recentDays: ()) -> str:
    """Returns the domain of the post referenced by a line from a hashtag
    index file, or None if the line is malformed or is not recent

    tagLine: a line having three whitespace separated fields, which are
        the days since epoch, a second field which is not used here,
        and the post url in which the domain follows a '##' separator
    recentDays: the days since epoch values, as strings, which count
        as being recent
    """
    # split on any run of whitespace, so that parsing does not depend
    # upon the exact width of the field separator
    sections = tagLine.split()
    if len(sections) != 3:
        return None
    if sections[0] not in recentDays:
        return None
    postUrl = sections[2]
    if '##' not in postUrl:
        return None
    return postUrl.split('##')[1].split('#')[0]


def htmlHashTagSwarm(baseDir: str, actor: str, translate: {}) -> str:
    """Returns a tag swarm of today's hashtags

    baseDir: directory containing the tags and accounts subdirectories
    actor: prefix used for the category and tag links
    translate: dict of translated strings, passed on to
        getContentWarningButton

    A hashtag is included if its file within the tags directory was
    modified today or yesterday (UTC), its first line is dated today or
    yesterday, and it does not appear within accounts/blocking.txt.
    Returns the html for the swarm, or an empty string if there are
    no recent hashtags
    """
    daysSinceEpoch = (datetime.utcnow() - datetime(1970, 1, 1)).days
    recently = daysSinceEpoch - 1
    recentDays = (str(daysSinceEpoch), str(recently))

    tagSwarm = []
    categorySwarm = []
    # gathered for getHashtagDomainHistogram, which is currently disabled
    domainHistogram = {}

    # Load the blocked hashtags into memory.
    # This avoids needing to repeatedly load the blocked file for each hashtag
    blockedStr = ''
    globalBlockingFilename = baseDir + '/accounts/blocking.txt'
    if os.path.isfile(globalBlockingFilename):
        with open(globalBlockingFilename, 'r') as fp:
            blockedStr = fp.read()

    tagsDir = baseDir + '/tags'
    for subdir, dirs, files in os.walk(tagsDir):
        for f in files:
            if not f.endswith('.txt'):
                continue
            tagsFilename = os.path.join(tagsDir, f)
            if not os.path.isfile(tagsFilename):
                continue

            # The modification time is in seconds since the epoch, so
            # whole days can be derived directly. Converting it via a
            # local time datetime would not agree with the UTC based
            # value of daysSinceEpoch
            modTimesinceEpoc = os.path.getmtime(tagsFilename)
            fileDaysSinceEpoch = int(modTimesinceEpoc // 86400)
            if fileDaysSinceEpoch < recently:
                continue

            hashTagName = f.split('.')[0]
            if '#' + hashTagName + '\n' in blockedStr:
                continue

            # only read one line, which saves time and memory
            with open(tagsFilename, 'r') as fp:
                firstLine = fp.readline()
            postDomain = _hashtagPostDomain(firstLine, recentDays)
            if postDomain is None:
                continue

            domainHistogram[postDomain] = \
                domainHistogram.get(postDomain, 0) + 1
            tagSwarm.append(hashTagName)

            # swap only the file extension, leaving the rest of
            # the path alone
            categoryFilename = \
                tagsFilename[:-len('.txt')] + '.category'
            if os.path.isfile(categoryFilename):
                categoryStr = \
                    getHashtagCategory(baseDir, hashTagName)
                if categoryStr not in categorySwarm:
                    categorySwarm.append(categoryStr)
        # Filenames are joined to the top level tags directory, so only
        # that level is examined. Descending further could list the same
        # hashtag more than once
        break

    if not tagSwarm:
        return ''
    tagSwarm.sort()

    # swarm of categories
    # NOTE(review): the category links are assumed to be shown only when
    # there are more than three categories - confirm the intended nesting
    categorySwarmStr = ''
    if len(categorySwarm) > 3:
        categorySwarm.sort()
        for categoryStr in categorySwarm:
            categorySwarmStr += \
                '<a href="' + actor + '/category/' + categoryStr + \
                '" class="hashtagswarm"><b>' + categoryStr + '</b></a>\n'
        categorySwarmStr += '<br>\n'

    # swarm of tags
    tagSwarmStr = ''.join(
        '<a href="' + actor + '/tags/' + tagName +
        '" class="hashtagswarm">' + tagName + '</a>\n'
        for tagName in tagSwarm)

    # when categories are shown the full list of tags is placed
    # behind a button
    if categorySwarmStr:
        tagSwarmStr = \
            getContentWarningButton('alltags', translate, tagSwarmStr)

    tagSwarmHtml = categorySwarmStr + tagSwarmStr.strip() + '\n'
    # tagSwarmHtml += getHashtagDomainHistogram(domainHistogram, translate)
    return tagSwarmHtml
def htmlSearchHashtagCategory(cssCache: {}, translate: {},
                              baseDir: str, path: str, domain: str) -> str:
    """Show hashtags after selecting a category on the main search screen

    cssCache: not used by this function
    translate: dict of translated strings, from which 'Category' is used
    baseDir: directory containing the img and accounts subdirectories
        and the css files
    path: the requested path, having the form <actor>/category/<name>
    domain: used to locate the account directory of the searching nickname

    Returns the html for the screen. An IndexError is raised if path
    does not contain '/category/'
    """
    # imported here so that the function carries its own dependency
    import html

    pathSections = path.split('/category/')
    actor = pathSections[0]
    categoryStr = pathSections[1].strip()
    searchNickname = getNicknameFromActor(actor)

    # make sure that a search background image exists within accounts
    if os.path.isfile(baseDir + '/img/search-background.png'):
        if not os.path.isfile(baseDir + '/accounts/search-background.png'):
            copyfile(baseDir + '/img/search-background.png',
                     baseDir + '/accounts/search-background.png')

    # search.css takes precedence over the default stylesheet
    cssFilename = baseDir + '/epicyon-search.css'
    if os.path.isfile(baseDir + '/search.css'):
        cssFilename = baseDir + '/search.css'

    htmlStr = htmlHeaderWithExternalStyle(cssFilename)

    # show a banner above the search box
    searchBannerFile, searchBannerFilename = \
        getSearchBannerFile(baseDir, searchNickname, domain)
    if not os.path.isfile(searchBannerFilename):
        # Fall back to the banner located by getImageFile and copy it
        # into the account directory. The theme name was previously
        # looked up at this point but never used, and that lookup would
        # fail if no theme had been configured, so it has been removed
        themeSearchImageFile, themeSearchBannerFilename = \
            getImageFile(baseDir, 'search_banner', baseDir + '/img',
                         searchNickname, domain)
        if os.path.isfile(themeSearchBannerFilename):
            searchBannerFilename = \
                baseDir + '/accounts/' + \
                searchNickname + '@' + domain + '/' + themeSearchImageFile
            copyfile(themeSearchBannerFilename,
                     searchBannerFilename)
            searchBannerFile = themeSearchImageFile

    if os.path.isfile(searchBannerFilename):
        htmlStr += '<a href="' + actor + '/search">\n'
        htmlStr += '<img loading="lazy" class="timeline-banner" src="' + \
            actor + '/' + searchBannerFile + '" /></a>\n'

    htmlStr += '<div class="follow">'
    htmlStr += '<center><br><br><br>'
    htmlStr += '<h1><a href="' + actor + '/search"><b>'
    # the category name comes from the requested path, so it is escaped
    # before being shown within the page
    htmlStr += translate['Category'] + ': ' + \
        html.escape(categoryStr) + '</b></a></h1>'

    hashtagsDict = getHashtagCategories(baseDir, True, categoryStr)
    if hashtagsDict:
        for hashtagList in hashtagsDict.values():
            # sorted() leaves the returned lists unmodified
            for tagName in sorted(hashtagList):
                htmlStr += \
                    '<a href="' + actor + '/tags/' + tagName + \
                    '" class="hashtagswarm">' + tagName + '</a>\n'
    htmlStr += '</center>'
    htmlStr += '</div>'
    htmlStr += htmlFooter()
    return htmlStr