Hashtag categories as rss feeds

main
Bob Mottram 2020-12-02 16:18:36 +00:00
parent 8ad13c94c7
commit 2f513407e4
3 changed files with 122 additions and 1 deletions

View File

@ -164,6 +164,7 @@ from webapp_search import htmlSearchEmoji
from webapp_search import htmlSearchSharedItems from webapp_search import htmlSearchSharedItems
from webapp_search import htmlSearchEmojiTextEntry from webapp_search import htmlSearchEmojiTextEntry
from webapp_search import htmlSearch from webapp_search import htmlSearch
from webapp_hashtagswarm import getHashtagCategoriesFeed
from shares import getSharesFeedForPerson from shares import getSharesFeedForPerson
from shares import addShare from shares import addShare
from shares import removeShare from shares import removeShare
@ -4810,6 +4811,41 @@ class PubServer(BaseHTTPRequestHandler):
path + ' ' + callingDomain) path + ' ' + callingDomain)
self._404() self._404()
def _getHashtagCategoriesFeed(self, authorized: bool,
                              callingDomain: str, path: str,
                              baseDir: str, httpPrefix: str,
                              domain: str, port: int, proxyType: str,
                              GETstartTime, GETtimings: {},
                              debug: bool) -> None:
    """Serves the hashtag categories rss feed for a GET request

    A session is created via createSession(proxyType) if the server
    does not have one yet; when that fails a 404 is sent.
    The feed is built by getHashtagCategoriesFeed(baseDir, None) and,
    if non-empty, written back as utf-8 encoded 'text/xml'.
    An empty feed results in a 404.
    When debug is set the outcome is printed together with the
    path and calling domain.
    """
    # make sure that a session exists before continuing
    if not self.server.session:
        print('Starting new session during RSS categories request')
        self.server.session = createSession(proxyType)
        if not self.server.session:
            print('ERROR: GET failed to create session ' +
                  'during RSS categories request')
            self._404()
            return

    # passing None means that the categories get looked up from baseDir
    hashtagCategories = None
    feedStr = getHashtagCategoriesFeed(baseDir, hashtagCategories)

    # nothing to send
    if not feedStr:
        if debug:
            print('Failed to get rss2 categories feed: ' +
                  path + ' ' + callingDomain)
        self._404()
        return

    feedBytes = feedStr.encode('utf-8')
    self._set_headers('text/xml', len(feedBytes),
                      None, callingDomain)
    self._write(feedBytes)
    if debug:
        print('Sent rss2 categories feed: ' +
              path + ' ' + callingDomain)
def _getRSS3feed(self, authorized: bool, def _getRSS3feed(self, authorized: bool,
callingDomain: str, path: str, callingDomain: str, path: str,
baseDir: str, httpPrefix: str, baseDir: str, httpPrefix: str,
@ -9276,6 +9312,18 @@ class PubServer(BaseHTTPRequestHandler):
self._benchmarkGETtimings(GETstartTime, GETtimings, self._benchmarkGETtimings(GETstartTime, GETtimings,
'fonts', 'sharedInbox enabled') 'fonts', 'sharedInbox enabled')
if self.path == '/categories.xml':
self._getHashtagCategoriesFeed(authorized,
callingDomain, self.path,
self.server.baseDir,
self.server.httpPrefix,
self.server.domain,
self.server.port,
self.server.proxyType,
GETstartTime, GETtimings,
self.server.debug)
return
if self.path == '/newswire.xml': if self.path == '/newswire.xml':
self._getNewswireFeed(authorized, self._getNewswireFeed(authorized,
callingDomain, self.path, callingDomain, self.path,

View File

@ -14,6 +14,7 @@ from datetime import datetime
from datetime import timedelta from datetime import timedelta
from datetime import timezone from datetime import timezone
from collections import OrderedDict from collections import OrderedDict
from utils import setHashtagCategory
from utils import firstParagraphFromString from utils import firstParagraphFromString
from utils import isPublicPost from utils import isPublicPost
from utils import locatePost from utils import locatePost
@ -202,15 +203,53 @@ def parseFeedDate(pubDate: str) -> str:
return pubDateStr return pubDateStr
def xml2StrToHashtagCategories(baseDir: str, xmlStr: str,
                               maxCategoriesFeedItemSizeKb: int) -> None:
    """Updates hashtag categories based upon an rss feed

    Each <item> within xmlStr is expected to have the category name
    as its <title> and a whitespace separated list of hashtags as its
    <description>. For every hashtag found
    setHashtagCategory(baseDir, hashtag, category) is called.

    Items which are empty, longer than maxCategoriesFeedItemSizeKb * 1024
    characters, or which lack a non-blank title or description
    are skipped.
    """
    maxBytes = maxCategoriesFeedItemSizeKb * 1024
    # The text before the first <item> is the channel header rather than
    # an item. It is skipped so that a channel having its own
    # <title> and <description> is not mistaken for a category
    rssItems = xmlStr.split('<item>')[1:]
    for rssItem in rssItems:
        if not rssItem:
            continue
        if len(rssItem) > maxBytes:
            print('WARN: rss categories feed item is too big')
            continue
        if '<title>' not in rssItem:
            continue
        if '</title>' not in rssItem:
            continue
        if '<description>' not in rssItem:
            continue
        if '</description>' not in rssItem:
            continue
        categoryStr = rssItem.split('<title>')[1]
        categoryStr = categoryStr.split('</title>')[0].strip()
        if not categoryStr:
            continue
        hashtagListStr = rssItem.split('<description>')[1]
        hashtagListStr = hashtagListStr.split('</description>')[0].strip()
        if not hashtagListStr:
            continue
        # split() without a separator collapses runs of whitespace,
        # so repeated spaces or newlines never produce empty hashtags
        for hashtag in hashtagListStr.split():
            setHashtagCategory(baseDir, hashtag, categoryStr)
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
maxPostsPerSource: int, maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}: maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
"""Converts an xml 2.0 string to a dictionary """Converts an xml 2.0 string to a dictionary
""" """
if '<item>' not in xmlStr: if '<item>' not in xmlStr:
return {} return {}
result = {} result = {}
if '<title>#categories</title>' in xmlStr:
xml2StrToHashtagCategories(baseDir, xmlStr,
maxCategoriesFeedItemSizeKb)
return {}
rssItems = xmlStr.split('<item>') rssItems = xmlStr.split('<item>')
postCtr = 0 postCtr = 0
maxBytes = maxFeedItemSizeKb * 1024 maxBytes = maxFeedItemSizeKb * 1024

View File

@ -8,6 +8,40 @@ __status__ = "Production"
import os import os
from datetime import datetime from datetime import datetime
from utils import getHashtagCategories
def getHashtagCategoriesFeed(baseDir: str,
                             hashtagCategories=None) -> str:
    """Returns an rss feed for hashtag categories

    hashtagCategories maps each category name to a list of hashtags.
    If it is not supplied (or is empty) then it is obtained from
    getHashtagCategories(baseDir). None is returned when there are
    no categories.

    Each category becomes one <item> whose <title> is the category
    name and whose <description> is the space separated hashtag list.
    """
    if not hashtagCategories:
        hashtagCategories = getHashtagCategories(baseDir)
        if not hashtagCategories:
            return None

    # the same publication date is used for every item
    rssDateStr = \
        datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")

    rssStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
    rssStr += "<rss version=\"2.0\">\n"
    rssStr += '<channel>\n'
    rssStr += ' <title>#categories</title>\n'

    for categoryStr, hashtagList in hashtagCategories.items():
        listStr = ' '.join(hashtagList).strip()
        rssStr += '<item>\n'
        rssStr += ' <title>' + categoryStr + '</title>\n'
        rssStr += ' <description>' + listStr + '</description>\n'
        rssStr += ' <link></link>\n'
        rssStr += ' <pubDate>' + rssDateStr + '</pubDate>\n'
        rssStr += '</item>\n'

    # append the closing tags. Assigning here instead of appending
    # would throw away everything generated above
    rssStr += '</channel>\n'
    rssStr += '</rss>\n'
    return rssStr
def getHashtagDomainMax(domainHistogram: {}) -> str: def getHashtagDomainMax(domainHistogram: {}) -> str: