Hashtag categories as rss feeds

merge-requests/8/head
Bob Mottram 2020-12-02 16:18:36 +00:00
parent 8ad13c94c7
commit 2f513407e4
3 changed files with 122 additions and 1 deletions

View File

@ -164,6 +164,7 @@ from webapp_search import htmlSearchEmoji
from webapp_search import htmlSearchSharedItems
from webapp_search import htmlSearchEmojiTextEntry
from webapp_search import htmlSearch
from webapp_hashtagswarm import getHashtagCategoriesFeed
from shares import getSharesFeedForPerson
from shares import addShare
from shares import removeShare
@ -4810,6 +4811,41 @@ class PubServer(BaseHTTPRequestHandler):
path + ' ' + callingDomain)
self._404()
def _getHashtagCategoriesFeed(self, authorized: bool,
                              callingDomain: str, path: str,
                              baseDir: str, httpPrefix: str,
                              domain: str, port: int, proxyType: str,
                              GETstartTime, GETtimings: {},
                              debug: bool) -> None:
    """Serves the hashtag categories rss feed in response to a GET

    A session is created on the server if one does not already exist,
    and a 404 is sent if that fails. The feed is then built from the
    categories stored under baseDir and written as utf-8 encoded
    'text/xml'. A 404 is sent if no feed could be produced.

    path and callingDomain are used for the headers and for debug
    output. authorized, httpPrefix, domain, port, GETstartTime and
    GETtimings are accepted but not used by this method.
    """
    server = self.server

    # make sure that the server has a session before continuing
    if not server.session:
        print('Starting new session during RSS categories request')
        server.session = createSession(proxyType)
    if not server.session:
        print('ERROR: GET failed to create session ' +
              'during RSS categories request')
        self._404()
        return

    # passing no categories makes the feed builder look them up itself
    feedXml = getHashtagCategoriesFeed(baseDir, None)
    if not feedXml:
        if debug:
            print('Failed to get rss2 categories feed: ' +
                  path + ' ' + callingDomain)
        self._404()
        return

    # the content length has to be the encoded byte count
    feedBytes = feedXml.encode('utf-8')
    self._set_headers('text/xml', len(feedBytes),
                      None, callingDomain)
    self._write(feedBytes)
    if debug:
        print('Sent rss2 categories feed: ' +
              path + ' ' + callingDomain)
def _getRSS3feed(self, authorized: bool,
callingDomain: str, path: str,
baseDir: str, httpPrefix: str,
@ -9276,6 +9312,18 @@ class PubServer(BaseHTTPRequestHandler):
self._benchmarkGETtimings(GETstartTime, GETtimings,
'fonts', 'sharedInbox enabled')
if self.path == '/categories.xml':
self._getHashtagCategoriesFeed(authorized,
callingDomain, self.path,
self.server.baseDir,
self.server.httpPrefix,
self.server.domain,
self.server.port,
self.server.proxyType,
GETstartTime, GETtimings,
self.server.debug)
return
if self.path == '/newswire.xml':
self._getNewswireFeed(authorized,
callingDomain, self.path,

View File

@ -14,6 +14,7 @@ from datetime import datetime
from datetime import timedelta
from datetime import timezone
from collections import OrderedDict
from utils import setHashtagCategory
from utils import firstParagraphFromString
from utils import isPublicPost
from utils import locatePost
@ -202,15 +203,53 @@ def parseFeedDate(pubDate: str) -> str:
return pubDateStr
def xml2StrToHashtagCategories(baseDir: str, xmlStr: str,
                               maxCategoriesFeedItemSizeKb: int) -> None:
    """Updates hashtag categories based upon an rss feed

    Each <item> within xmlStr is expected to have the category name
    as its <title> and a whitespace separated list of hashtags as its
    <description>. setHashtagCategory is called once for every hashtag
    of every usable item.

    Items are skipped if they are longer than
    maxCategoriesFeedItemSizeKb * 1024 characters, if any of the
    title or description tags is missing, or if the category name or
    the hashtag list is empty.
    """
    maxBytes = maxCategoriesFeedItemSizeKb * 1024
    requiredTags = (
        '<title>', '</title>',
        '<description>', '</description>'
    )

    # Whatever precedes the first <item> is the channel header, which
    # can have its own title and description. It is not a category,
    # so only the text following each <item> marker is examined.
    rssItems = xmlStr.split('<item>')[1:]

    for rssItem in rssItems:
        if not rssItem:
            continue
        if len(rssItem) > maxBytes:
            print('WARN: rss categories feed item is too big')
            continue
        if any(tag not in rssItem for tag in requiredTags):
            continue

        categoryStr = rssItem.split('<title>')[1]
        categoryStr = categoryStr.split('</title>')[0].strip()
        if not categoryStr:
            continue

        hashtagListStr = rssItem.split('<description>')[1]
        hashtagListStr = hashtagListStr.split('</description>')[0]

        # split() without a separator copes with repeated spaces, tabs
        # and newlines, and never produces an empty hashtag
        for hashtag in hashtagListStr.split():
            setHashtagCategory(baseDir, hashtag, categoryStr)
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
moderated: bool, mirrored: bool,
maxPostsPerSource: int,
maxFeedItemSizeKb: int) -> {}:
maxFeedItemSizeKb: int,
maxCategoriesFeedItemSizeKb: int) -> {}:
"""Converts an xml 2.0 string to a dictionary
"""
if '<item>' not in xmlStr:
return {}
result = {}
if '<title>#categories</title>' in xmlStr:
xml2StrToHashtagCategories(baseDir, xmlStr,
maxCategoriesFeedItemSizeKb)
return {}
rssItems = xmlStr.split('<item>')
postCtr = 0
maxBytes = maxFeedItemSizeKb * 1024

View File

@ -8,6 +8,40 @@ __status__ = "Production"
import os
from datetime import datetime
from utils import getHashtagCategories
def getHashtagCategoriesFeed(baseDir: str,
                             hashtagCategories=None) -> str:
    """Returns an rss feed for hashtag categories

    hashtagCategories maps each category name to the hashtags which
    belong to it. If it is not supplied, or is empty, then the
    categories are obtained with getHashtagCategories(baseDir).

    The channel has the title '#categories' and contains one item per
    category: the item title is the category name and the item
    description is the space separated list of its hashtags.

    Returns the feed as a string, or None if there are no categories.
    """
    if not hashtagCategories:
        hashtagCategories = getHashtagCategories(baseDir)
        if not hashtagCategories:
            return None

    # every item carries the time at which the feed was generated
    rssDateStr = \
        datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")

    rssStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
    rssStr += "<rss version=\"2.0\">\n"
    rssStr += '<channel>\n'
    rssStr += ' <title>#categories</title>\n'

    # NOTE(review): category and hashtag text is inserted without xml
    # escaping - confirm that these values can never contain markup
    # characters such as '<' or '&'
    for categoryStr, hashtagList in hashtagCategories.items():
        listStr = ' '.join(hashtagList).strip()
        rssStr += '<item>\n'
        rssStr += ' <title>' + categoryStr + '</title>\n'
        rssStr += ' <description>' + listStr + '</description>\n'
        rssStr += ' <link></link>\n'
        rssStr += ' <pubDate>' + rssDateStr + '</pubDate>\n'
        rssStr += '</item>\n'

    # append the closing tags. Assigning here instead of appending
    # would throw away the header and all of the items
    rssStr += '</channel>'
    rssStr += '</rss>'
    return rssStr
def getHashtagDomainMax(domainHistogram: {}) -> str: