From 2f513407e4c8daca3f249e5f1006735ddbb76e7d Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Wed, 2 Dec 2020 16:18:36 +0000
Subject: [PATCH] Hashtag categories as rss feeds

---
 daemon.py              | 48 ++++++++++++++++++++++++++++++++++++++++++
 newswire.py            | 41 +++++++++++++++++++++++++++++++++++-
 webapp_hashtagswarm.py | 34 ++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/daemon.py b/daemon.py
index 714b9bc3..466cdd99 100644
--- a/daemon.py
+++ b/daemon.py
@@ -164,6 +164,7 @@ from webapp_search import htmlSearchEmoji
 from webapp_search import htmlSearchSharedItems
 from webapp_search import htmlSearchEmojiTextEntry
 from webapp_search import htmlSearch
+from webapp_hashtagswarm import getHashtagCategoriesFeed
 from shares import getSharesFeedForPerson
 from shares import addShare
 from shares import removeShare
@@ -4810,6 +4811,41 @@ class PubServer(BaseHTTPRequestHandler):
                   path + ' ' + callingDomain)
         self._404()
 
+    def _getHashtagCategoriesFeed(self, authorized: bool,
+                                  callingDomain: str, path: str,
+                                  baseDir: str, httpPrefix: str,
+                                  domain: str, port: int, proxyType: str,
+                                  GETstartTime, GETtimings: {},
+                                  debug: bool) -> None:
+        """Returns the hashtag categories feed
+        """
+        if not self.server.session:
+            print('Starting new session during RSS categories request')
+            self.server.session = \
+                createSession(proxyType)
+            if not self.server.session:
+                print('ERROR: GET failed to create session ' +
+                      'during RSS categories request')
+                self._404()
+                return
+
+        hashtagCategories = None
+        msg = \
+            getHashtagCategoriesFeed(baseDir, hashtagCategories)
+        if msg:
+            msg = msg.encode('utf-8')
+            self._set_headers('text/xml', len(msg),
+                              None, callingDomain)
+            self._write(msg)
+            if debug:
+                print('Sent rss2 categories feed: ' +
+                      path + ' ' + callingDomain)
+            return
+        if debug:
+            print('Failed to get rss2 categories feed: ' +
+                  path + ' ' + callingDomain)
+        self._404()
+
     def _getRSS3feed(self, authorized: bool,
                      callingDomain: str, path: str,
                      baseDir: str, httpPrefix: str,
@@ -9276,6 +9312,18 @@ class PubServer(BaseHTTPRequestHandler):
         self._benchmarkGETtimings(GETstartTime, GETtimings,
                                   'fonts', 'sharedInbox enabled')
 
+        if self.path == '/categories.xml':
+            self._getHashtagCategoriesFeed(authorized,
+                                           callingDomain, self.path,
+                                           self.server.baseDir,
+                                           self.server.httpPrefix,
+                                           self.server.domain,
+                                           self.server.port,
+                                           self.server.proxyType,
+                                           GETstartTime, GETtimings,
+                                           self.server.debug)
+            return
+
         if self.path == '/newswire.xml':
             self._getNewswireFeed(authorized,
                                   callingDomain, self.path,
diff --git a/newswire.py b/newswire.py
index 84e05f9b..088cbf9a 100644
--- a/newswire.py
+++ b/newswire.py
@@ -14,6 +14,7 @@ from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
 from collections import OrderedDict
+from utils import setHashtagCategory
 from utils import firstParagraphFromString
 from utils import isPublicPost
 from utils import locatePost
@@ -202,15 +203,53 @@ def parseFeedDate(pubDate: str) -> str:
     return pubDateStr
 
 
+def xml2StrToHashtagCategories(baseDir: str, xmlStr: str,
+                               maxCategoriesFeedItemSizeKb: int) -> None:
+    """Updates hashtag categories based upon an rss feed
+    """
+    rssItems = xmlStr.split('<item>')
+    maxBytes = maxCategoriesFeedItemSizeKb * 1024
+    for rssItem in rssItems:
+        if not rssItem:
+            continue
+        if len(rssItem) > maxBytes:
+            print('WARN: rss categories feed item is too big')
+            continue
+        if '<title>' not in rssItem:
+            continue
+        if '</title>' not in rssItem:
+            continue
+        if '<description>' not in rssItem:
+            continue
+        if '</description>' not in rssItem:
+            continue
+        categoryStr = rssItem.split('<title>')[1]
+        categoryStr = categoryStr.split('</title>')[0].strip()
+        if not categoryStr:
+            continue
+        hashtagListStr = rssItem.split('<description>')[1]
+        hashtagListStr = hashtagListStr.split('</description>')[0].strip()
+        if not hashtagListStr:
+            continue
+        hashtagList = hashtagListStr.split(' ')
+        for hashtag in hashtagList:
+            setHashtagCategory(baseDir, hashtag, categoryStr)
+
+
 def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
                   moderated: bool, mirrored: bool,
                   maxPostsPerSource: int,
-                  maxFeedItemSizeKb: int) -> {}:
+                  maxFeedItemSizeKb: int,
+                  maxCategoriesFeedItemSizeKb: int) -> {}:
     """Converts an xml 2.0 string to a dictionary
     """
     if '<item>' not in xmlStr:
         return {}
     result = {}
+    if '<title>#categories</title>' in xmlStr:
+        xml2StrToHashtagCategories(baseDir, xmlStr,
+                                   maxCategoriesFeedItemSizeKb)
+        return {}
     rssItems = xmlStr.split('<item>')
     postCtr = 0
     maxBytes = maxFeedItemSizeKb * 1024
diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py
index 68574b1b..b2f8e2f1 100644
--- a/webapp_hashtagswarm.py
+++ b/webapp_hashtagswarm.py
@@ -8,6 +8,40 @@ __status__ = "Production"
 
 import os
 from datetime import datetime
+from utils import getHashtagCategories
+
+
+def getHashtagCategoriesFeed(baseDir: str,
+                             hashtagCategories=None) -> str:
+    """Returns an rss feed for hashtag categories
+    """
+    if not hashtagCategories:
+        hashtagCategories = getHashtagCategories(baseDir)
+    if not hashtagCategories:
+        return None
+
+    rssStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
+    rssStr += "<rss version=\"2.0\">\n"
+    rssStr += '<channel>\n'
+    rssStr += '    <title>#categories</title>\n'
+
+    rssDateStr = \
+        datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")
+
+    for categoryStr, hashtagList in hashtagCategories.items():
+        rssStr += '<item>\n'
+        rssStr += '  <title>' + categoryStr + '</title>\n'
+        listStr = ''
+        for hashtag in hashtagList:
+            listStr += hashtag + ' '
+        rssStr += '  <description>' + listStr.strip() + '</description>\n'
+        rssStr += '  <link/>\n'
+        rssStr += '  <pubDate>' + rssDateStr + '</pubDate>\n'
+        rssStr += '</item>\n'
+
+    rssStr += '</channel>'
+    rssStr += '</rss>'
+    return rssStr
 
 
 def getHashtagDomainMax(domainHistogram: {}) -> str: