forked from indymedia/epicyon
Hashtag categories as rss feeds
parent
8ad13c94c7
commit
2f513407e4
48
daemon.py
48
daemon.py
|
@ -164,6 +164,7 @@ from webapp_search import htmlSearchEmoji
|
||||||
from webapp_search import htmlSearchSharedItems
|
from webapp_search import htmlSearchSharedItems
|
||||||
from webapp_search import htmlSearchEmojiTextEntry
|
from webapp_search import htmlSearchEmojiTextEntry
|
||||||
from webapp_search import htmlSearch
|
from webapp_search import htmlSearch
|
||||||
|
from webapp_hashtagswarm import getHashtagCategoriesFeed
|
||||||
from shares import getSharesFeedForPerson
|
from shares import getSharesFeedForPerson
|
||||||
from shares import addShare
|
from shares import addShare
|
||||||
from shares import removeShare
|
from shares import removeShare
|
||||||
|
@ -4810,6 +4811,41 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
path + ' ' + callingDomain)
|
path + ' ' + callingDomain)
|
||||||
self._404()
|
self._404()
|
||||||
|
|
||||||
|
def _getHashtagCategoriesFeed(self, authorized: bool,
                              callingDomain: str, path: str,
                              baseDir: str, httpPrefix: str,
                              domain: str, port: int, proxyType: str,
                              GETstartTime, GETtimings: {},
                              debug: bool) -> None:
    """Serves the hashtag categories as an rss 2.0 feed

    A session is created with createSession(proxyType) if the server
    does not already have one. The feed text is then obtained from
    getHashtagCategoriesFeed and written back as text/xml.
    A 404 is sent if no session could be created or if no feed
    text was produced.
    """
    if not self.server.session:
        print('Starting new session during RSS categories request')
        self.server.session = createSession(proxyType)
        if not self.server.session:
            print('ERROR: GET failed to create session ' +
                  'during RSS categories request')
            self._404()
            return

    # No pre-loaded categories are supplied here, only baseDir
    feedStr = getHashtagCategoriesFeed(baseDir, None)
    if not feedStr:
        if debug:
            print('Failed to get rss2 categories feed: ' +
                  path + ' ' + callingDomain)
        self._404()
        return

    # The content length header must be the encoded size in bytes
    feedBytes = feedStr.encode('utf-8')
    self._set_headers('text/xml', len(feedBytes),
                      None, callingDomain)
    self._write(feedBytes)
    if debug:
        print('Sent rss2 categories feed: ' +
              path + ' ' + callingDomain)
|
||||||
|
|
||||||
def _getRSS3feed(self, authorized: bool,
|
def _getRSS3feed(self, authorized: bool,
|
||||||
callingDomain: str, path: str,
|
callingDomain: str, path: str,
|
||||||
baseDir: str, httpPrefix: str,
|
baseDir: str, httpPrefix: str,
|
||||||
|
@ -9276,6 +9312,18 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
self._benchmarkGETtimings(GETstartTime, GETtimings,
|
self._benchmarkGETtimings(GETstartTime, GETtimings,
|
||||||
'fonts', 'sharedInbox enabled')
|
'fonts', 'sharedInbox enabled')
|
||||||
|
|
||||||
|
if self.path == '/categories.xml':
|
||||||
|
self._getHashtagCategoriesFeed(authorized,
|
||||||
|
callingDomain, self.path,
|
||||||
|
self.server.baseDir,
|
||||||
|
self.server.httpPrefix,
|
||||||
|
self.server.domain,
|
||||||
|
self.server.port,
|
||||||
|
self.server.proxyType,
|
||||||
|
GETstartTime, GETtimings,
|
||||||
|
self.server.debug)
|
||||||
|
return
|
||||||
|
|
||||||
if self.path == '/newswire.xml':
|
if self.path == '/newswire.xml':
|
||||||
self._getNewswireFeed(authorized,
|
self._getNewswireFeed(authorized,
|
||||||
callingDomain, self.path,
|
callingDomain, self.path,
|
||||||
|
|
41
newswire.py
41
newswire.py
|
@ -14,6 +14,7 @@ from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from datetime import timezone
|
from datetime import timezone
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from utils import setHashtagCategory
|
||||||
from utils import firstParagraphFromString
|
from utils import firstParagraphFromString
|
||||||
from utils import isPublicPost
|
from utils import isPublicPost
|
||||||
from utils import locatePost
|
from utils import locatePost
|
||||||
|
@ -202,15 +203,53 @@ def parseFeedDate(pubDate: str) -> str:
|
||||||
return pubDateStr
|
return pubDateStr
|
||||||
|
|
||||||
|
|
||||||
|
def xml2StrToHashtagCategories(baseDir: str, xmlStr: str,
                               maxCategoriesFeedItemSizeKb: int) -> None:
    """Updates hashtag categories based upon an rss feed

    Each <item> within the feed supplies a category name in its <title>
    and a whitespace separated list of hashtags in its <description>.
    Every hashtag found is passed to setHashtagCategory together with
    the category name.

    baseDir: passed through unchanged to setHashtagCategory
    xmlStr: text of the rss categories feed
    maxCategoriesFeedItemSizeKb: items whose text is longer than this
        many kilobytes are skipped with a warning
    """
    requiredTags = ('<title>', '</title>',
                    '<description>', '</description>')
    maxBytes = maxCategoriesFeedItemSizeKb * 1024

    # The text before the first <item> is the channel preamble. Its own
    # <title> and <description> describe the feed, not a category,
    # so it is not treated as an item
    for rssItem in xmlStr.split('<item>')[1:]:
        if not rssItem:
            continue
        if len(rssItem) > maxBytes:
            print('WARN: rss categories feed item is too big')
            continue
        if any(tag not in rssItem for tag in requiredTags):
            continue

        categoryStr = rssItem.split('<title>')[1]
        categoryStr = categoryStr.split('</title>')[0].strip()
        if not categoryStr:
            continue

        hashtagListStr = rssItem.split('<description>')[1]
        hashtagListStr = hashtagListStr.split('</description>')[0].strip()
        if not hashtagListStr:
            continue

        # split() without a separator collapses runs of whitespace, so
        # repeated spaces or line breaks never produce an empty hashtag
        for hashtag in hashtagListStr.split():
            setHashtagCategory(baseDir, hashtag, categoryStr)
|
||||||
|
|
||||||
|
|
||||||
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
moderated: bool, mirrored: bool,
|
moderated: bool, mirrored: bool,
|
||||||
maxPostsPerSource: int,
|
maxPostsPerSource: int,
|
||||||
maxFeedItemSizeKb: int) -> {}:
|
maxFeedItemSizeKb: int,
|
||||||
|
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||||
"""Converts an xml 2.0 string to a dictionary
|
"""Converts an xml 2.0 string to a dictionary
|
||||||
"""
|
"""
|
||||||
if '<item>' not in xmlStr:
|
if '<item>' not in xmlStr:
|
||||||
return {}
|
return {}
|
||||||
result = {}
|
result = {}
|
||||||
|
if '<title>#categories</title>' in xmlStr:
|
||||||
|
xml2StrToHashtagCategories(baseDir, xmlStr,
|
||||||
|
maxCategoriesFeedItemSizeKb)
|
||||||
|
return {}
|
||||||
rssItems = xmlStr.split('<item>')
|
rssItems = xmlStr.split('<item>')
|
||||||
postCtr = 0
|
postCtr = 0
|
||||||
maxBytes = maxFeedItemSizeKb * 1024
|
maxBytes = maxFeedItemSizeKb * 1024
|
||||||
|
|
|
@ -8,6 +8,40 @@ __status__ = "Production"
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from utils import getHashtagCategories
|
||||||
|
|
||||||
|
|
||||||
|
def getHashtagCategoriesFeed(baseDir: str,
                             hashtagCategories=None) -> str:
    """Returns an rss feed for hashtag categories

    baseDir: used to look up the categories with getHashtagCategories
        when none are supplied
    hashtagCategories: optional dict mapping a category name to a list
        of hashtags. If it is empty or None then the categories are
        obtained from getHashtagCategories(baseDir)

    Returns the rss 2.0 feed as a string, with one <item> per category
    whose <title> is the category name and whose <description> is the
    space separated list of hashtags. Returns None if there are no
    categories.
    """
    if not hashtagCategories:
        hashtagCategories = getHashtagCategories(baseDir)
        if not hashtagCategories:
            return None

    # All items share the time at which the feed was generated
    rssDateStr = \
        datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UT")

    feedParts = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n",
        "<rss version=\"2.0\">\n",
        '<channel>\n',
        ' <title>#categories</title>\n'
    ]

    for categoryStr, hashtagList in hashtagCategories.items():
        listStr = ' '.join(hashtagList).strip()
        feedParts += [
            '<item>\n',
            ' <title>' + categoryStr + '</title>\n',
            ' <description>' + listStr + '</description>\n',
            ' <link></link>\n',
            ' <pubDate>' + rssDateStr + '</pubDate>\n',
            '</item>\n'
        ]

    # The closing tags are appended to the feed. Assigning here instead
    # would throw away the header and every item generated above
    feedParts.append('</channel>')
    feedParts.append('</rss>')
    return ''.join(feedParts)
|
||||||
|
|
||||||
|
|
||||||
def getHashtagDomainMax(domainHistogram: {}) -> str:
|
def getHashtagDomainMax(domainHistogram: {}) -> str:
|
||||||
|
|
Loading…
Reference in New Issue