Merge branch 'main' of ssh://code.freedombone.net:2222/bashrc/epicyon into main
94
newswire.py
|
|
@ -258,7 +258,7 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
maxPostsPerSource: int,
|
maxPostsPerSource: int,
|
||||||
maxFeedItemSizeKb: int,
|
maxFeedItemSizeKb: int,
|
||||||
maxCategoriesFeedItemSizeKb: int) -> {}:
|
maxCategoriesFeedItemSizeKb: int) -> {}:
|
||||||
"""Converts an xml 2.0 string to a dictionary
|
"""Converts an xml RSS 2.0 string to a dictionary
|
||||||
"""
|
"""
|
||||||
if '<item>' not in xmlStr:
|
if '<item>' not in xmlStr:
|
||||||
return {}
|
return {}
|
||||||
|
|
@ -328,7 +328,90 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
if postCtr >= maxPostsPerSource:
|
if postCtr >= maxPostsPerSource:
|
||||||
break
|
break
|
||||||
if postCtr > 0:
|
if postCtr > 0:
|
||||||
print('Added ' + str(postCtr) + ' rss feed items to newswire')
|
print('Added ' + str(postCtr) + ' rss 2.0 feed items to newswire')
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def xml1StrToDict(baseDir: str, domain: str, xmlStr: str,
                  moderated: bool, mirrored: bool,
                  maxPostsPerSource: int,
                  maxFeedItemSizeKb: int,
                  maxCategoriesFeedItemSizeKb: int) -> {}:
    """Converts an xml RSS 1.0 string to a dictionary
    https://validator.w3.org/feed/docs/rss1.html

    The feed is scanned with plain string splitting rather than an xml
    parser. An item is only accepted if it contains a title, a link with
    a scheme ('://') and a dc:date which parseFeedDate can interpret,
    and if the domain of its link is not blocked.

    If the feed contains the title '#categories' then it is handed to
    xml2StrToHashtagCategories and an empty dictionary is returned.

    At most maxPostsPerSource items are added, and any item longer than
    maxFeedItemSizeKb kilobytes is skipped with a warning.

    Returns the dictionary passed to addNewswireDictEntry for each
    accepted item (presumably populated by that function - see its
    definition), or an empty dictionary if there are no items.
    """
    itemStr = '<item'
    if itemStr not in xmlStr:
        return {}
    result = {}

    # is this an rss feed containing hashtag categories?
    if '<title>#categories</title>' in xmlStr:
        xml2StrToHashtagCategories(baseDir, xmlStr,
                                   maxCategoriesFeedItemSizeKb)
        return {}

    # Everything before the first '<item' is the feed preamble (channel
    # title, link and possibly dc:date), not an item. Drop it so that
    # the channel itself is never added to the newswire as a post.
    rssItems = xmlStr.split(itemStr)[1:]
    postCtr = 0
    maxBytes = maxFeedItemSizeKb * 1024
    requiredTags = (
        '<title>', '</title>',
        '<link>', '</link>',
        '<dc:date>', '</dc:date>'
    )
    for rssItem in rssItems:
        if not rssItem:
            continue
        if len(rssItem) > maxBytes:
            print('WARN: rss 1.0 feed item is too big')
            continue
        # splitting on '<item' also matches the channel's <items> list,
        # whose chunk begins with 's>'
        if rssItem.startswith('s>'):
            continue
        if any(tag not in rssItem for tag in requiredTags):
            continue

        title = rssItem.split('<title>')[1]
        title = removeCDATA(title.split('</title>')[0])

        description = ''
        if '<description>' in rssItem and '</description>' in rssItem:
            description = rssItem.split('<description>')[1]
            description = removeCDATA(description.split('</description>')[0])
        else:
            if '<media:description>' in rssItem and \
               '</media:description>' in rssItem:
                description = rssItem.split('<media:description>')[1]
                description = description.split('</media:description>')[0]
                description = removeCDATA(description)

        link = rssItem.split('<link>')[1]
        link = link.split('</link>')[0]
        if '://' not in link:
            continue
        # the domain is the part of the link between '://' and the
        # first following '/'
        itemDomain = link.split('://')[1]
        if '/' in itemDomain:
            itemDomain = itemDomain.split('/')[0]
        if isBlockedDomain(baseDir, itemDomain):
            continue

        pubDate = rssItem.split('<dc:date>')[1]
        pubDate = pubDate.split('</dc:date>')[0]

        # items whose date cannot be parsed are silently ignored
        pubDateStr = parseFeedDate(pubDate)
        if pubDateStr:
            postFilename = ''
            votesStatus = []
            addNewswireDictEntry(baseDir, domain,
                                 result, pubDateStr,
                                 title, link,
                                 votesStatus, postFilename,
                                 description, moderated, mirrored)
            postCtr += 1
            if postCtr >= maxPostsPerSource:
                break
    if postCtr > 0:
        print('Added ' + str(postCtr) + ' rss 1.0 feed items to newswire')
    return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -489,6 +572,11 @@ def xmlStrToDict(baseDir: str, domain: str, xmlStr: str,
|
||||||
xmlStr, moderated, mirrored,
|
xmlStr, moderated, mirrored,
|
||||||
maxPostsPerSource, maxFeedItemSizeKb,
|
maxPostsPerSource, maxFeedItemSizeKb,
|
||||||
maxCategoriesFeedItemSizeKb)
|
maxCategoriesFeedItemSizeKb)
|
||||||
|
elif '<?xml version="1.0"' in xmlStr:
|
||||||
|
return xml1StrToDict(baseDir, domain,
|
||||||
|
xmlStr, moderated, mirrored,
|
||||||
|
maxPostsPerSource, maxFeedItemSizeKb,
|
||||||
|
maxCategoriesFeedItemSizeKb)
|
||||||
elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
|
elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
|
||||||
return atomFeedToDict(baseDir, domain,
|
return atomFeedToDict(baseDir, domain,
|
||||||
xmlStr, moderated, mirrored,
|
xmlStr, moderated, mirrored,
|
||||||
|
|
@ -520,7 +608,7 @@ def getRSS(baseDir: str, domain: str, session, url: str,
|
||||||
print('ERROR: getRSS url should be a string')
|
print('ERROR: getRSS url should be a string')
|
||||||
return None
|
return None
|
||||||
headers = {
|
headers = {
|
||||||
'Accept': 'text/xml; charset=UTF-8'
|
'Accept': 'text/xml, application/xml; charset=UTF-8'
|
||||||
}
|
}
|
||||||
params = None
|
params = None
|
||||||
sessionParams = {}
|
sessionParams = {}
|
||||||
|
|
|
||||||
4
tests.py
|
|
@ -2437,6 +2437,10 @@ def testFirstParagraphFromString():
|
||||||
def testParseFeedDate():
|
def testParseFeedDate():
|
||||||
print('testParseFeedDate')
|
print('testParseFeedDate')
|
||||||
|
|
||||||
|
pubDate = "2020-12-14T00:08:06+00:00"
|
||||||
|
publishedDate = parseFeedDate(pubDate)
|
||||||
|
assert publishedDate == "2020-12-14 00:08:06+00:00"
|
||||||
|
|
||||||
pubDate = "Tue, 08 Dec 2020 06:24:38 -0600"
|
pubDate = "Tue, 08 Dec 2020 06:24:38 -0600"
|
||||||
publishedDate = parseFeedDate(pubDate)
|
publishedDate = parseFeedDate(pubDate)
|
||||||
assert publishedDate == "2020-12-08 12:24:38+00:00"
|
assert publishedDate == "2020-12-08 12:24:38+00:00"
|
||||||
|
|
|
||||||
|
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 1.4 KiB |
|
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.4 KiB |
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
|
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
|
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
|
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
|
After Width: | Height: | Size: 1.2 KiB |
|
Before Width: | Height: | Size: 969 B After Width: | Height: | Size: 1.4 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 12 KiB |
|
|
@ -1,4 +1,6 @@
|
||||||
{
|
{
|
||||||
|
"post-separator-margin-top": "10px",
|
||||||
|
"post-separator-margin-bottom": "10px",
|
||||||
"time-color": "grey",
|
"time-color": "grey",
|
||||||
"event-color": "white",
|
"event-color": "white",
|
||||||
"login-bg-color": "#567726",
|
"login-bg-color": "#567726",
|
||||||
|
|
|
||||||