forked from indymedia/epicyon
Move rss functions
parent
bf04691ba2
commit
4d4bdceb76
|
@ -28,7 +28,7 @@ from posts import getUserUrl
|
||||||
from posts import checkDomains
|
from posts import checkDomains
|
||||||
from session import createSession
|
from session import createSession
|
||||||
from session import getJson
|
from session import getJson
|
||||||
from session import getRSS
|
from newswire import getRSS
|
||||||
from filters import addFilter
|
from filters import addFilter
|
||||||
from filters import removeFilter
|
from filters import removeFilter
|
||||||
import os
|
import os
|
||||||
|
|
|
@ -0,0 +1,130 @@
|
||||||
|
__filename__ = "newswire.py"
|
||||||
|
__author__ = "Bob Mottram"
|
||||||
|
__license__ = "AGPL3+"
|
||||||
|
__version__ = "1.1.0"
|
||||||
|
__maintainer__ = "Bob Mottram"
|
||||||
|
__email__ = "bob@freedombone.net"
|
||||||
|
__status__ = "Production"
|
||||||
|
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from socket import error as SocketError
|
||||||
|
import errno
|
||||||
|
from datetime import datetime
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
def _parseRssPubDate(pubDate: str) -> str:
    """Converts an RSS 2.0 pubDate into a timestamp string

    Returns the date in the form 'YYYY-MM-DD HH:MM:SS+HH:MM', or None
    if the date is not in one of the recognized formats
    """
    pubDate = pubDate.strip()
    # A numeric offset such as +0000 produces a timezone aware datetime,
    # whose string form already ends with the utc offset
    try:
        publishedDate = \
            datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z")
        return str(publishedDate)
    except ValueError:
        pass
    # UT and GMT both mean utc. They are matched as literal text, which
    # gives a naive datetime, so the offset is appended by hand to keep
    # the same format as the numeric case
    for zoneName in ('UT', 'GMT'):
        try:
            publishedDate = \
                datetime.strptime(pubDate,
                                  "%a, %d %b %Y %H:%M:%S " + zoneName)
            return str(publishedDate) + '+00:00'
        except ValueError:
            continue
    return None


def xml2StrToDict(xmlStr: str) -> {}:
    """Converts an xml 2.0 string to a dictionary

    Returns a dictionary which maps the publication date of each feed
    item, as a string, to a [title, link] list. Items without a title,
    link or pubDate, or with an unrecognized date, are left out.
    Items having the same publication date overwrite each other
    """
    if '<item>' not in xmlStr:
        return {}
    result = {}
    requiredTags = (
        '<title>', '</title>',
        '<link>', '</link>',
        '<pubDate>', '</pubDate>'
    )
    # The text before the first <item> is the channel header. It can
    # have its own title, link and pubDate but it is not a feed item
    rssItems = xmlStr.split('<item>')[1:]
    for rssItem in rssItems:
        # Only look inside the item, not at whatever follows it
        rssItem = rssItem.split('</item>')[0]
        if not all(tag in rssItem for tag in requiredTags):
            continue
        title = rssItem.split('<title>')[1]
        title = title.split('</title>')[0]
        link = rssItem.split('<link>')[1]
        link = link.split('</link>')[0]
        pubDate = rssItem.split('<pubDate>')[1]
        pubDate = pubDate.split('</pubDate>')[0]
        publishedStr = _parseRssPubDate(pubDate)
        if publishedStr is None:
            print('WARN: unrecognized RSS date format: ' + pubDate)
            continue
        result[publishedStr] = [title, link]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def xmlStrToDict(xmlStr: str) -> {}:
    """Converts an xml string to a dictionary

    Only text containing the marker rss version="2.0" is converted,
    by handing it to xml2StrToDict. Anything else gives an empty
    dictionary
    """
    isRss2 = 'rss version="2.0"' in xmlStr
    if not isRss2:
        return {}
    return xml2StrToDict(xmlStr)
|
||||||
|
|
||||||
|
|
||||||
|
def getRSS(session, url: str) -> {}:
    """Returns an RSS url as a dict

    session is used via its get method, called with the url and
    headers and params keyword arguments, and the text attribute of
    the response is converted with xmlStrToDict.
    Returns None if the url is not a string, if there is no session
    or if the download fails
    """
    if not isinstance(url, str):
        print('url: ' + str(url))
        print('ERROR: getRSS url should be a string')
        return None
    if not session:
        # Without a session the get below could only fail with an
        # uncaught AttributeError, so report failure like the other
        # error cases do
        print('WARN: no session specified for getRSS')
        return None
    sessionParams = {}
    sessionHeaders = {
        'Accept': 'text/xml; charset=UTF-8',
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'
    }
    try:
        result = session.get(url, headers=sessionHeaders, params=sessionParams)
        return xmlStrToDict(result.text)
    except (requests.exceptions.RequestException, ValueError) as e:
        print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +
              'headers: ' + str(sessionHeaders) + '\n' +
              'params: ' + str(sessionParams) + '\n')
        print(e)
    except SocketError as e:
        if e.errno == errno.ECONNRESET:
            print('WARN: connection was reset during getRSS')
        print(e)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def getRSSFromSubscriptions(session, subscriptionsFilename: str) -> {}:
    """Gets rss feeds as a dictionary from a list of feeds stored in a file

    The file is read as one feed url per line. Lines which don't
    contain :// and lines beginning with # are ignored.
    The items of every feed are merged into a single dictionary, with
    later feeds replacing earlier entries having the same key, and
    returned as an OrderedDict sorted by key.
    Returns an empty dict if the file does not exist
    """
    if not os.path.isfile(subscriptionsFilename):
        return {}

    with open(subscriptionsFilename, 'r') as fp:
        rssFeed = fp.readlines()
    result = {}
    for url in rssFeed:
        url = url.strip()
        if '://' not in url:
            continue
        if url.startswith('#'):
            continue
        feedItems = getRSS(session, url)
        # getRSS gives None when a feed can't be retrieved. Skip that
        # feed rather than abandoning all of the others
        if not feedItems:
            continue
        # dict views can't be joined with + in python 3, so merge
        # with update instead
        result.update(feedItems)
    sortedResult = OrderedDict(sorted(result.items(), reverse=False))
    return sortedResult
|
118
session.py
118
session.py
|
@ -12,8 +12,6 @@ from utils import urlPermitted
|
||||||
import json
|
import json
|
||||||
from socket import error as SocketError
|
from socket import error as SocketError
|
||||||
import errno
|
import errno
|
||||||
from datetime import datetime
|
|
||||||
from collections import OrderedDict
|
|
||||||
|
|
||||||
baseDirectory = None
|
baseDirectory = None
|
||||||
|
|
||||||
|
@ -94,122 +92,6 @@ def getJson(session, url: str, headers: {}, params: {},
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _parseRssPubDate(pubDate: str) -> str:
    """Converts an RSS 2.0 pubDate into a timestamp string

    Returns the date in the form 'YYYY-MM-DD HH:MM:SS+HH:MM', or None
    if the date is not in one of the recognized formats
    """
    pubDate = pubDate.strip()
    # A numeric offset such as +0000 produces a timezone aware datetime,
    # whose string form already ends with the utc offset
    try:
        publishedDate = \
            datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z")
        return str(publishedDate)
    except ValueError:
        pass
    # UT and GMT both mean utc. They are matched as literal text, which
    # gives a naive datetime, so the offset is appended by hand to keep
    # the same format as the numeric case
    for zoneName in ('UT', 'GMT'):
        try:
            publishedDate = \
                datetime.strptime(pubDate,
                                  "%a, %d %b %Y %H:%M:%S " + zoneName)
            return str(publishedDate) + '+00:00'
        except ValueError:
            continue
    return None


def xml2StrToDict(xmlStr: str) -> {}:
    """Converts an xml 2.0 string to a dictionary

    Returns a dictionary which maps the publication date of each feed
    item, as a string, to a [title, link] list. Items without a title,
    link or pubDate, or with an unrecognized date, are left out.
    Items having the same publication date overwrite each other
    """
    if '<item>' not in xmlStr:
        return {}
    result = {}
    requiredTags = (
        '<title>', '</title>',
        '<link>', '</link>',
        '<pubDate>', '</pubDate>'
    )
    # The text before the first <item> is the channel header. It can
    # have its own title, link and pubDate but it is not a feed item
    rssItems = xmlStr.split('<item>')[1:]
    for rssItem in rssItems:
        # Only look inside the item, not at whatever follows it
        rssItem = rssItem.split('</item>')[0]
        if not all(tag in rssItem for tag in requiredTags):
            continue
        title = rssItem.split('<title>')[1]
        title = title.split('</title>')[0]
        link = rssItem.split('<link>')[1]
        link = link.split('</link>')[0]
        pubDate = rssItem.split('<pubDate>')[1]
        pubDate = pubDate.split('</pubDate>')[0]
        publishedStr = _parseRssPubDate(pubDate)
        if publishedStr is None:
            print('WARN: unrecognized RSS date format: ' + pubDate)
            continue
        result[publishedStr] = [title, link]
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def xmlStrToDict(xmlStr: str) -> {}:
    """Converts an xml string to a dictionary

    Only text containing the marker rss version="2.0" is converted,
    by handing it to xml2StrToDict. Anything else gives an empty
    dictionary
    """
    isRss2 = 'rss version="2.0"' in xmlStr
    if not isRss2:
        return {}
    return xml2StrToDict(xmlStr)
|
|
||||||
|
|
||||||
|
|
||||||
def getRSS(session, url: str) -> {}:
    """Returns an RSS url as a dict

    session is used via its get method, called with the url and
    headers and params keyword arguments, and the text attribute of
    the response is converted with xmlStrToDict.
    Returns None if the url is not a string, if there is no session
    or if the download fails
    """
    if not isinstance(url, str):
        print('url: ' + str(url))
        print('ERROR: getRSS url should be a string')
        return None
    if not session:
        # Without a session the get below could only fail with an
        # uncaught AttributeError, so report failure like the other
        # error cases do
        print('WARN: no session specified for getRSS')
        return None
    sessionParams = {}
    sessionHeaders = {
        'Accept': 'text/xml; charset=UTF-8',
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'
    }
    try:
        result = session.get(url, headers=sessionHeaders, params=sessionParams)
        return xmlStrToDict(result.text)
    except (requests.exceptions.RequestException, ValueError) as e:
        print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +
              'headers: ' + str(sessionHeaders) + '\n' +
              'params: ' + str(sessionParams) + '\n')
        print(e)
    except SocketError as e:
        if e.errno == errno.ECONNRESET:
            print('WARN: connection was reset during getRSS')
        print(e)
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def getRSSFromSubscriptions(session, subscriptionsFilename: str) -> {}:
    """Gets rss feeds as a dictionary from a list of feeds stored in a file

    The file is read as one feed url per line. Lines which don't
    contain :// and lines beginning with # are ignored.
    The items of every feed are merged into a single dictionary, with
    later feeds replacing earlier entries having the same key, and
    returned as an OrderedDict sorted by key.
    Returns an empty dict if the file does not exist
    """
    if not os.path.isfile(subscriptionsFilename):
        return {}

    with open(subscriptionsFilename, 'r') as fp:
        rssFeed = fp.readlines()
    result = {}
    for url in rssFeed:
        url = url.strip()
        if '://' not in url:
            continue
        if url.startswith('#'):
            continue
        feedItems = getRSS(session, url)
        # getRSS gives None when a feed can't be retrieved. Skip that
        # feed rather than abandoning all of the others
        if not feedItems:
            continue
        # dict views can't be joined with + in python 3, so merge
        # with update instead
        result.update(feedItems)
    sortedResult = OrderedDict(sorted(result.items(), reverse=False))
    return sortedResult
|
|
||||||
|
|
||||||
|
|
||||||
def postJson(session, postJsonObject: {}, federationList: [],
|
def postJson(session, postJsonObject: {}, federationList: [],
|
||||||
inboxUrl: str, headers: {}) -> str:
|
inboxUrl: str, headers: {}) -> str:
|
||||||
"""Post a json message to the inbox of another person
|
"""Post a json message to the inbox of another person
|
||||||
|
|
Loading…
Reference in New Issue