epicyon/newswire.py

__filename__ = "newswire.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.1.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"

import os
import requests
from socket import error as SocketError
import errno
from datetime import datetime
from collections import OrderedDict
from utils import locatePost
from utils import loadJson
from utils import saveJson
from utils import isSuspended


def rss2Header(httpPrefix: str,
               nickname: str, domainFull: str,
               title: str, translate: {}) -> str:
    """Returns the opening part of an RSS 2.0 feed: the xml declaration,
    the rss and channel opening tags, then the channel title and link.
    Titles beginning with 'News' describe the newswire, titles beginning
    with 'Site' describe the site-wide blog feed, and any other title is
    looked up within translate and linked to the feed of the given
    nickname
    """
    if title.startswith('News'):
        channelTitle = 'Newswire'
        feedPath = '/newswire.xml'
    elif title.startswith('Site'):
        channelTitle = domainFull
        feedPath = '/blog/rss.xml'
    else:
        channelTitle = translate[title]
        feedPath = '/users/' + nickname + '/rss.xml'
    channelLink = httpPrefix + '://' + domainFull + feedPath

    headerParts = (
        "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",
        "<rss version=\"2.0\">",
        '<channel>',
        '    <title>' + channelTitle + '</title>',
        '    <link>' + channelLink + '</link>'
    )
    return ''.join(headerParts)


def rss2Footer() -> str:
    """Returns the closing channel and rss tags which end an
    RSS 2.0 feed
    """
    return '</channel>' + '</rss>'


def _parseRssPubDate(pubDate: str) -> str:
    """Returns the newswire dictionary key for an RSS pubDate,
    or an empty string if the date format is not recognized
    """
    try:
        # date with a numeric timezone offset, eg. +0000
        # str() of the timezone aware result already ends with the offset
        return str(datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z"))
    except ValueError:
        pass
    try:
        publishedDate = \
            datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT")
    except ValueError:
        return ''
    # UT dates are parsed as naive, so add the offset explicitly
    return str(publishedDate) + '+00:00'


def xml2StrToDict(xmlStr: str, moderated: bool,
                  maxPostsPerSource: int) -> {}:
    """Converts an xml 2.0 string to a dictionary
    Returns a dict whose keys are published date strings and whose
    values are lists of the form
    [title, link, votesStatus, postFilename, description, moderated].
    Items lacking a title, link or pubDate, or having an unrecognized
    date format, are skipped. At most maxPostsPerSource items are
    returned, although at least one item is stored before the limit
    is checked
    """
    if '<item>' not in xmlStr:
        return {}
    requiredTags = ('<title>', '</title>',
                    '<link>', '</link>',
                    '<pubDate>', '</pubDate>')
    result = {}
    postCtr = 0
    # The text before the first <item> is the channel header, which has
    # its own title, link and possibly pubDate, so it is not an item
    for rssItem in xmlStr.split('<item>')[1:]:
        if not all(tag in rssItem for tag in requiredTags):
            continue
        title = rssItem.split('<title>')[1]
        title = title.split('</title>')[0]
        description = ''
        if '<description>' in rssItem and '</description>' in rssItem:
            description = rssItem.split('<description>')[1]
            description = description.split('</description>')[0]
        link = rssItem.split('<link>')[1]
        link = link.split('</link>')[0]
        pubDate = rssItem.split('<pubDate>')[1]
        pubDate = pubDate.split('</pubDate>')[0]

        publishedKey = _parseRssPubDate(pubDate)
        if not publishedKey:
            print('WARN: unrecognized RSS date format: ' + pubDate)
            continue
        postFilename = ''
        votesStatus = []
        result[publishedKey] = [title, link,
                                votesStatus, postFilename,
                                description, moderated]
        postCtr += 1
        if postCtr >= maxPostsPerSource:
            break
    return result


def _parseAtomUpdated(pubDate: str) -> str:
    """Returns the newswire dictionary key for an atom updated date,
    or an empty string if the date format is not recognized
    """
    for dateFormat in ("%Y-%m-%dT%H:%M:%SZ",
                       "%a, %d %b %Y %H:%M:%S UT"):
        try:
            publishedDate = datetime.strptime(pubDate, dateFormat)
        except ValueError:
            continue
        # Both formats are UTC but parse as naive datetimes. Add the
        # offset so that keys have the same form as those from RSS feeds
        return str(publishedDate) + '+00:00'
    return ''


def atomFeedToDict(xmlStr: str, moderated: bool,
                   maxPostsPerSource: int) -> {}:
    """Converts an atom feed string to a dictionary
    Returns a dict whose keys are published date strings and whose
    values are lists of the form
    [title, link, votesStatus, postFilename, description, moderated].
    Only entries having <title>, <link>...</link> and <updated> tags
    are used, so entries whose link is an attribute-only tag are
    skipped, as are entries with an unrecognized date format.
    At most maxPostsPerSource entries are returned, although at least
    one entry is stored before the limit is checked
    """
    if '<entry>' not in xmlStr:
        return {}
    requiredTags = ('<title>', '</title>',
                    '<link>', '</link>',
                    '<updated>', '</updated>')
    result = {}
    postCtr = 0
    # The text before the first <entry> is the feed header, which has
    # its own title and updated date, so it is not an entry
    for rssItem in xmlStr.split('<entry>')[1:]:
        if not all(tag in rssItem for tag in requiredTags):
            continue
        title = rssItem.split('<title>')[1]
        title = title.split('</title>')[0]
        description = ''
        if '<summary>' in rssItem and '</summary>' in rssItem:
            description = rssItem.split('<summary>')[1]
            description = description.split('</summary>')[0]
        link = rssItem.split('<link>')[1]
        link = link.split('</link>')[0]
        pubDate = rssItem.split('<updated>')[1]
        pubDate = pubDate.split('</updated>')[0]

        publishedKey = _parseAtomUpdated(pubDate)
        if not publishedKey:
            print('WARN: unrecognized atom feed date format: ' + pubDate)
            continue
        postFilename = ''
        votesStatus = []
        result[publishedKey] = [title, link,
                                votesStatus, postFilename,
                                description, moderated]
        postCtr += 1
        if postCtr >= maxPostsPerSource:
            break
    return result


def xmlStrToDict(xmlStr: str, moderated: bool,
                 maxPostsPerSource: int) -> {}:
    """Converts a feed given as an xml string into a dictionary,
    choosing the RSS 2.0 or atom converter from markers within the
    string. An empty dictionary is returned for any other content
    """
    if 'rss version="2.0"' in xmlStr:
        feedConverter = xml2StrToDict
    elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:
        feedConverter = atomFeedToDict
    else:
        # not a recognized type of feed
        return {}
    return feedConverter(xmlStr, moderated, maxPostsPerSource)


def getRSS(session, url: str, moderated: bool,
           maxPostsPerSource: int,
           maxFeedSizeKb: int) -> {}:
    """Returns an RSS url as a dict
    The feed is fetched with the given session and converted with
    xmlStrToDict. None is returned if the url is not a string, if there
    is no session, if the request fails or gives a falsy response, or
    if the feed body is maxFeedSizeKb kilobytes or larger.
    Note that the size is checked after the body has been downloaded
    """
    if not isinstance(url, str):
        print('url: ' + str(url))
        print('ERROR: getRSS url should be a string')
        return None
    if not session:
        # without a session there is nothing to make the request with
        print('WARN: no session specified for getRSS')
        return None
    sessionHeaders = {
        'Accept': 'text/xml; charset=UTF-8',
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'
    }
    sessionParams = {}
    try:
        result = session.get(url, headers=sessionHeaders, params=sessionParams)
        if result:
            # the response object itself has no length,
            # so measure the bytes of its body
            if int(len(result.content) / 1024) < maxFeedSizeKb:
                return xmlStrToDict(result.text, moderated, maxPostsPerSource)
            else:
                print('WARN: feed is too large: ' + url)
    except (requests.exceptions.RequestException, ValueError) as e:
        print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +
              'headers: ' + str(sessionHeaders) + '\n' +
              'params: ' + str(sessionParams) + '\n')
        print(e)
    except SocketError as e:
        if e.errno == errno.ECONNRESET:
            print('WARN: connection was reset during getRSS')
        print(e)
    return None


def getRSSfromDict(baseDir: str, newswire: {},
                   httpPrefix: str, domainFull: str,
                   title: str, translate: {}) -> str:
    """Returns an rss feed from the current newswire dict.
    This allows other instances to subscribe to the same newswire.
    The header is always created with the title 'Newswire'; the
    baseDir and title arguments are not used here.
    Each newswire key is expected to be a date of the form
    YYYY-MM-DD HH:MM:SS+00:00 and entries with any other key are
    left out of the feed
    """
    rssStr = rss2Header(httpPrefix,
                        None, domainFull,
                        'Newswire', translate)
    for published, fields in newswire.items():
        published = published.replace('+00:00', 'Z').strip()
        published = published.replace(' ', 'T')
        try:
            pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            continue
        rssStr += '<item>\n'
        rssStr += '  <title>' + fields[0] + '</title>\n'
        url = fields[1]
        # Only relative links need the address of this instance to be
        # prepended. Absolute links to other sites are left unchanged
        if '://' not in url and domainFull not in url:
            url = httpPrefix + '://' + domainFull + url
        rssStr += '  <link>' + url + '</link>\n'

        rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
        rssStr += '  <pubDate>' + rssDateStr + '</pubDate>\n'
        rssStr += '</item>\n'
    rssStr += rss2Footer()
    return rssStr


def isaBlogPost(postJsonObject: {}) -> bool:
    """Returns true if the given post has an object dictionary
    containing a non-empty summary, url and published date
    """
    if not postJsonObject:
        return False
    postObject = postJsonObject.get('object')
    if not postObject or not isinstance(postObject, dict):
        return False
    blogFields = ('summary', 'url', 'published')
    return all(postObject.get(fieldName) for fieldName in blogFields)


def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
                              newswire: {},
                              maxBlogsPerAccount: int,
                              indexFilename: str) -> None:
    """Reads up to maxBlogsPerAccount lines from the given blogs index
    and adds each post which is a blog post to the newswire dictionary,
    keyed by its published date. Lines whose post can't be located
    still count towards the maximum
    """
    if not os.path.isfile(indexFilename):
        return

    # local blog entries are unmoderated by default, but become
    # moderated if this file exists within the account directory
    moderated = \
        os.path.isfile(baseDir + '/accounts/' + nickname + '@' + domain +
                       '/.newswiremoderated')

    linesRead = 0
    with open(indexFilename, 'r') as indexFile:
        for indexLine in indexFile:
            # Remove any directories, line endings and extension.
            # This should also correspond to any index entry in
            # the posts cache
            postUrl = indexLine.split('/')[-1]
            postUrl = postUrl.replace('\n', '').replace('\r', '')
            postUrl = postUrl.replace('.json', '').strip()

            # read the post from file
            fullPostFilename = \
                locatePost(baseDir, nickname,
                           domain, postUrl, False)
            if not fullPostFilename:
                print('Unable to locate post ' + postUrl)
            else:
                postJsonObject = loadJson(fullPostFilename)
                if isaBlogPost(postJsonObject):
                    postObject = postJsonObject['object']
                    published = postObject['published']
                    published = published.replace('T', ' ')
                    published = published.replace('Z', '+00:00')
                    votesFilename = fullPostFilename + '.votes'
                    votes = []
                    if os.path.isfile(votesFilename):
                        votes = loadJson(votesFilename)
                    description = ''
                    newswire[published] = \
                        [postObject['summary'],
                         postObject['url'], votes,
                         fullPostFilename, description, moderated]

            # every index line counts, whether or not it was added
            linesRead += 1
            if linesRead >= maxBlogsPerAccount:
                break


def addBlogsToNewswire(baseDir: str, newswire: {},
                       maxBlogsPerAccount: int) -> None:
    """Adds blogs from each user account into the newswire
    Accounts are the directories immediately below baseDir/accounts
    whose name contains '@'. The inbox account, suspended accounts and
    accounts containing a .nonewswire file are skipped.
    The newswire moderation file is saved if there is anything to
    moderate and is removed otherwise
    """
    # NOTE: nothing within this function adds to the moderation
    # dictionary, so at present the moderation file is always removed
    moderationDict = {}

    # Go through each account. Only the top level of the accounts
    # directory is examined, since directories further down belong to
    # individual accounts and are not themselves account handles
    accountsDir = baseDir + '/accounts'
    _, handles, _ = next(os.walk(accountsDir), (accountsDir, [], []))
    for handle in handles:
        if '@' not in handle:
            continue
        if 'inbox@' in handle:
            continue

        nickname = handle.split('@')[0]

        # has this account been suspended?
        if isSuspended(baseDir, nickname):
            continue

        if os.path.isfile(baseDir + '/accounts/' + handle +
                          '/.nonewswire'):
            continue

        # is there a blogs timeline for this account?
        accountDir = os.path.join(baseDir + '/accounts', handle)
        blogsIndex = accountDir + '/tlblogs.index'
        if os.path.isfile(blogsIndex):
            domain = handle.split('@')[1]
            addAccountBlogsToNewswire(baseDir, nickname, domain,
                                      newswire, maxBlogsPerAccount,
                                      blogsIndex)

    # sort the moderation dict into chronological order, latest first
    sortedModerationDict = \
        OrderedDict(sorted(moderationDict.items(), reverse=True))
    # save the moderation queue details for later display
    newswireModerationFilename = baseDir + '/accounts/newswiremoderation.txt'
    if sortedModerationDict:
        saveJson(sortedModerationDict, newswireModerationFilename)
    else:
        # remove the file if there is nothing to moderate
        if os.path.isfile(newswireModerationFilename):
            os.remove(newswireModerationFilename)


def getDictFromNewswire(session, baseDir: str,
                        maxPostsPerSource: int, maxFeedSizeKb: int) -> {}:
    """Gets rss feeds as a dictionary from newswire file
    Each line of baseDir/accounts/newswire.txt containing a url is
    fetched as a feed. Lines beginning with '#' are comments and a '*'
    within a line marks the feed as moderated. Blog posts from the
    accounts on this instance are then added.
    Returns the items sorted by date, latest first, or an empty
    dictionary if there is no newswire file
    """
    subscriptionsFilename = baseDir + '/accounts/newswire.txt'
    if not os.path.isfile(subscriptionsFilename):
        return {}

    # add rss feeds
    rssFeed = []
    with open(subscriptionsFilename, 'r') as fp:
        rssFeed = fp.readlines()
    result = {}
    for url in rssFeed:
        url = url.strip()

        # Does this contain a url?
        if '://' not in url:
            continue

        # is this a comment?
        if url.startswith('#'):
            continue

        # should this feed be moderated?
        moderated = False
        if '*' in url:
            moderated = True
            url = url.replace('*', '').strip()

        # maxPostsPerSource is as given by the caller,
        # rather than being replaced by a fixed value
        itemsList = getRSS(session, url, moderated,
                           maxPostsPerSource, maxFeedSizeKb)
        # getRSS returns None if the feed could not be obtained.
        # One failing feed should not prevent the others from loading
        if not itemsList:
            continue
        result.update(itemsList)

    # add blogs from each user account
    addBlogsToNewswire(baseDir, result, maxPostsPerSource)

    # sort into chronological order, latest first
    sortedResult = OrderedDict(sorted(result.items(), reverse=True))
    return sortedResult
Move rss functions 2020-10-04 09:51:12 +00:00			`__filename__ = "newswire.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
			`__version__ = "1.1.0"`
			`__maintainer__ = "Bob Mottram"`
			`__email__ = "bob@freedombone.net"`
			`__status__ = "Production"`

			`import os`
			`import requests`
			`from socket import error as SocketError`
			`import errno`
			`from datetime import datetime`
			`from collections import OrderedDict`
Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00			`from utils import locatePost`
			`from utils import loadJson`
Save the current newswire moderation state to a file 2020-10-06 11:28:32 +00:00			`from utils import saveJson`
Tidying 2020-10-06 08:58:44 +00:00			`from utils import isSuspended`
Move rss functions 2020-10-04 09:51:12 +00:00

Newswire rss feed 2020-10-04 12:29:07 +00:00			`def rss2Header(httpPrefix: str,`
			`nickname: str, domainFull: str,`
			`title: str, translate: {}) -> str:`
Comment 2020-10-06 09:22:23 +00:00			`"""Header for an RSS 2.0 feed`
			`"""`
Newswire rss feed 2020-10-04 12:29:07 +00:00			`rssStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"`
			`rssStr += "<rss version=\"2.0\">"`
			`rssStr += '<channel>'`
Title for site rss feed 2020-10-13 17:14:57 +00:00
Newswire rss feed 2020-10-04 12:29:07 +00:00			`if title.startswith('News'):`
			`rssStr += ' <title>Newswire</title>'`
			`rssStr += ' <link>' + httpPrefix + '://' + domainFull + \`
			`'/newswire.xml' + '</link>'`
Title for site rss feed 2020-10-13 17:14:57 +00:00			`elif title.startswith('Site'):`
			`rssStr += ' <title>' + domainFull + '</title>'`
			`rssStr += ' <link>' + httpPrefix + '://' + domainFull + \`
Singular 2020-10-13 17:17:17 +00:00			`'/blog/rss.xml' + '</link>'`
Newswire rss feed 2020-10-04 12:29:07 +00:00			`else:`
Title for site rss feed 2020-10-13 17:14:57 +00:00			`rssStr += ' <title>' + translate[title] + '</title>'`
Newswire rss feed 2020-10-04 12:29:07 +00:00			`rssStr += ' <link>' + httpPrefix + '://' + domainFull + \`
			`'/users/' + nickname + '/rss.xml' + '</link>'`
			`return rssStr`


			`def rss2Footer() -> str:`
Comment 2020-10-06 09:22:23 +00:00			`"""Footer for an RSS 2.0 feed`
			`"""`
Newswire rss feed 2020-10-04 12:29:07 +00:00			`rssStr = '</channel>'`
			`rssStr += '</rss>'`
			`return rssStr`


Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`def xml2StrToDict(xmlStr: str, moderated: bool,`
			`maxPostsPerSource: int) -> {}:`
Move rss functions 2020-10-04 09:51:12 +00:00			`"""Converts an xml 2.0 string to a dictionary`
			`"""`
			`if '<item>' not in xmlStr:`
			`return {}`
			`result = {}`
			`rssItems = xmlStr.split('<item>')`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postCtr = 0`
Move rss functions 2020-10-04 09:51:12 +00:00			`for rssItem in rssItems:`
			`if '<title>' not in rssItem:`
			`continue`
			`if '</title>' not in rssItem:`
			`continue`
			`if '<link>' not in rssItem:`
			`continue`
			`if '</link>' not in rssItem:`
			`continue`
			`if '<pubDate>' not in rssItem:`
			`continue`
			`if '</pubDate>' not in rssItem:`
			`continue`
			`title = rssItem.split('<title>')[1]`
			`title = title.split('</title>')[0]`
Move news daemon functions 2020-10-07 12:05:49 +00:00			`description = ''`
			`if '<description>' in rssItem and '</description>' in rssItem:`
			`description = rssItem.split('<description>')[1]`
			`description = description.split('</description>')[0]`
Move rss functions 2020-10-04 09:51:12 +00:00			`link = rssItem.split('<link>')[1]`
			`link = link.split('</link>')[0]`
			`pubDate = rssItem.split('<pubDate>')[1]`
			`pubDate = pubDate.split('</pubDate>')[0]`
			`parsed = False`
			`try:`
			`publishedDate = \`
			`datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z")`
More obvious what list entries mean 2020-10-09 08:40:41 +00:00			`postFilename = ''`
			`votesStatus = []`
			`result[str(publishedDate)] = [title, link,`
			`votesStatus, postFilename,`
Add a moderated flag to newswire entries 2020-10-09 10:33:06 +00:00			`description, moderated]`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postCtr += 1`
			`if postCtr >= maxPostsPerSource:`
			`break`
Move rss functions 2020-10-04 09:51:12 +00:00			`parsed = True`
			`except BaseException:`
			`pass`
			`if not parsed:`
			`try:`
			`publishedDate = \`
			`datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT")`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postFilename = ''`
			`votesStatus = []`
			`result[str(publishedDate) + '+00:00'] = \`
			`[title, link,`
			`votesStatus, postFilename,`
			`description, moderated]`
			`postCtr += 1`
			`if postCtr >= maxPostsPerSource:`
			`break`
Move rss functions 2020-10-04 09:51:12 +00:00			`parsed = True`
			`except BaseException:`
			`print('WARN: unrecognized RSS date format: ' + pubDate)`
			`pass`
			`return result`


Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`def atomFeedToDict(xmlStr: str, moderated: bool,`
			`maxPostsPerSource: int) -> {}:`
Same date format as rss 2020-10-10 12:24:14 +00:00			`"""Converts an atom feed string to a dictionary`
			`"""`
			`if '<entry>' not in xmlStr:`
			`return {}`
			`result = {}`
			`rssItems = xmlStr.split('<entry>')`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postCtr = 0`
Same date format as rss 2020-10-10 12:24:14 +00:00			`for rssItem in rssItems:`
			`if '<title>' not in rssItem:`
			`continue`
			`if '</title>' not in rssItem:`
			`continue`
			`if '<link>' not in rssItem:`
			`continue`
			`if '</link>' not in rssItem:`
			`continue`
			`if '<updated>' not in rssItem:`
			`continue`
			`if '</updated>' not in rssItem:`
			`continue`
			`title = rssItem.split('<title>')[1]`
			`title = title.split('</title>')[0]`
			`description = ''`
			`if '<summary>' in rssItem and '</summary>' in rssItem:`
			`description = rssItem.split('<summary>')[1]`
			`description = description.split('</summary>')[0]`
			`link = rssItem.split('<link>')[1]`
			`link = link.split('</link>')[0]`
			`pubDate = rssItem.split('<updated>')[1]`
			`pubDate = pubDate.split('</updated>')[0]`
			`parsed = False`
			`try:`
			`publishedDate = \`
			`datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%SZ")`
			`postFilename = ''`
			`votesStatus = []`
			`result[str(publishedDate)] = [title, link,`
			`votesStatus, postFilename,`
			`description, moderated]`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postCtr += 1`
			`if postCtr >= maxPostsPerSource:`
			`break`
Same date format as rss 2020-10-10 12:24:14 +00:00			`parsed = True`
			`except BaseException:`
			`pass`
			`if not parsed:`
			`try:`
			`publishedDate = \`
			`datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT")`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`postFilename = ''`
			`votesStatus = []`
			`result[str(publishedDate) + '+00:00'] = \`
			`[title, link,`
			`votesStatus, postFilename,`
			`description, moderated]`
			`postCtr += 1`
			`if postCtr >= maxPostsPerSource:`
			`break`
Same date format as rss 2020-10-10 12:24:14 +00:00			`parsed = True`
			`except BaseException:`
			`print('WARN: unrecognized atom feed date format: ' + pubDate)`
			`pass`
			`return result`


Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`def xmlStrToDict(xmlStr: str, moderated: bool,`
			`maxPostsPerSource: int) -> {}:`
Move rss functions 2020-10-04 09:51:12 +00:00			`"""Converts an xml string to a dictionary`
			`"""`
			`if 'rss version="2.0"' in xmlStr:`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`return xml2StrToDict(xmlStr, moderated, maxPostsPerSource)`
Same date format as rss 2020-10-10 12:24:14 +00:00			`elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr:`
Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`return atomFeedToDict(xmlStr, moderated, maxPostsPerSource)`
Move rss functions 2020-10-04 09:51:12 +00:00			`return {}`


Set an upper limit on the number of newswire posts per rss feed Helps to avoid having a giant list of items 2020-10-16 10:13:14 +00:00			`def getRSS(session, url: str, moderated: bool,`
Maximum size for rss/atom feeds Prevents a hacked news source from implementing a denial of service 2020-10-16 11:40:01 +00:00			`maxPostsPerSource: int,`
			`maxFeedSizeKb: int) -> {}:`
Move rss functions 2020-10-04 09:51:12 +00:00			`"""Returns an RSS url as a dict`
			`"""`
			`if not isinstance(url, str):`
			`print('url: ' + str(url))`
			`print('ERROR: getRSS url should be a string')`
			`return None`
			`headers = {`
			`'Accept': 'text/xml; charset=UTF-8'`
			`}`
			`params = None`
			`sessionParams = {}`
			`sessionHeaders = {}`
			`if headers:`
			`sessionHeaders = headers`
			`if params:`
			`sessionParams = params`
			`sessionHeaders['User-Agent'] = \`
			`'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'`
			`if not session:`
			`print('WARN: no session specified for getRSS')`
			`try:`
			`result = session.get(url, headers=sessionHeaders, params=sessionParams)`
Maximum size for rss/atom feeds Prevents a hacked news source from implementing a denial of service 2020-10-16 11:40:01 +00:00			`if result:`
			`if int(len(result) / 1024) < maxFeedSizeKb:`
			`return xmlStrToDict(result.text, moderated, maxPostsPerSource)`
			`else:`
			`print('WARN: feed is too large: ' + url)`
Move rss functions 2020-10-04 09:51:12 +00:00			`except requests.exceptions.RequestException as e:`
			`print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +`
			`'headers: ' + str(sessionHeaders) + '\n' +`
			`'params: ' + str(sessionParams) + '\n')`
			`print(e)`
			`except ValueError as e:`
			`print('ERROR: getRSS failed\nurl: ' + str(url) + '\n' +`
			`'headers: ' + str(sessionHeaders) + '\n' +`
			`'params: ' + str(sessionParams) + '\n')`
			`print(e)`
			`except SocketError as e:`
			`if e.errno == errno.ECONNRESET:`
			`print('WARN: connection was reset during getRSS')`
			`print(e)`
			`return None`


Newswire rss feed 2020-10-04 12:29:07 +00:00			`def getRSSfromDict(baseDir: str, newswire: {},`
			`httpPrefix: str, domainFull: str,`
			`title: str, translate: {}) -> str:`
			`"""Returns an rss feed from the current newswire dict.`
			`This allows other instances to subscribe to the same newswire`
			`"""`
			`rssStr = rss2Header(httpPrefix,`
			`None, domainFull,`
			`'Newswire', translate)`
			`for published, fields in newswire.items():`
Date format 2020-10-04 22:16:00 +00:00			`published = published.replace('+00:00', 'Z').strip()`
			`published = published.replace(' ', 'T')`
Date format 2020-10-04 22:08:13 +00:00			`try:`
Date format 2020-10-04 22:12:27 +00:00			`pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")`
Date format 2020-10-04 22:08:13 +00:00			`except BaseException:`
			`continue`
Newswire rss feed 2020-10-04 12:29:07 +00:00			`rssStr += '<item>\n'`
			`rssStr += ' <title>' + fields[0] + '</title>\n'`
Full url in rss link 2020-10-08 15:07:06 +00:00			`url = fields[1]`
			`if domainFull not in url:`
			`url = httpPrefix + '://' + domainFull + url`
			`rssStr += ' <link>' + url + '</link>\n'`
Date format 2020-10-04 22:12:27 +00:00
Newswire rss feed 2020-10-04 12:29:07 +00:00			`rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")`
			`rssStr += ' <pubDate>' + rssDateStr + '</pubDate>\n'`
			`rssStr += '</item>\n'`
			`rssStr += rss2Footer()`
			`return rssStr`


Save the current newswire moderation state to a file 2020-10-06 11:28:32 +00:00			`def isaBlogPost(postJsonObject: {}) -> bool:`
			`"""Is the given object a blog post?`
			`"""`
			`if not postJsonObject:`
			`return False`
			`if not postJsonObject.get('object'):`
			`return False`
			`if not isinstance(postJsonObject['object'], dict):`
			`return False`
			`if postJsonObject['object'].get('summary') and \`
			`postJsonObject['object'].get('url') and \`
			`postJsonObject['object'].get('published'):`
			`return True`
			`return False`


Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00			`def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,`
			`newswire: {},`
			`maxBlogsPerAccount: int,`
			`indexFilename: str) -> None:`
			`"""Adds blogs for the given account to the newswire`
			`"""`
			`if not os.path.isfile(indexFilename):`
			`return`
Add a moderated flag to newswire entries 2020-10-09 10:33:06 +00:00			`# local blog entries are unmoderated by default`
			`moderated = False`

			`# local blogs can potentially be moderated`
			`moderatedFilename = \`
			`baseDir + '/accounts/' + nickname + '@' + domain + \`
			`'/.newswiremoderated'`
			`if os.path.isfile(moderatedFilename):`
			`moderated = True`

Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00			`with open(indexFilename, 'r') as indexFile:`
			`postFilename = 'start'`
			`ctr = 0`
			`while postFilename:`
			`postFilename = indexFile.readline()`
			`if postFilename:`
			`# if this is a full path then remove the directories`
			`if '/' in postFilename:`
			`postFilename = postFilename.split('/')[-1]`

			`# filename of the post without any extension or path`
			`# This should also correspond to any index entry in`
			`# the posts cache`
			`postUrl = \`
			`postFilename.replace('\n', '').replace('\r', '')`
			`postUrl = postUrl.replace('.json', '').strip()`

			`# read the post from file`
			`fullPostFilename = \`
			`locatePost(baseDir, nickname,`
			`domain, postUrl, False)`
Check that post is located 2020-10-06 13:05:15 +00:00			`if not fullPostFilename:`
			`print('Unable to locate post ' + postUrl)`
			`ctr += 1`
			`if ctr >= maxBlogsPerAccount:`
			`break`
Continue if post is not located 2020-10-06 13:34:04 +00:00			`continue`
Check that post is located 2020-10-06 13:05:15 +00:00
Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00			`postJsonObject = None`
			`if fullPostFilename:`
			`postJsonObject = loadJson(fullPostFilename)`
Save the current newswire moderation state to a file 2020-10-06 11:28:32 +00:00			`if isaBlogPost(postJsonObject):`
			`published = postJsonObject['object']['published']`
			`published = published.replace('T', ' ')`
			`published = published.replace('Z', '+00:00')`
Voting on newswire items 2020-10-06 20:17:34 +00:00			`votes = []`
			`if os.path.isfile(fullPostFilename + '.votes'):`
			`votes = loadJson(fullPostFilename + '.votes')`
Add a moderated flag to newswire entries 2020-10-09 10:33:06 +00:00			`description = ''`
Save the current newswire moderation state to a file 2020-10-06 11:28:32 +00:00			`newswire[published] = \`
			`[postJsonObject['object']['summary'],`
Voting on newswire items 2020-10-06 20:17:34 +00:00			`postJsonObject['object']['url'], votes,`
Add a moderated flag to newswire entries 2020-10-09 10:33:06 +00:00			`fullPostFilename, description, moderated]`
Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00
			`ctr += 1`
			`if ctr >= maxBlogsPerAccount:`
			`break`


def addBlogsToNewswire(baseDir: str, newswire: {},
                       maxBlogsPerAccount: int) -> None:
    """Adds blogs from each user account into the newswire

    baseDir: base directory which contains the 'accounts' directory
    newswire: dictionary of newswire items, handed on to
        addAccountBlogsToNewswire for every qualifying account
    maxBlogsPerAccount: limit handed on to addAccountBlogsToNewswire

    An account directory is skipped if its name has no '@', contains
    'inbox@', belongs to a suspended nickname, or holds a '.nonewswire'
    file. Afterwards the newswire moderation file is saved, or removed
    when there is nothing to moderate.
    """
    moderationDict = {}
    accountsDir = baseDir + '/accounts'

    # go through each account
    for subdir, dirs, files in os.walk(accountsDir):
        for handle in dirs:
            if '@' not in handle:
                continue
            if 'inbox@' in handle:
                continue

            nickname = handle.split('@')[0]

            # has this account been suspended?
            if isSuspended(baseDir, nickname):
                continue

            # has this account opted out of the newswire?
            if os.path.isfile(accountsDir + '/' + handle +
                              '/.nonewswire'):
                continue

            # is there a blogs timeline for this account?
            accountDir = os.path.join(accountsDir, handle)
            blogsIndex = accountDir + '/tlblogs.index'
            if os.path.isfile(blogsIndex):
                domain = handle.split('@')[1]
                addAccountBlogsToNewswire(baseDir, nickname, domain,
                                          newswire, maxBlogsPerAccount,
                                          blogsIndex)
        # Account directories are the immediate children of the accounts
        # directory, so stop after the top level rather than walking the
        # whole tree beneath every account
        break

    # sort the moderation dict into chronological order, latest first
    # NOTE: nothing in this function adds entries to moderationDict, so
    # as written the moderation file below is always removed if present
    sortedModerationDict = \
        OrderedDict(sorted(moderationDict.items(), reverse=True))
    # save the moderation queue details for later display
    newswireModerationFilename = baseDir + '/accounts/newswiremoderation.txt'
    if sortedModerationDict:
        saveJson(sortedModerationDict, newswireModerationFilename)
    else:
        # remove the file if there is nothing to moderate
        if os.path.isfile(newswireModerationFilename):
            os.remove(newswireModerationFilename)
Save the current newswire moderation state to a file 2020-10-06 11:28:32 +00:00
Add local blog posts to the newswire 2020-10-05 11:11:48 +00:00
def getDictFromNewswire(session, baseDir: str,
                        maxPostsPerSource: int, maxFeedSizeKb: int) -> {}:
    """Gets rss feeds as a dictionary from newswire file

    session: session object handed on to getRSS
    baseDir: base directory which contains accounts/newswire.txt
    maxPostsPerSource: limit handed on to getRSS for each feed and to
        addBlogsToNewswire; 5 is used if no positive value is given
    maxFeedSizeKb: feed size limit handed on to getRSS

    Each line of accounts/newswire.txt which contains '://' and does
    not begin with '#' is treated as a feed url. A '*' within the line
    marks the feed as moderated and is removed from the url.

    Returns the collected items, keyed by date string and sorted in
    reverse key order (latest first), or an empty dictionary if the
    newswire file does not exist.
    """
    subscriptionsFilename = baseDir + '/accounts/newswire.txt'
    if not os.path.isfile(subscriptionsFilename):
        return {}

    # honour the limit supplied by the caller, only falling back to
    # the previously hard-coded value when no usable limit is given
    if not maxPostsPerSource or maxPostsPerSource < 1:
        maxPostsPerSource = 5

    # add rss feeds
    with open(subscriptionsFilename, 'r') as fp:
        rssFeed = fp.readlines()
    result = {}
    for url in rssFeed:
        url = url.strip()

        # Does this contain a url?
        if '://' not in url:
            continue

        # is this a comment?
        if url.startswith('#'):
            continue

        # should this feed be moderated?
        moderated = False
        if '*' in url:
            moderated = True
            url = url.replace('*', '').strip()

        itemsList = getRSS(session, url, moderated,
                           maxPostsPerSource, maxFeedSizeKb)
        # defensive: skip a feed which produced nothing, rather than
        # failing on a missing result
        if not itemsList:
            continue
        result.update(itemsList)

    # add blogs from each user account
    addBlogsToNewswire(baseDir, result, maxPostsPerSource)

    # sort into chronological order, latest first
    sortedResult = OrderedDict(sorted(result.items(), reverse=True))
    return sortedResult