Rss feeds for hashtags

2020-09-26 19:23:43 +01:00 · 2020-09-26 19:23:43 +01:00 · 83ff1b03a3
parent b8f122004b
commit 83ff1b03a3
3 changed files with 183 additions and 3 deletions
--- a/blog.py
+++ b/blog.py
@ -282,7 +282,8 @@ def htmlBlogPostRSS2(authorized: bool,
        messageLink = postJsonObject['object']['id'].replace('/statuses/', '/')
        if not restrictToDomain or \
           (restrictToDomain and '/' + domain in messageLink):
-            if postJsonObject['object'].get('summary'):
+            if postJsonObject['object'].get('summary') and \
+               postJsonObject['object'].get('published'):
                published = postJsonObject['object']['published']
                pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
                titleStr = postJsonObject['object']['summary']
@ -307,7 +308,8 @@ def htmlBlogPostRSS3(authorized: bool,
        messageLink = postJsonObject['object']['id'].replace('/statuses/', '/')
        if not restrictToDomain or \
           (restrictToDomain and '/' + domain in messageLink):
-            if postJsonObject['object'].get('summary'):
+            if postJsonObject['object'].get('summary') and \
+               postJsonObject['object'].get('published'):
                published = postJsonObject['object']['published']
                pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
                titleStr = postJsonObject['object']['summary']
--- a/daemon.py
+++ b/daemon.py
@ -148,6 +148,7 @@ from webinterface import htmlTermsOfService
 from webinterface import htmlSkillsSearch
 from webinterface import htmlHistorySearch
 from webinterface import htmlHashtagSearch
+from webinterface import rssHashtagSearch
 from webinterface import htmlModerationInfo
 from webinterface import htmlSearchSharedItems
 from webinterface import htmlHashtagBlocked
@ -4093,6 +4094,60 @@ class PubServer(BaseHTTPRequestHandler):
                                  'login shown done',
                                  'hashtag search')

+    def _hashtagSearchRSS2(self, callingDomain: str,
+                           path: str, cookie: str,
+                           baseDir: str, httpPrefix: str,
+                           domain: str, domainFull: str, port: int,
+                           onionDomain: str, i2pDomain: str,
+                           GETstartTime, GETtimings: {}):
+        """Return an RSS 2 feed for a hashtag
+
+        The hashtag is whatever follows '/tags/rss2/' within the path.
+        A blocked hashtag results in a 400 response. If a feed is
+        produced it is sent as text/xml, otherwise the client is
+        redirected to '/search' beneath the part of the path which
+        precedes '/tags/rss2/', using the onion or i2p address when the
+        request arrived via one of those domains.
+        self.server.GETbusy is cleared on every exit path.
+        """
+        hashtag = path.split('/tags/rss2/')[1]
+        if isBlockedHashtag(baseDir, hashtag):
+            self._400()
+            self.server.GETbusy = False
+            return
+        nickname = None
+        if '/users/' in path:
+            actor = \
+                httpPrefix + '://' + domainFull + path
+            nickname = \
+                getNicknameFromActor(actor)
+        # NOTE(review): maxPostsInFeed is not defined within this method,
+        # presumably it is a module level constant - TODO confirm
+        hashtagStr = \
+            rssHashtagSearch(nickname,
+                             domain, port,
+                             self.server.recentPostsCache,
+                             self.server.maxRecentPosts,
+                             self.server.translate,
+                             baseDir, hashtag,
+                             maxPostsInFeed, self.server.session,
+                             self.server.cachedWebfingers,
+                             self.server.personCache,
+                             httpPrefix,
+                             self.server.projectVersion,
+                             self.server.YTReplacementDomain)
+        if hashtagStr:
+            msg = hashtagStr.encode('utf-8')
+            self._set_headers('text/xml', len(msg),
+                              cookie, callingDomain)
+            self._write(msg)
+        else:
+            # no feed could be created, so go back to the search screen
+            originPathStr = path.split('/tags/rss2/')[0]
+            originPathStrAbsolute = \
+                httpPrefix + '://' + domainFull + originPathStr
+            if callingDomain.endswith('.onion') and onionDomain:
+                originPathStrAbsolute = \
+                    'http://' + onionDomain + originPathStr
+            elif callingDomain.endswith('.i2p') and i2pDomain:
+                # the i2p address must be the one which is checked here,
+                # since it is the one used to build the redirect
+                originPathStrAbsolute = \
+                    'http://' + i2pDomain + originPathStr
+            self._redirect_headers(originPathStrAbsolute + '/search',
+                                   cookie, callingDomain)
+        self.server.GETbusy = False
+        self._benchmarkGETtimings(GETstartTime, GETtimings,
+                                  'login shown done',
+                                  'hashtag rss feed')
+
    def _announceButton(self, callingDomain: str, path: str,
                        baseDir: str,
                        cookie: str, proxyType: str,
@ -8068,6 +8123,18 @@ class PubServer(BaseHTTPRequestHandler):
        # hashtag search
        if self.path.startswith('/tags/') or \
           (authorized and '/tags/' in self.path):
+            if self.path.startswith('/tags/rss2/'):
+                self._hashtagSearchRSS2(callingDomain,
+                                        self.path, cookie,
+                                        self.server.baseDir,
+                                        self.server.httpPrefix,
+                                        self.server.domain,
+                                        self.server.domainFull,
+                                        self.server.port,
+                                        self.server.onionDomain,
+                                        self.server.i2pDomain,
+                                        GETstartTime, GETtimings)
+                return
            self._hashtagSearch(callingDomain,
                                self.path, cookie,
                                self.server.baseDir,
--- a/webinterface.py
+++ b/webinterface.py
@ -787,7 +787,7 @@ def htmlHashtagSearch(nickname: str, domain: str, port: int,
        else:
            postFields = postId.split('  ')
            if len(postFields) != 3:
-                index = +1
+                index += 1
                continue
            nickname = postFields[1]
            postId = postFields[2]
@ -833,6 +833,117 @@ def htmlHashtagSearch(nickname: str, domain: str, port: int,
    return hashtagSearchForm


+def rss2TagHeader(hashtag: str, httpPrefix: str, domainFull: str) -> str:
+    """Returns the opening part of an RSS 2.0 document for a hashtag
+
+    This is the xml declaration, the opening rss and channel tags, and
+    the channel title and link. The title is the hashtag prefixed
+    with '#' and the link is the /tags/rss2/ url for the hashtag on
+    the given domain. The channel and rss tags are left open, so the
+    caller is expected to append items and then the closing tags.
+    """
+    # local import, because the imports at the top of the file
+    # are not known to include this
+    from xml.sax.saxutils import escape
+
+    # the hashtag can originate from a request url, so escape the
+    # characters which would otherwise be interpreted as markup
+    channelTitle = escape('#' + hashtag)
+    channelLink = \
+        escape(httpPrefix + '://' + domainFull + '/tags/rss2/' + hashtag)
+
+    rssStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+    rssStr += "<rss version=\"2.0\">"
+    rssStr += '<channel>'
+    rssStr += '    <title>' + channelTitle + '</title>'
+    rssStr += '    <link>' + channelLink + '</link>'
+    return rssStr
+
+
+def rss2TagFooter() -> str:
+    """Returns the closing channel and rss tags of an RSS 2.0 document
+    """
+    closingTags = ('</channel>', '</rss>')
+    return ''.join(closingTags)
+
+
+def rssHashtagSearch(nickname: str, domain: str, port: int,
+                     recentPostsCache: {}, maxRecentPosts: int,
+                     translate: {},
+                     baseDir: str, hashtag: str,
+                     postsPerPage: int,
+                     session, wfRequest: {}, personCache: {},
+                     httpPrefix: str, projectVersion: str,
+                     YTReplacementDomain: str) -> str:
+    """Show an rss feed for a hashtag
+
+    The index file baseDir/tags/<hashtag>.txt is read, trying the lower
+    case form of the hashtag if the given one has no index. Only the
+    first ten index lines are examined. Each line is either a post id,
+    or three fields separated by double spaces of which the second is
+    a nickname and the third is a post id. Public posts having both
+    an id and a published date become feed items containing a link
+    and a pubDate.
+
+    Returns the feed as a string, or None if the hashtag is not usable
+    as a filename, has no index file, or the index file is empty.
+
+    Only domain, port, baseDir, hashtag and httpPrefix are used here.
+    The nickname argument does not select posts, since every index
+    entry supplies its own nickname.
+    """
+    # local import, because the imports at the top of the file
+    # are not known to include this
+    from xml.sax.saxutils import escape
+
+    if hashtag.startswith('#'):
+        hashtag = hashtag[1:]
+    hashtag = urllib.parse.unquote(hashtag)
+
+    # The hashtag comes from the request url and becomes part of a
+    # filename, so refuse anything able to leave the tags directory
+    if not hashtag or '/' in hashtag or \
+       '\\' in hashtag or '..' in hashtag:
+        print('WARN: invalid hashtag within rss feed request')
+        return None
+
+    hashtagIndexFile = baseDir + '/tags/' + hashtag + '.txt'
+    if not os.path.isfile(hashtagIndexFile):
+        if hashtag != hashtag.lower():
+            hashtag = hashtag.lower()
+            hashtagIndexFile = baseDir + '/tags/' + hashtag + '.txt'
+    if not os.path.isfile(hashtagIndexFile):
+        print('WARN: hashtag file not found ' + hashtagIndexFile)
+        return None
+
+    # read the index
+    lines = []
+    with open(hashtagIndexFile, "r") as f:
+        lines = f.readlines()
+    if not lines:
+        return None
+
+    domainFull = domain
+    if port:
+        if port != 80 and port != 443:
+            domainFull = domain + ':' + str(port)
+
+    # only this many index lines are examined, whether or not
+    # each of them results in a feed item
+    maxFeedLength = 10
+    feedParts = [rss2TagHeader(hashtag, httpPrefix, domainFull)]
+    for indexLine in lines[:maxFeedLength]:
+        postId = indexLine.strip('\n').strip('\r')
+        if '  ' not in postId:
+            postNickname = getNicknameFromActor(postId)
+            if not postNickname:
+                continue
+        else:
+            postFields = postId.split('  ')
+            if len(postFields) != 3:
+                continue
+            postNickname = postFields[1]
+            postId = postFields[2]
+        postFilename = locatePost(baseDir, postNickname, domain, postId)
+        if not postFilename:
+            continue
+        postJsonObject = loadJson(postFilename)
+        if not postJsonObject:
+            continue
+        if not isPublicPost(postJsonObject):
+            continue
+        postObject = postJsonObject['object']
+        if not postObject.get('id') or not postObject.get('published'):
+            continue
+        try:
+            pubDate = datetime.strptime(postObject['published'],
+                                        "%Y-%m-%dT%H:%M:%SZ")
+        except (ValueError, TypeError):
+            # a single badly formatted date should not lose the whole feed
+            continue
+        rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
+        # the post id may have been supplied by another instance,
+        # so escape it before it goes into the xml
+        messageLink = escape(postObject['id'].replace('/statuses/', '/'))
+        # add to feed
+        feedParts.append('     <item>')
+        feedParts.append('         <link>' + messageLink + '</link>')
+        feedParts.append('         <pubDate>' + rssDateStr + '</pubDate>')
+        feedParts.append('     </item>')
+
+    feedParts.append(rss2TagFooter())
+    return ''.join(feedParts)
+
+
 def htmlSkillsSearch(translate: {}, baseDir: str,
                     httpPrefix: str,
                     skillsearch: str, instanceOnly: bool,