Remove html before checking for hashtags

main
Bob Mottram 2020-10-25 12:47:16 +00:00
parent e5c436fa3f
commit 3fd0723684
6 changed files with 24 additions and 23 deletions

View File

@ -561,25 +561,6 @@ def removeTextFormatting(content: str) -> str:
return content
def removeHtml(content: str) -> str:
    """Strips html markup from the given content, leaving only its text.

    Everything from a '<' up to and including the next '>' is discarded,
    and <q> / </q> tags are replaced by double quote characters.
    A '<' which has no closing '>' causes the rest of the content to be
    dropped, so the returned text never contains a '<' character.
    A '>' which is not closing a tag is kept as ordinary text.
    If the content contains no '<' at all it is returned unchanged.
    Used to ensure that profile descriptions don't contain dubious content
    """
    if '<' not in content:
        return content
    content = content.replace('<q>', '"').replace('</q>', '"')
    removing = False
    kept = []
    for ch in content:
        if removing:
            # inside a tag: discard everything up to the closing bracket
            if ch == '>':
                removing = False
        elif ch == '<':
            removing = True
        else:
            # plain text, including any '>' which isn't closing a tag
            kept.append(ch)
    # join once rather than concatenating a string per character
    return ''.join(kept)
def removeLongWords(content: str, maxWordLength: int,
longWordsList: []) -> str:
"""Breaks up long words so that on mobile screens this doesn't

View File

@ -18,6 +18,7 @@ from utils import loadJson
from utils import saveJson
from utils import isSuspended
from utils import containsInvalidChars
from utils import removeHtml
from blocking import isBlockedDomain
from blocking import isBlockedHashtag
from filters import isFiltered
@ -88,7 +89,7 @@ def addNewswireDictEntry(baseDir: str, domain: str,
tags=[], maxTags=32) -> None:
"""Update the newswire dictionary
"""
allText = title + ' ' + description
allText = removeHtml(title + ' ' + description)
# check that none of the text is filtered against
if isFiltered(baseDir, 'news', domain, allText):

View File

@ -49,9 +49,9 @@ from utils import getConfigParam
from utils import locateNewsVotes
from utils import locateNewsArrival
from utils import votesOnNewswireItem
from utils import removeHtml
from media import attachMedia
from media import replaceYouTube
from content import removeHtml
from content import removeLongWords
from content import addHtmlTags
from content import replaceEmojiFromTags

View File

@ -43,6 +43,7 @@ from utils import loadJson
from utils import saveJson
from utils import getStatusNumber
from utils import getFollowersOfPerson
from utils import removeHtml
from follow import followerOfPerson
from follow import unfollowPerson
from follow import unfollowerOfPerson
@ -71,7 +72,6 @@ from inbox import validInboxFilenames
from content import htmlReplaceEmailQuote
from content import htmlReplaceQuoteMarks
from content import dangerousMarkup
from content import removeHtml
from content import addWebLinks
from content import replaceEmojiFromTags
from content import addHtmlTags

View File

@ -19,6 +19,25 @@ from calendar import monthrange
from followingCalendar import addPersonToCalendar
def removeHtml(content: str) -> str:
    """Strips html markup from the given content, leaving only its text.

    Everything from a '<' up to and including the next '>' is discarded,
    and <q> / </q> tags are replaced by double quote characters.
    A '<' which has no closing '>' causes the rest of the content to be
    dropped, so the returned text never contains a '<' character.
    A '>' which is not closing a tag is kept as ordinary text.
    If the content contains no '<' at all it is returned unchanged.
    Used to ensure that profile descriptions don't contain dubious content
    """
    if '<' not in content:
        return content
    content = content.replace('<q>', '"').replace('</q>', '"')
    removing = False
    kept = []
    for ch in content:
        if removing:
            # inside a tag: discard everything up to the closing bracket
            if ch == '>':
                removing = False
        elif ch == '<':
            removing = True
        else:
            # plain text, including any '>' which isn't closing a tag
            kept.append(ch)
    # join once rather than concatenating a string per character
    return ''.join(kept)
def isSystemAccount(nickname: str) -> bool:
"""Returns true if the given nickname is a system account
"""

View File

@ -45,6 +45,7 @@ from utils import getCachedPostFilename
from utils import loadJson
from utils import getConfigParam
from utils import votesOnNewswireItem
from utils import removeHtml
from follow import isFollowingActor
from webfinger import webfingerHandle
from posts import isDM
@ -71,7 +72,6 @@ from content import getMentionsFromHtml
from content import addHtmlTags
from content import replaceEmojiFromTags
from content import removeLongWords
from content import removeHtml
from skills import getSkills
from cache import getPersonFromCache
from cache import storePersonInCache