forked from indymedia/epicyon
Remove html before checking for hashtags
parent
e5c436fa3f
commit
3fd0723684
19
content.py
19
content.py
|
@ -561,25 +561,6 @@ def removeTextFormatting(content: str) -> str:
|
|||
return content
|
||||
|
||||
|
||||
def removeHtml(content: str) -> str:
    """Returns the given content with all html markup tags stripped out.

    Used to ensure that profile descriptions don't contain dubious content.

    <q> and </q> are converted into double quote characters before
    stripping. Everything between a '<' and the next '>' is discarded,
    and if a '<' is never closed then the rest of the text is discarded.
    A '>' which appears outside of any tag is ordinary text and is kept.
    """
    if '<' not in content:
        return content
    content = content.replace('<q>', '"').replace('</q>', '"')
    # collect the kept characters and join once, rather than
    # repeatedly concatenating strings
    keptChars = []
    insideTag = False
    for ch in content:
        if ch == '<':
            insideTag = True
        elif ch == '>' and insideTag:
            # only a '>' which closes a tag is markup
            insideTag = False
        elif not insideTag:
            keptChars.append(ch)
    return ''.join(keptChars)
|
||||
|
||||
|
||||
def removeLongWords(content: str, maxWordLength: int,
|
||||
longWordsList: []) -> str:
|
||||
"""Breaks up long words so that on mobile screens this doesn't
|
||||
|
|
|
@ -18,6 +18,7 @@ from utils import loadJson
|
|||
from utils import saveJson
|
||||
from utils import isSuspended
|
||||
from utils import containsInvalidChars
|
||||
from utils import removeHtml
|
||||
from blocking import isBlockedDomain
|
||||
from blocking import isBlockedHashtag
|
||||
from filters import isFiltered
|
||||
|
@ -88,7 +89,7 @@ def addNewswireDictEntry(baseDir: str, domain: str,
|
|||
tags=[], maxTags=32) -> None:
|
||||
"""Update the newswire dictionary
|
||||
"""
|
||||
allText = title + ' ' + description
|
||||
allText = removeHtml(title + ' ' + description)
|
||||
|
||||
# check that none of the text is filtered against
|
||||
if isFiltered(baseDir, 'news', domain, allText):
|
||||
|
|
2
posts.py
2
posts.py
|
@ -49,9 +49,9 @@ from utils import getConfigParam
|
|||
from utils import locateNewsVotes
|
||||
from utils import locateNewsArrival
|
||||
from utils import votesOnNewswireItem
|
||||
from utils import removeHtml
|
||||
from media import attachMedia
|
||||
from media import replaceYouTube
|
||||
from content import removeHtml
|
||||
from content import removeLongWords
|
||||
from content import addHtmlTags
|
||||
from content import replaceEmojiFromTags
|
||||
|
|
2
tests.py
2
tests.py
|
@ -43,6 +43,7 @@ from utils import loadJson
|
|||
from utils import saveJson
|
||||
from utils import getStatusNumber
|
||||
from utils import getFollowersOfPerson
|
||||
from utils import removeHtml
|
||||
from follow import followerOfPerson
|
||||
from follow import unfollowPerson
|
||||
from follow import unfollowerOfPerson
|
||||
|
@ -71,7 +72,6 @@ from inbox import validInboxFilenames
|
|||
from content import htmlReplaceEmailQuote
|
||||
from content import htmlReplaceQuoteMarks
|
||||
from content import dangerousMarkup
|
||||
from content import removeHtml
|
||||
from content import addWebLinks
|
||||
from content import replaceEmojiFromTags
|
||||
from content import addHtmlTags
|
||||
|
|
19
utils.py
19
utils.py
|
@ -19,6 +19,25 @@ from calendar import monthrange
|
|||
from followingCalendar import addPersonToCalendar
|
||||
|
||||
|
||||
def removeHtml(content: str) -> str:
    """Returns the given content with all html markup tags stripped out.

    Used to ensure that profile descriptions don't contain dubious content.

    <q> and </q> are converted into double quote characters before
    stripping. Everything between a '<' and the next '>' is discarded,
    and if a '<' is never closed then the rest of the text is discarded.
    A '>' which appears outside of any tag is ordinary text and is kept.
    """
    if '<' not in content:
        return content
    content = content.replace('<q>', '"').replace('</q>', '"')
    # collect the kept characters and join once, rather than
    # repeatedly concatenating strings
    keptChars = []
    insideTag = False
    for ch in content:
        if ch == '<':
            insideTag = True
        elif ch == '>' and insideTag:
            # only a '>' which closes a tag is markup
            insideTag = False
        elif not insideTag:
            keptChars.append(ch)
    return ''.join(keptChars)
|
||||
|
||||
|
||||
def isSystemAccount(nickname: str) -> bool:
|
||||
"""Returns true if the given nickname is a system account
|
||||
"""
|
||||
|
|
|
@ -45,6 +45,7 @@ from utils import getCachedPostFilename
|
|||
from utils import loadJson
|
||||
from utils import getConfigParam
|
||||
from utils import votesOnNewswireItem
|
||||
from utils import removeHtml
|
||||
from follow import isFollowingActor
|
||||
from webfinger import webfingerHandle
|
||||
from posts import isDM
|
||||
|
@ -71,7 +72,6 @@ from content import getMentionsFromHtml
|
|||
from content import addHtmlTags
|
||||
from content import replaceEmojiFromTags
|
||||
from content import removeLongWords
|
||||
from content import removeHtml
|
||||
from skills import getSkills
|
||||
from cache import getPersonFromCache
|
||||
from cache import storePersonInCache
|
||||
|
|
Loading…
Reference in New Issue