From 89c1c1c5a88a9785ebadc970bcff55fa89e8be20 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 30 Dec 2021 20:24:05 +0000 Subject: [PATCH] Snake case --- content.py | 1174 ++++++++++++++++++++++++------------------------ tests.py | 4 +- webapp_post.py | 4 +- 3 files changed, 599 insertions(+), 583 deletions(-) diff --git a/content.py b/content.py index 041d5d28f..a0412f5a9 100644 --- a/content.py +++ b/content.py @@ -29,22 +29,56 @@ from utils import remove_html from petnames import get_pet_name from session import download_image +MUSIC_SITES = ('soundcloud.com', 'bandcamp.com') -def remove_htmlTag(htmlStr: str, tag: str) -> str: +MAX_LINK_LENGTH = 40 + +VALID_HASHTAG_CHARS = \ + set('0123456789' + + 'abcdefghijklmnopqrstuvwxyz' + + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + + 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + + 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + + 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + + 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + + 'ŴŵÝýŸÿŶŷŹźŽžŻż') + +REMOVE_MARKUP = ( + 'b', 'i', 'ul', 'ol', 'li', 'em', 'strong', + 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5' +) + +INVALID_CONTENT_STRINGS = ( + 'mute', 'unmute', 'editeventpost', 'notifypost', + 'delete', 'options', 'page', 'repeat', + 'bm', 'tl', 'actor', 'unrepeat', 'eventid', + 'unannounce', 'like', 'unlike', 'bookmark', + 'unbookmark', 'likedBy', 'time', + 'year', 'month', 'day', 'editnewpost', + 'graph', 'showshare', 'category', 'showwanted', + 'rmshare', 'rmwanted', 'repeatprivate', + 'unrepeatprivate', 'replyto', + 'replyfollowers', 'replydm', 'editblogpost', + 'handle', 'blockdomain' +) + + +def remove_html_tag(html_str: str, tag: str) -> str: """Removes a given tag from a html string """ - tagFound = True - while tagFound: - matchStr = ' ' + tag + '="' - if matchStr not in htmlStr: - tagFound = False + tag_found = True + while tag_found: + match_str = ' ' + tag + '="' + if match_str not in html_str: + tag_found = False break - sections = htmlStr.split(matchStr, 1) + sections = html_str.split(match_str, 1) if '"' not in sections[1]: - tagFound = False + tag_found = False break - htmlStr = sections[0] + sections[1].split('"', 1)[1] - return htmlStr + html_str = sections[0] + sections[1].split('"', 1)[1] + return html_str def _remove_quotes_within_quotes(content: str) -> str: @@ -58,17 +92,17 @@ def _remove_quotes_within_quotes(content: str) -> str: found = True while found: prefix = content.split('
<blockquote>', ctr)[0] + '<blockquote>'
-        quotedStr = content.split('<blockquote>', ctr)[1]
-        if '</blockquote>' not in quotedStr:
+        quoted_str = content.split('<blockquote>', ctr)[1]
+        if '</blockquote>' not in quoted_str:
             found = False
         else:
-            endStr = quotedStr.split('</blockquote>')[1]
-            quotedStr = quotedStr.split('</blockquote>')[0]
-            if '<blockquote>' not in endStr:
+            end_str = quoted_str.split('</blockquote>')[1]
+            quoted_str = quoted_str.split('</blockquote>')[0]
+            if '<blockquote>' not in end_str:
                 found = False
-            if '<blockquote>' in quotedStr:
-                quotedStr = quotedStr.replace('<blockquote>', '')
-            content = prefix + quotedStr + '</blockquote>' + endStr
+            if '<blockquote>' in quoted_str:
+                quoted_str = quoted_str.replace('<blockquote>', '')
+            content = prefix + quoted_str + '</blockquote>' + end_str
         ctr += 1
     return content
 
 
@@ -92,27 +126,27 @@ def html_replace_email_quote(content: str) -> str:
     # replace email style quote
     if '>> ' not in content:
         return content
-    contentStr = content.replace('<p>', '')
-    contentLines = contentStr.split('</p>')
-    newContent = ''
-    for lineStr in contentLines:
-        if not lineStr:
+    content_str = content.replace('<p>', '')
+    content_lines = content_str.split('</p>')
+    new_content = ''
+    for line_str in content_lines:
+        if not line_str:
             continue
-        if '>> ' not in lineStr:
-            if lineStr.startswith('> '):
-                lineStr = lineStr.replace('> ', '<blockquote>')
-                lineStr = lineStr.replace('>', '<br>')
-                newContent += '<p>' + lineStr + '</blockquote></p>'
+        if '>> ' not in line_str:
+            if line_str.startswith('> '):
+                line_str = line_str.replace('> ', '<blockquote>')
+                line_str = line_str.replace('>', '<br>')
+                new_content += '<p>' + line_str + '</blockquote></p>'
             else:
-                newContent += '<p>' + lineStr + '</p>'
+                new_content += '<p>' + line_str + '</p>'
         else:
-            lineStr = lineStr.replace('>> ', '><blockquote>')
-            if lineStr.startswith('>'):
-                lineStr = lineStr.replace('>', '<blockquote>', 1)
+            line_str = line_str.replace('>> ', '><blockquote>')
+            if line_str.startswith('>'):
+                line_str = line_str.replace('>', '<blockquote>', 1)
             else:
-                lineStr = lineStr.replace('>', '<br>')
-            newContent += '<p>' + lineStr + '</blockquote></p>'
-    return _remove_quotes_within_quotes(newContent)
+                line_str = line_str.replace('>', '<br>')
+            new_content += '<p>' + line_str + '</blockquote></p>'
' + return _remove_quotes_within_quotes(new_content) def html_replace_quote_marks(content: str) -> str: @@ -131,44 +165,44 @@ def html_replace_quote_marks(content: str) -> str: if content.count('"') > 4: return content - newContent = content + new_content = content if '"' in content: sections = content.split('"') if len(sections) > 1: - newContent = '' - openQuote = True + new_content = '' + open_quote = True markup = False - for ch in content: - currChar = ch - if ch == '<': + for char in content: + curr_char = char + if char == '<': markup = True - elif ch == '>': + elif char == '>': markup = False - elif ch == '"' and not markup: - if openQuote: - currChar = '“' + elif char == '"' and not markup: + if open_quote: + curr_char = '“' else: - currChar = '”' - openQuote = not openQuote - newContent += currChar + curr_char = '”' + open_quote = not open_quote + new_content += curr_char - if '"' in newContent: - openQuote = True - content = newContent - newContent = '' + if '"' in new_content: + open_quote = True + content = new_content + new_content = '' ctr = 0 sections = content.split('"') - noOfSections = len(sections) - for s in sections: - newContent += s - if ctr < noOfSections - 1: - if openQuote: - newContent += '“' + no_of_sections = len(sections) + for sec in sections: + new_content += sec + if ctr < no_of_sections - 1: + if open_quote: + new_content += '“' else: - newContent += '”' - openQuote = not openQuote + new_content += '”' + open_quote = not open_quote ctr += 1 - return newContent + return new_content def dangerous_css(filename: str, allow_local_network_access: bool) -> bool: @@ -180,37 +214,41 @@ def dangerous_css(filename: str, allow_local_network_access: bool) -> bool: content = None try: - with open(filename, 'r') as fp: - content = fp.read().lower() + with open(filename, 'r') as css_file: + content = css_file.read().lower() except OSError: print('EX: unable to read css file ' + filename) - if content: - cssMatches = ('behavior:', ':expression', '?php', '.php', - 'google', 'regexp', 'localhost', - '127.0.', '192.168', '10.0.', '@import') - for cssmatch in cssMatches: - if cssmatch in content: - return True + if not content: + return False - # search for non-local web links - if 'url(' in content: - urlList = content.split('url(') - ctr = 0 - for urlStr in urlList: - if ctr > 0: - if ')' in urlStr: - urlStr = urlStr.split(')')[0] - if 'http' in urlStr: - print('ERROR: non-local web link in CSS ' + - filename) - return True - ctr += 1 - - # an attacker can include html inside of the css - # file as a comment and this may then be run from the html - if dangerous_markup(content, allow_local_network_access): + css_matches = ( + 'behavior:', ':expression', '?php', '.php', + 'google', 'regexp', 'localhost', + '127.0.', '192.168', '10.0.', '@import' + ) + for cssmatch in css_matches: + if cssmatch in content: return True + + # search for non-local web links + if 'url(' in content: + url_list = content.split('url(') + ctr = 0 + for url_str in url_list: + if ctr > 0: + if ')' in url_str: + url_str = url_str.split(')')[0] + if 'http' in url_str: + print('ERROR: non-local web link in CSS ' + + filename) + return True + ctr += 1 + + # an attacker can include html inside of the css + # file as a comment and this may then be run from the html + if dangerous_markup(content, allow_local_network_access): + return True return False @@ -227,25 +265,25 @@ def switch_words(base_dir: str, nickname: str, domain: str, content: str, if not os.path.isfile(switch_words_filename): return content try: 
- with open(switch_words_filename, 'r') as fp: - rules = fp.readlines() + with open(switch_words_filename, 'r') as words_file: + rules = words_file.readlines() except OSError: print('EX: unable to read switches ' + switch_words_filename) for line in rules: - replaceStr = line.replace('\n', '').replace('\r', '') + replace_str = line.replace('\n', '').replace('\r', '') splitters = ('->', ':', ',', ';', '-') - wordTransform = None - for splitStr in splitters: - if splitStr in replaceStr: - wordTransform = replaceStr.split(splitStr) + word_transform = None + for split_str in splitters: + if split_str in replace_str: + word_transform = replace_str.split(split_str) break - if not wordTransform: + if not word_transform: continue - if len(wordTransform) == 2: - replaceStr1 = wordTransform[0].strip().replace('"', '') - replaceStr2 = wordTransform[1].strip().replace('"', '') - content = content.replace(replaceStr1, replaceStr2) + if len(word_transform) == 2: + replace_str1 = word_transform[0].strip().replace('"', '') + replace_str2 = word_transform[1].strip().replace('"', '') + content = content.replace(replace_str1, replace_str2) return content @@ -265,112 +303,112 @@ def _save_custom_emoji(session, base_dir: str, emojiName: str, url: str, print('EX: Custom emoji is wrong format ' + url) return emojiName = emojiName.replace(':', '').strip().lower() - customEmojiDir = base_dir + '/emojicustom' - if not os.path.isdir(customEmojiDir): - os.mkdir(customEmojiDir) - emojiImageFilename = customEmojiDir + '/' + emojiName + '.' + ext + custom_emoji_dir = base_dir + '/emojicustom' + if not os.path.isdir(custom_emoji_dir): + os.mkdir(custom_emoji_dir) + emoji_image_filename = custom_emoji_dir + '/' + emojiName + '.' + ext if not download_image(session, base_dir, url, - emojiImageFilename, debug, False): + emoji_image_filename, debug, False): if debug: print('EX: custom emoji not downloaded ' + url) return - emojiJsonFilename = customEmojiDir + '/emoji.json' - emojiJson = {} - if os.path.isfile(emojiJsonFilename): - emojiJson = load_json(emojiJsonFilename, 0, 1) - if not emojiJson: - emojiJson = {} - if not emojiJson.get(emojiName): - emojiJson[emojiName] = emojiName - save_json(emojiJson, emojiJsonFilename) + emoji_json_filename = custom_emoji_dir + '/emoji.json' + emoji_json = {} + if os.path.isfile(emoji_json_filename): + emoji_json = load_json(emoji_json_filename, 0, 1) + if not emoji_json: + emoji_json = {} + if not emoji_json.get(emojiName): + emoji_json[emojiName] = emojiName + save_json(emoji_json, emoji_json_filename) if debug: - print('EX: Saved custom emoji ' + emojiJsonFilename) + print('EX: Saved custom emoji ' + emoji_json_filename) elif debug: print('EX: cusom emoji already saved') def replace_emoji_from_tags(session, base_dir: str, - content: str, tag: [], messageType: str, + content: str, tag: [], message_type: str, debug: bool) -> str: """Uses the tags to replace :emoji: with html image markup """ - for tagItem in tag: - if not tagItem.get('type'): + for tag_item in tag: + if not tag_item.get('type'): continue - if tagItem['type'] != 'Emoji': + if tag_item['type'] != 'Emoji': continue - if not tagItem.get('name'): + if not tag_item.get('name'): continue - if not tagItem.get('icon'): + if not tag_item.get('icon'): continue - if not tagItem['icon'].get('url'): + if not tag_item['icon'].get('url'): continue - if '/' not in tagItem['icon']['url']: + if '/' not in tag_item['icon']['url']: continue - if tagItem['name'] not in content: + if tag_item['name'] not in content: continue - iconName = 
tagItem['icon']['url'].split('/')[-1] - if iconName: - if len(iconName) > 1: - if iconName[0].isdigit(): - if '.' in iconName: - iconName = iconName.split('.')[0] + icon_name = tag_item['icon']['url'].split('/')[-1] + if icon_name: + if len(icon_name) > 1: + if icon_name[0].isdigit(): + if '.' in icon_name: + icon_name = icon_name.split('.')[0] # see https://unicode.org/ # emoji/charts/full-emoji-list.html - if '-' not in iconName: + if '-' not in icon_name: # a single code replaced = False try: - replaceChar = chr(int("0x" + iconName, 16)) - content = content.replace(tagItem['name'], - replaceChar) + replace_char = chr(int("0x" + icon_name, 16)) + content = content.replace(tag_item['name'], + replace_char) replaced = True except BaseException: print('EX: replace_emoji_from_tags 1 ' + 'no conversion of ' + - str(iconName) + ' to chr ' + - tagItem['name'] + ' ' + - tagItem['icon']['url']) + str(icon_name) + ' to chr ' + + tag_item['name'] + ' ' + + tag_item['icon']['url']) if not replaced: _save_custom_emoji(session, base_dir, - tagItem['name'], - tagItem['icon']['url'], + tag_item['name'], + tag_item['icon']['url'], debug) else: # sequence of codes - iconCodes = iconName.split('-') - iconCodeSequence = '' - for icode in iconCodes: + icon_codes = icon_name.split('-') + icon_code_sequence = '' + for icode in icon_codes: replaced = False try: - iconCodeSequence += chr(int("0x" + - icode, 16)) + icon_code_sequence += chr(int("0x" + + icode, 16)) replaced = True except BaseException: - iconCodeSequence = '' + icon_code_sequence = '' print('EX: replace_emoji_from_tags 2 ' + 'no conversion of ' + str(icode) + ' to chr ' + - tagItem['name'] + ' ' + - tagItem['icon']['url']) + tag_item['name'] + ' ' + + tag_item['icon']['url']) if not replaced: _save_custom_emoji(session, base_dir, - tagItem['name'], - tagItem['icon']['url'], + tag_item['name'], + tag_item['icon']['url'], debug) - if iconCodeSequence: - content = content.replace(tagItem['name'], - iconCodeSequence) + if icon_code_sequence: + content = content.replace(tag_item['name'], + icon_code_sequence) - htmlClass = 'emoji' - if messageType == 'post header': - htmlClass = 'emojiheader' - if messageType == 'profile': - htmlClass = 'emojiprofile' - emojiHtml = "\""" - content = content.replace(tagItem['name'], emojiHtml) + html_class = 'emoji' + if message_type == 'post header': + html_class = 'emojiheader' + if message_type == 'profile': + html_class = 'emojiprofile' + emoji_html = "\""" + content = content.replace(tag_item['name'], emoji_html) return content @@ -384,13 +422,12 @@ def _add_music_tag(content: str, tag: str) -> str: tag = '#' + tag if tag in content: return content - musicSites = ('soundcloud.com', 'bandcamp.com') - musicSiteFound = False - for site in musicSites: + music_site_found = False + for site in MUSIC_SITES: if site + '/' in content: - musicSiteFound = True + music_site_found = True break - if not musicSiteFound: + if not music_site_found: return content return ':music: ' + content + ' ' + tag + ' ' @@ -404,55 +441,54 @@ def add_web_links(content: str) -> str: prefixes = get_link_prefixes() # do any of these prefixes exist within the content? 
- prefixFound = False + prefix_found = False for prefix in prefixes: if prefix in content: - prefixFound = True + prefix_found = True break # if there are no prefixes then just keep the content we have - if not prefixFound: + if not prefix_found: return content - maxLinkLength = 40 content = content.replace('\r', '') words = content.replace('\n', ' --linebreak-- ').split(' ') - replaceDict = {} - for w in words: - if ':' not in w: + replace_dict = {} + for wrd in words: + if ':' not in wrd: continue # does the word begin with a prefix? - prefixFound = False + prefix_found = False for prefix in prefixes: - if w.startswith(prefix): - prefixFound = True + if wrd.startswith(prefix): + prefix_found = True break - if not prefixFound: + if not prefix_found: continue # the word contains a prefix - if w.endswith('.') or w.endswith(';'): - w = w[:-1] - markup = '' for prefix in prefixes: - if w.startswith(prefix): + if wrd.startswith(prefix): markup += '' break - linkText = w + link_text = wrd for prefix in prefixes: - linkText = linkText.replace(prefix, '') + link_text = link_text.replace(prefix, '') # prevent links from becoming too long - if len(linkText) > maxLinkLength: + if len(link_text) > MAX_LINK_LENGTH: markup += '' + \ - linkText[:maxLinkLength] + '' + link_text[:MAX_LINK_LENGTH] + '' markup += '' + link_text[MAX_LINK_LENGTH:] + '' else: - markup += '' + linkText + '' - replaceDict[w] = markup + markup += '' + link_text + '' + replace_dict[wrd] = markup # do the replacements - for url, markup in replaceDict.items(): + for url, markup in replace_dict.items(): content = content.replace(url, markup) # replace any line breaks @@ -467,81 +503,72 @@ def valid_hash_tag(hashtag: str) -> bool: # long hashtags are not valid if len(hashtag) >= 32: return False - validChars = set('0123456789' + - 'abcdefghijklmnopqrstuvwxyz' + - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + - '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + - 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + - 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + - 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + - 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + - 'ŴŵÝýŸÿŶŷŹźŽžŻż') - if set(hashtag).issubset(validChars): + if set(hashtag).issubset(VALID_HASHTAG_CHARS): return True if is_valid_language(hashtag): return True return False -def _add_hash_tags(wordStr: str, http_prefix: str, domain: str, - replaceHashTags: {}, postHashtags: {}) -> bool: +def _add_hash_tags(word_str: str, http_prefix: str, domain: str, + replace_hashtags: {}, post_hashtags: {}) -> bool: """Detects hashtags and adds them to the replacements dict Also updates the hashtags list to be added to the post """ - if replaceHashTags.get(wordStr): + if replace_hashtags.get(word_str): return True - hashtag = wordStr[1:] + hashtag = word_str[1:] if not valid_hash_tag(hashtag): return False - hashtagUrl = http_prefix + "://" + domain + "/tags/" + hashtag - postHashtags[hashtag] = { - 'href': hashtagUrl, + hashtag_url = http_prefix + "://" + domain + "/tags/" + hashtag + post_hashtags[hashtag] = { + 'href': hashtag_url, 'name': '#' + hashtag, 'type': 'Hashtag' } - replaceHashTags[wordStr] = "#" + \ hashtag + "" return True -def _add_emoji(base_dir: str, wordStr: str, +def _add_emoji(base_dir: str, word_str: str, http_prefix: str, domain: str, - replaceEmoji: {}, postTags: {}, - emojiDict: {}) -> bool: + replace_emoji: {}, post_tags: {}, + emoji_dict: {}) -> bool: """Detects Emoji and adds them to the replacements dict Also updates the tags list to be added to the post """ - if not wordStr.startswith(':'): + if not word_str.startswith(':'): return False - 
if not wordStr.endswith(':'): + if not word_str.endswith(':'): return False - if len(wordStr) < 3: + if len(word_str) < 3: return False - if replaceEmoji.get(wordStr): + if replace_emoji.get(word_str): return True # remove leading and trailing : characters - emoji = wordStr[1:] + emoji = word_str[1:] emoji = emoji[:-1] # is the text of the emoji valid? if not valid_hash_tag(emoji): return False - if not emojiDict.get(emoji): + if not emoji_dict.get(emoji): return False - emojiFilename = base_dir + '/emoji/' + emojiDict[emoji] + '.png' - if not os.path.isfile(emojiFilename): + emoji_filename = base_dir + '/emoji/' + emoji_dict[emoji] + '.png' + if not os.path.isfile(emoji_filename): return False - emojiUrl = http_prefix + "://" + domain + \ - "/emoji/" + emojiDict[emoji] + '.png' - postTags[emoji] = { + emoji_url = http_prefix + "://" + domain + \ + "/emoji/" + emoji_dict[emoji] + '.png' + post_tags[emoji] = { 'icon': { 'mediaType': 'image/png', 'type': 'Image', - 'url': emojiUrl + 'url': emoji_url }, 'name': ':' + emoji + ':', - "updated": file_last_modified(emojiFilename), - "id": emojiUrl.replace('.png', ''), + "updated": file_last_modified(emoji_filename), + "id": emoji_url.replace('.png', ''), 'type': 'Emoji' } return True @@ -556,114 +583,115 @@ def post_tag_exists(tagType: str, tagName: str, tags: {}) -> bool: return False -def _add_mention(wordStr: str, http_prefix: str, following: str, petnames: str, - replaceMentions: {}, recipients: [], tags: {}) -> bool: +def _add_mention(word_str: str, http_prefix: str, following: str, + petnames: str, replace_mentions: {}, + recipients: [], tags: {}) -> bool: """Detects mentions and adds them to the replacements dict and recipients list """ - possibleHandle = wordStr[1:] + possible_handle = word_str[1:] # @nick - if following and '@' not in possibleHandle: + if following and '@' not in possible_handle: # fall back to a best effort match against the following list # if no domain was specified. eg. 
@nick - possibleNickname = possibleHandle + possible_nickname = possible_handle for follow in following: if '@' not in follow: continue - followNick = follow.split('@')[0] - if possibleNickname == followNick: - followStr = follow.replace('\n', '').replace('\r', '') - replaceDomain = followStr.split('@')[1] - recipientActor = http_prefix + "://" + \ - replaceDomain + "/@" + possibleNickname - if recipientActor not in recipients: - recipients.append(recipientActor) - tags[wordStr] = { - 'href': recipientActor, - 'name': wordStr, + follow_nick = follow.split('@')[0] + if possible_nickname == follow_nick: + follow_str = follow.replace('\n', '').replace('\r', '') + replace_domain = follow_str.split('@')[1] + recipient_actor = http_prefix + "://" + \ + replace_domain + "/@" + possible_nickname + if recipient_actor not in recipients: + recipients.append(recipient_actor) + tags[word_str] = { + 'href': recipient_actor, + 'name': word_str, 'type': 'Mention' } - replaceMentions[wordStr] = \ + replace_mentions[word_str] = \ "@" + possibleNickname + \ - "" + "://" + replace_domain + "/@" + possible_nickname + \ + "\" class=\"u-url mention\">@" + \ + possible_nickname + "" return True # try replacing petnames with mentions - followCtr = 0 + follow_ctr = 0 for follow in following: if '@' not in follow: - followCtr += 1 + follow_ctr += 1 continue - pet = petnames[followCtr].replace('\n', '') + pet = petnames[follow_ctr].replace('\n', '') if pet: - if possibleNickname == pet: - followStr = follow.replace('\n', '').replace('\r', '') - replaceNickname = followStr.split('@')[0] - replaceDomain = followStr.split('@')[1] - recipientActor = http_prefix + "://" + \ - replaceDomain + "/@" + replaceNickname - if recipientActor not in recipients: - recipients.append(recipientActor) - tags[wordStr] = { - 'href': recipientActor, - 'name': wordStr, + if possible_nickname == pet: + follow_str = follow.replace('\n', '').replace('\r', '') + replace_nickname = follow_str.split('@')[0] + replace_domain = follow_str.split('@')[1] + recipient_actor = http_prefix + "://" + \ + replace_domain + "/@" + replace_nickname + if recipient_actor not in recipients: + recipients.append(recipient_actor) + tags[word_str] = { + 'href': recipient_actor, + 'name': word_str, 'type': 'Mention' } - replaceMentions[wordStr] = \ + replace_mentions[word_str] = \ "@" + \ - replaceNickname + "" + replace_nickname + "" return True - followCtr += 1 + follow_ctr += 1 return False - possibleNickname = None - possibleDomain = None - if '@' not in possibleHandle: + possible_nickname = None + possible_domain = None + if '@' not in possible_handle: return False - possibleNickname = possibleHandle.split('@')[0] - if not possibleNickname: + possible_nickname = possible_handle.split('@')[0] + if not possible_nickname: return False - possibleDomain = \ - possibleHandle.split('@')[1].strip('\n').strip('\r') - if not possibleDomain: + possible_domain = \ + possible_handle.split('@')[1].strip('\n').strip('\r') + if not possible_domain: return False if following: for follow in following: - if follow.replace('\n', '').replace('\r', '') != possibleHandle: + if follow.replace('\n', '').replace('\r', '') != possible_handle: continue - recipientActor = http_prefix + "://" + \ - possibleDomain + "/@" + possibleNickname - if recipientActor not in recipients: - recipients.append(recipientActor) - tags[wordStr] = { - 'href': recipientActor, - 'name': wordStr, + recipient_actor = http_prefix + "://" + \ + possible_domain + "/@" + possible_nickname + if recipient_actor not in 
recipients: + recipients.append(recipient_actor) + tags[word_str] = { + 'href': recipient_actor, + 'name': word_str, 'type': 'Mention' } - replaceMentions[wordStr] = \ + replace_mentions[word_str] = \ "@" + possibleNickname + \ + "://" + possible_domain + "/@" + possible_nickname + \ + "\" class=\"u-url mention\">@" + possible_nickname + \ "" return True # @nick@domain - if not (possibleDomain == 'localhost' or '.' in possibleDomain): + if not (possible_domain == 'localhost' or '.' in possible_domain): return False - recipientActor = http_prefix + "://" + \ - possibleDomain + "/@" + possibleNickname - if recipientActor not in recipients: - recipients.append(recipientActor) - tags[wordStr] = { - 'href': recipientActor, - 'name': wordStr, + recipient_actor = http_prefix + "://" + \ + possible_domain + "/@" + possible_nickname + if recipient_actor not in recipients: + recipients.append(recipient_actor) + tags[word_str] = { + 'href': recipient_actor, + 'name': word_str, 'type': 'Mention' } - replaceMentions[wordStr] = \ + replace_mentions[word_str] = \ "@" + possibleNickname + \ + "://" + possible_domain + "/@" + possible_nickname + \ + "\" class=\"u-url mention\">@" + possible_nickname + \ "" return True @@ -688,9 +716,7 @@ def remove_text_formatting(content: str) -> str: return content if '<' not in content: return content - removeMarkup = ('b', 'i', 'ul', 'ol', 'li', 'em', 'strong', - 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5') - for markup in removeMarkup: + for markup in REMOVE_MARKUP: content = content.replace('<' + markup + '>', '') content = content.replace('', '') content = content.replace('<' + markup.upper() + '>', '') @@ -698,8 +724,8 @@ def remove_text_formatting(content: str) -> str: return content -def remove_long_words(content: str, maxWordLength: int, - longWordsList: []) -> str: +def remove_long_words(content: str, max_word_length: int, + long_words_list: []) -> str: """Breaks up long words so that on mobile screens this doesn't disrupt the layout """ @@ -708,72 +734,72 @@ def remove_long_words(content: str, maxWordLength: int, content = replace_content_duplicates(content) if ' ' not in content: # handle a single very long string with no spaces - contentStr = content.replace('
<p>', '').replace(r'<\p>', '')
-        if '://' not in contentStr:
-            if len(contentStr) > maxWordLength:
+        content_str = content.replace('<p>', '').replace(r'<\p>', '')
+        if '://' not in content_str:
+            if len(content_str) > max_word_length:
                 if '<p>' in content:
-                    content = '<p>' + contentStr[:maxWordLength] + r'<\p>'
+                    content = '<p>' + content_str[:max_word_length] + r'<\p>'
                 else:
-                    content = content[:maxWordLength]
+                    content = content[:max_word_length]
         return content
 
     words = content.split(' ')
-    if not longWordsList:
-        longWordsList = []
-        for wordStr in words:
-            if len(wordStr) > maxWordLength:
-                if wordStr not in longWordsList:
-                    longWordsList.append(wordStr)
-    for wordStr in longWordsList:
-        if wordStr.startswith('<p>'):
-            wordStr = wordStr.replace('<p>', '')
-        if wordStr.startswith('<'):
+    if not long_words_list:
+        long_words_list = []
+        for word_str in words:
+            if len(word_str) > max_word_length:
+                if word_str not in long_words_list:
+                    long_words_list.append(word_str)
+    for word_str in long_words_list:
+        if word_str.startswith('<p>'):
+            word_str = word_str.replace('<p>', '')
+        if word_str.startswith('<'):
             continue
-        if len(wordStr) == 76:
-            if wordStr.upper() == wordStr:
+        if len(word_str) == 76:
+            if word_str.upper() == word_str:
                 # tox address
                 continue
-        if '=\"' in wordStr:
+        if '=\"' in word_str:
             continue
-        if '@' in wordStr:
-            if '@@' not in wordStr:
+        if '@' in word_str:
+            if '@@' not in word_str:
                 continue
-        if '=.ed25519' in wordStr:
+        if '=.ed25519' in word_str:
             continue
-        if '.onion' in wordStr:
+        if '.onion' in word_str:
             continue
-        if '.i2p' in wordStr:
+        if '.i2p' in word_str:
             continue
-        if 'https:' in wordStr:
+        if 'https:' in word_str:
             continue
-        elif 'http:' in wordStr:
+        elif 'http:' in word_str:
             continue
-        elif 'i2p:' in wordStr:
+        elif 'i2p:' in word_str:
             continue
-        elif 'gnunet:' in wordStr:
+        elif 'gnunet:' in word_str:
             continue
-        elif 'dat:' in wordStr:
+        elif 'dat:' in word_str:
             continue
-        elif 'rad:' in wordStr:
+        elif 'rad:' in word_str:
             continue
-        elif 'hyper:' in wordStr:
+        elif 'hyper:' in word_str:
             continue
-        elif 'briar:' in wordStr:
+        elif 'briar:' in word_str:
             continue
-        if '<' in wordStr:
-            replaceWord = wordStr.split('<', 1)[0]
-            # if len(replaceWord) > maxWordLength:
-            #     replaceWord = replaceWord[:maxWordLength]
-            content = content.replace(wordStr, replaceWord)
-            wordStr = replaceWord
-        if '/' in wordStr:
+        if '<' in word_str:
+            replace_word = word_str.split('<', 1)[0]
+            # if len(replace_word) > max_word_length:
+            #     replace_word = replace_word[:max_word_length]
+            content = content.replace(word_str, replace_word)
+            word_str = replace_word
+        if '/' in word_str:
             continue
-        if len(wordStr[maxWordLength:]) < maxWordLength:
-            content = content.replace(wordStr,
-                                      wordStr[:maxWordLength] + '\n' +
-                                      wordStr[maxWordLength:])
+        if len(word_str[max_word_length:]) < max_word_length:
+            content = content.replace(word_str,
+                                      word_str[:max_word_length] + '\n' +
+                                      word_str[max_word_length:])
         else:
-            content = content.replace(wordStr,
-                                      wordStr[:maxWordLength])
+            content = content.replace(word_str,
+                                      word_str[:max_word_length])
     if content.startswith('<p>'):
         if not content.endswith('</p>'):
             content = content.strip() + '</p
' @@ -788,273 +814,273 @@ def _load_auto_tags(base_dir: str, nickname: str, domain: str) -> []: if not os.path.isfile(filename): return [] try: - with open(filename, 'r') as f: - return f.readlines() + with open(filename, 'r') as tags_file: + return tags_file.readlines() except OSError: print('EX: unable to read auto tags ' + filename) return [] def _auto_tag(base_dir: str, nickname: str, domain: str, - wordStr: str, autoTagList: [], - appendTags: []): + word_str: str, auto_tag_list: [], + append_tags: []): """Generates a list of tags to be automatically appended to the content """ - for tagRule in autoTagList: - if wordStr not in tagRule: + for tag_rule in auto_tag_list: + if word_str not in tag_rule: continue - if '->' not in tagRule: + if '->' not in tag_rule: continue - rulematch = tagRule.split('->')[0].strip() - if rulematch != wordStr: + rulematch = tag_rule.split('->')[0].strip() + if rulematch != word_str: continue - tagName = tagRule.split('->')[1].strip() - if tagName.startswith('#'): - if tagName not in appendTags: - appendTags.append(tagName) + tag_name = tag_rule.split('->')[1].strip() + if tag_name.startswith('#'): + if tag_name not in append_tags: + append_tags.append(tag_name) else: - if '#' + tagName not in appendTags: - appendTags.append('#' + tagName) + if '#' + tag_name not in append_tags: + append_tags.append('#' + tag_name) def add_html_tags(base_dir: str, http_prefix: str, nickname: str, domain: str, content: str, recipients: [], hashtags: {}, - isJsonContent: bool = False) -> str: + is_json_content: bool = False) -> str: """ Replaces plaintext mentions such as @nick@domain into html by matching against known following accounts """ if content.startswith('
<p>
'): content = html_replace_email_quote(content) return html_replace_quote_marks(content) - maxWordLength = 40 + max_word_length = 40 content = content.replace('\r', '') content = content.replace('\n', ' --linebreak-- ') content = _add_music_tag(content, 'nowplaying') - contentSimplified = \ + content_simplified = \ content.replace(',', ' ').replace(';', ' ').replace('- ', ' ') - contentSimplified = contentSimplified.replace('. ', ' ').strip() - if contentSimplified.endswith('.'): - contentSimplified = contentSimplified[:len(contentSimplified)-1] - words = contentSimplified.split(' ') + content_simplified = content_simplified.replace('. ', ' ').strip() + if content_simplified.endswith('.'): + content_simplified = content_simplified[:len(content_simplified)-1] + words = content_simplified.split(' ') # remove . for words which are not mentions - newWords = [] - for wordIndex in range(0, len(words)): - wordStr = words[wordIndex] - if wordStr.endswith('.'): - if not wordStr.startswith('@'): - wordStr = wordStr[:-1] - if wordStr.startswith('.'): - wordStr = wordStr[1:] - newWords.append(wordStr) - words = newWords + new_words = [] + for word_index in range(0, len(words)): + word_str = words[word_index] + if word_str.endswith('.'): + if not word_str.startswith('@'): + word_str = word_str[:-1] + if word_str.startswith('.'): + word_str = word_str[1:] + new_words.append(word_str) + words = new_words - replaceMentions = {} - replaceHashTags = {} - replaceEmoji = {} - emojiDict = {} - originalDomain = domain + replace_mentions = {} + replace_hashtags = {} + replace_emoji = {} + emoji_dict = {} + original_domain = domain domain = remove_domain_port(domain) - followingFilename = acct_dir(base_dir, nickname, domain) + '/following.txt' + following_filename = \ + acct_dir(base_dir, nickname, domain) + '/following.txt' # read the following list so that we can detect just @nick # in addition to @nick@domain following = None petnames = None if '@' in words: - if os.path.isfile(followingFilename): + if os.path.isfile(following_filename): following = [] try: - with open(followingFilename, 'r') as f: - following = f.readlines() + with open(following_filename, 'r') as foll_file: + following = foll_file.readlines() except OSError: - print('EX: unable to read ' + followingFilename) + print('EX: unable to read ' + following_filename) for handle in following: pet = get_pet_name(base_dir, nickname, domain, handle) if pet: petnames.append(pet + '\n') # extract mentions and tags from words - longWordsList = [] - prevWordStr = '' - autoTagsList = _load_auto_tags(base_dir, nickname, domain) - appendTags = [] - for wordStr in words: - wordLen = len(wordStr) - if wordLen > 2: - if wordLen > maxWordLength: - longWordsList.append(wordStr) - firstChar = wordStr[0] - if firstChar == '@': - if _add_mention(wordStr, http_prefix, following, petnames, - replaceMentions, recipients, hashtags): - prevWordStr = '' + long_words_list = [] + prev_word_str = '' + auto_tags_list = _load_auto_tags(base_dir, nickname, domain) + append_tags = [] + for word_str in words: + word_len = len(word_str) + if word_len > 2: + if word_len > max_word_length: + long_words_list.append(word_str) + first_char = word_str[0] + if first_char == '@': + if _add_mention(word_str, http_prefix, following, petnames, + replace_mentions, recipients, hashtags): + prev_word_str = '' continue - elif firstChar == '#': + elif first_char == '#': # remove any endings from the hashtag - hashTagEndings = ('.', ':', ';', '-', '\n') - for ending in hashTagEndings: - if 
wordStr.endswith(ending): - wordStr = wordStr[:len(wordStr) - 1] + hash_tag_endings = ('.', ':', ';', '-', '\n') + for ending in hash_tag_endings: + if word_str.endswith(ending): + word_str = word_str[:len(word_str) - 1] break - if _add_hash_tags(wordStr, http_prefix, originalDomain, - replaceHashTags, hashtags): - prevWordStr = '' + if _add_hash_tags(word_str, http_prefix, original_domain, + replace_hashtags, hashtags): + prev_word_str = '' continue - elif ':' in wordStr: - wordStr2 = wordStr.split(':')[1] -# print('TAG: emoji located - ' + wordStr) - if not emojiDict: + elif ':' in word_str: + word_str2 = word_str.split(':')[1] +# print('TAG: emoji located - ' + word_str) + if not emoji_dict: # emoji.json is generated so that it can be customized and # the changes will be retained even if default_emoji.json # is subsequently updated if not os.path.isfile(base_dir + '/emoji/emoji.json'): copyfile(base_dir + '/emoji/default_emoji.json', base_dir + '/emoji/emoji.json') - emojiDict = load_json(base_dir + '/emoji/emoji.json') + emoji_dict = load_json(base_dir + '/emoji/emoji.json') # append custom emoji to the dict if os.path.isfile(base_dir + '/emojicustom/emoji.json'): - customEmojiDict = \ + custom_emoji_dict = \ load_json(base_dir + '/emojicustom/emoji.json') - if customEmojiDict: - emojiDict = dict(emojiDict, **customEmojiDict) + if custom_emoji_dict: + emoji_dict = dict(emoji_dict, **custom_emoji_dict) -# print('TAG: looking up emoji for :' + wordStr2 + ':') - _add_emoji(base_dir, ':' + wordStr2 + ':', http_prefix, - originalDomain, replaceEmoji, hashtags, - emojiDict) +# print('TAG: looking up emoji for :' + word_str2 + ':') + _add_emoji(base_dir, ':' + word_str2 + ':', http_prefix, + original_domain, replace_emoji, hashtags, + emoji_dict) else: - if _auto_tag(base_dir, nickname, domain, wordStr, - autoTagsList, appendTags): - prevWordStr = '' + if _auto_tag(base_dir, nickname, domain, word_str, + auto_tags_list, append_tags): + prev_word_str = '' continue - if prevWordStr: + if prev_word_str: if _auto_tag(base_dir, nickname, domain, - prevWordStr + ' ' + wordStr, - autoTagsList, appendTags): - prevWordStr = '' + prev_word_str + ' ' + word_str, + auto_tags_list, append_tags): + prev_word_str = '' continue - prevWordStr = wordStr + prev_word_str = word_str # add any auto generated tags - for appended in appendTags: + for appended in append_tags: content = content + ' ' + appended - _add_hash_tags(appended, http_prefix, originalDomain, - replaceHashTags, hashtags) + _add_hash_tags(appended, http_prefix, original_domain, + replace_hashtags, hashtags) # replace words with their html versions - for wordStr, replaceStr in replaceMentions.items(): - content = content.replace(wordStr, replaceStr) - for wordStr, replaceStr in replaceHashTags.items(): - content = content.replace(wordStr, replaceStr) - if not isJsonContent: - for wordStr, replaceStr in replaceEmoji.items(): - content = content.replace(wordStr, replaceStr) + for word_str, replace_str in replace_mentions.items(): + content = content.replace(word_str, replace_str) + for word_str, replace_str in replace_hashtags.items(): + content = content.replace(word_str, replace_str) + if not is_json_content: + for word_str, replace_str in replace_emoji.items(): + content = content.replace(word_str, replace_str) content = add_web_links(content) - if longWordsList: - content = remove_long_words(content, maxWordLength, longWordsList) + if long_words_list: + content = remove_long_words(content, max_word_length, long_words_list) content = 
limit_repeated_words(content, 6)
     content = content.replace(' --linebreak-- ', '<br>')
     content = html_replace_email_quote(content)
     return '<p>' + html_replace_quote_marks(content) + '</p>'
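The hunk above renames the internals of add_html_tags without changing its call shape. A minimal usage sketch follows; the base directory, accounts and post text are made-up example values, not taken from this patch:

    recipients = []
    hashtags = {}
    # '/var/lib/epicyon', 'alice' and 'example.com' are hypothetical values
    html = add_html_tags('/var/lib/epicyon', 'https', 'alice', 'example.com',
                         '@bob@example.com pruning the #garden today',
                         recipients, hashtags)
    # html comes back wrapped in <p>...</p>; recipients collects mentioned
    # actor URLs, and hashtags collects the Mention/Hashtag/Emoji tag objects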
' -def get_mentions_from_html(htmlText: str, - matchStr=" []: +def get_mentions_from_html(html_text: str, match_str: str) -> []: """Extracts mentioned actors from the given html content string """ mentions = [] - if matchStr not in htmlText: + if match_str not in html_text: return mentions - mentionsList = htmlText.split(matchStr) - for mentionStr in mentionsList: - if '"' not in mentionStr: + mentions_list = html_text.split(match_str) + for mention_str in mentions_list: + if '"' not in mention_str: continue - actorStr = mentionStr.split('"')[0] - if actorStr.startswith('http') or \ - actorStr.startswith('gnunet') or \ - actorStr.startswith('i2p') or \ - actorStr.startswith('hyper') or \ - actorStr.startswith('dat:'): - if actorStr not in mentions: - mentions.append(actorStr) + actor_str = mention_str.split('"')[0] + if actor_str.startswith('http') or \ + actor_str.startswith('gnunet') or \ + actor_str.startswith('i2p') or \ + actor_str.startswith('hyper') or \ + actor_str.startswith('dat:'): + if actor_str not in mentions: + mentions.append(actor_str) return mentions -def extract_media_in_form_post(postBytes, boundary, name: str): +def extract_media_in_form_post(post_bytes, boundary, name: str): """Extracts the binary encoding for image/video/audio within a http form POST Returns the media bytes and the remaining bytes """ - imageStartBoundary = b'Content-Disposition: form-data; name="' + \ + image_start_boundary = b'Content-Disposition: form-data; name="' + \ name.encode('utf8', 'ignore') + b'";' - imageStartLocation = postBytes.find(imageStartBoundary) - if imageStartLocation == -1: - return None, postBytes + image_start_location = post_bytes.find(image_start_boundary) + if image_start_location == -1: + return None, post_bytes # bytes after the start boundary appears - mediaBytes = postBytes[imageStartLocation:] + media_bytes = post_bytes[image_start_location:] # look for the next boundary - imageEndBoundary = boundary.encode('utf8', 'ignore') - imageEndLocation = mediaBytes.find(imageEndBoundary) - if imageEndLocation == -1: + image_end_boundary = boundary.encode('utf8', 'ignore') + image_end_location = media_bytes.find(image_end_boundary) + if image_end_location == -1: # no ending boundary - return mediaBytes, postBytes[:imageStartLocation] + return media_bytes, post_bytes[:image_start_location] # remaining bytes after the end of the image - remainder = mediaBytes[imageEndLocation:] + remainder = media_bytes[image_end_location:] # remove bytes after the end boundary - mediaBytes = mediaBytes[:imageEndLocation] + media_bytes = media_bytes[:image_end_location] # return the media and the before+after bytes - return mediaBytes, postBytes[:imageStartLocation] + remainder + return media_bytes, post_bytes[:image_start_location] + remainder -def save_media_in_form_post(mediaBytes, debug: bool, - filenameBase: str = None) -> (str, str): +def save_media_in_form_post(media_bytes, debug: bool, + filename_base: str = None) -> (str, str): """Saves the given media bytes extracted from http form POST Returns the filename and attachment type """ - if not mediaBytes: - if filenameBase: + if not media_bytes: + if filename_base: # remove any existing files - extensionTypes = get_image_extensions() - for ex in extensionTypes: - possibleOtherFormat = filenameBase + '.' + ex - if os.path.isfile(possibleOtherFormat): + extension_types = get_image_extensions() + for ex in extension_types: + possible_other_format = filename_base + '.' 
+ ex + if os.path.isfile(possible_other_format): try: - os.remove(possibleOtherFormat) + os.remove(possible_other_format) except OSError: if debug: print('EX: save_media_in_form_post ' + 'unable to delete other ' + - str(possibleOtherFormat)) - if os.path.isfile(filenameBase): + str(possible_other_format)) + if os.path.isfile(filename_base): try: - os.remove(filenameBase) + os.remove(filename_base) except OSError: if debug: print('EX: save_media_in_form_post ' + 'unable to delete ' + - str(filenameBase)) + str(filename_base)) if debug: print('DEBUG: No media found within POST') return None, None - mediaLocation = -1 - searchStr = '' + media_location = -1 + search_str = '' filename = None # directly search the binary array for the beginning # of an image - extensionList = { + extension_list = { 'png': 'image/png', 'jpeg': 'image/jpeg', 'gif': 'image/gif', @@ -1068,21 +1094,21 @@ def save_media_in_form_post(mediaBytes, debug: bool, 'flac': 'audio/flac', 'zip': 'application/zip' } - detectedExtension = None - for extension, content_type in extensionList.items(): - searchStr = b'Content-Type: ' + content_type.encode('utf8', 'ignore') - mediaLocation = mediaBytes.find(searchStr) - if mediaLocation > -1: + detected_extension = None + for extension, content_type in extension_list.items(): + search_str = b'Content-Type: ' + content_type.encode('utf8', 'ignore') + media_location = media_bytes.find(search_str) + if media_location > -1: # image/video/audio binaries if extension == 'jpeg': extension = 'jpg' elif extension == 'mpeg': extension = 'mp3' - if filenameBase: - filename = filenameBase + '.' + extension - attachmentMediaType = \ - searchStr.decode().split('/')[0].replace('Content-Type: ', '') - detectedExtension = extension + if filename_base: + filename = filename_base + '.' + extension + attachment_media_type = \ + search_str.decode().split('/')[0].replace('Content-Type: ', '') + detected_extension = extension break if not filename: @@ -1090,42 +1116,42 @@ def save_media_in_form_post(mediaBytes, debug: bool, # locate the beginning of the image, after any # carriage returns - startPos = mediaLocation + len(searchStr) + start_pos = media_location + len(search_str) for offset in range(1, 8): - if mediaBytes[startPos+offset] != 10: - if mediaBytes[startPos+offset] != 13: - startPos += offset + if media_bytes[start_pos+offset] != 10: + if media_bytes[start_pos+offset] != 13: + start_pos += offset break # remove any existing image files with a different format - if detectedExtension != 'zip': - extensionTypes = get_image_extensions() - for ex in extensionTypes: - if ex == detectedExtension: + if detected_extension != 'zip': + extension_types = get_image_extensions() + for ex in extension_types: + if ex == detected_extension: continue - possibleOtherFormat = \ + possible_other_format = \ filename.replace('.temp', '').replace('.' + - detectedExtension, '.' + + detected_extension, '.' 
+ ex) - if os.path.isfile(possibleOtherFormat): + if os.path.isfile(possible_other_format): try: - os.remove(possibleOtherFormat) + os.remove(possible_other_format) except OSError: if debug: print('EX: save_media_in_form_post ' + 'unable to delete other 2 ' + - str(possibleOtherFormat)) + str(possible_other_format)) # don't allow scripts within svg files - if detectedExtension == 'svg': - svgStr = mediaBytes[startPos:] - svgStr = svgStr.decode() - if dangerous_svg(svgStr, False): + if detected_extension == 'svg': + svg_str = media_bytes[start_pos:] + svg_str = svg_str.decode() + if dangerous_svg(svg_str, False): return None, None try: - with open(filename, 'wb') as fp: - fp.write(mediaBytes[startPos:]) + with open(filename, 'wb') as fp_media: + fp_media.write(media_bytes[start_pos:]) except OSError: print('EX: unable to write media') @@ -1134,84 +1160,84 @@ def save_media_in_form_post(mediaBytes, debug: bool, return None, None print('Uploaded media file written: ' + filename) - return filename, attachmentMediaType + return filename, attachment_media_type -def extract_text_fields_in_post(postBytes, boundary: str, debug: bool, +def extract_text_fields_in_post(post_bytes, boundary: str, debug: bool, unit_testData: str = None) -> {}: """Returns a dictionary containing the text fields of a http form POST The boundary argument comes from the http header """ if not unit_testData: - msgBytes = email.parser.BytesParser().parsebytes(postBytes) - messageFields = msgBytes.get_payload(decode=True).decode('utf-8') + msg_bytes = email.parser.BytesParser().parsebytes(post_bytes) + message_fields = msg_bytes.get_payload(decode=True).decode('utf-8') else: - messageFields = unit_testData + message_fields = unit_testData if debug: - print('DEBUG: POST arriving ' + messageFields) + print('DEBUG: POST arriving ' + message_fields) - messageFields = messageFields.split(boundary) + message_fields = message_fields.split(boundary) fields = {} - fieldsWithSemicolonAllowed = ( + fields_with_semicolon_allowed = ( 'message', 'bio', 'autoCW', 'password', 'passwordconfirm', 'instanceDescription', 'instanceDescriptionShort', 'subject', 'location', 'imageDescription' ) # examine each section of the POST, separated by the boundary - for f in messageFields: - if f == '--': + for fld in message_fields: + if fld == '--': continue - if ' name="' not in f: + if ' name="' not in fld: continue - postStr = f.split(' name="', 1)[1] - if '"' not in postStr: + post_str = fld.split(' name="', 1)[1] + if '"' not in post_str: continue - postKey = postStr.split('"', 1)[0] - postValueStr = postStr.split('"', 1)[1] - if ';' in postValueStr: - if postKey not in fieldsWithSemicolonAllowed and \ - not postKey.startswith('edited'): + post_key = post_str.split('"', 1)[0] + post_value_str = post_str.split('"', 1)[1] + if ';' in post_value_str: + if post_key not in fields_with_semicolon_allowed and \ + not post_key.startswith('edited'): continue - if '\r\n' not in postValueStr: + if '\r\n' not in post_value_str: continue - postLines = postValueStr.split('\r\n') - postValue = '' - if len(postLines) > 2: - for line in range(2, len(postLines)-1): + post_lines = post_value_str.split('\r\n') + post_value = '' + if len(post_lines) > 2: + for line in range(2, len(post_lines)-1): if line > 2: - postValue += '\n' - postValue += postLines[line] - fields[postKey] = urllib.parse.unquote(postValue) + post_value += '\n' + post_value += post_lines[line] + fields[post_key] = urllib.parse.unquote(post_value) return fields -def limit_repeated_words(text: str, 
maxRepeats: int) -> str: +def limit_repeated_words(text: str, max_repeats: int) -> str: """Removes words which are repeated many times """ words = text.replace('\n', ' ').split(' ') - repeatCtr = 0 - repeatedText = '' + repeat_ctr = 0 + repeated_text = '' replacements = {} - prevWord = '' + prev_word = '' for word in words: - if word == prevWord: - repeatCtr += 1 - if repeatedText: - repeatedText += ' ' + word + if word == prev_word: + repeat_ctr += 1 + if repeated_text: + repeated_text += ' ' + word else: - repeatedText = word + ' ' + word + repeated_text = word + ' ' + word else: - if repeatCtr > maxRepeats: - newText = ((prevWord + ' ') * maxRepeats).strip() - replacements[prevWord] = [repeatedText, newText] - repeatCtr = 0 - repeatedText = '' - prevWord = word + if repeat_ctr > max_repeats: + new_text = ((prev_word + ' ') * max_repeats).strip() + replacements[prev_word] = [repeated_text, new_text] + repeat_ctr = 0 + repeated_text = '' + prev_word = word - if repeatCtr > maxRepeats: - newText = ((prevWord + ' ') * maxRepeats).strip() - replacements[prevWord] = [repeatedText, newText] + if repeat_ctr > max_repeats: + new_text = ((prev_word + ' ') * max_repeats).strip() + replacements[prev_word] = [repeated_text, new_text] for word, item in replacements.items(): text = text.replace(item[0], item[1]) @@ -1241,68 +1267,56 @@ def _words_similarity_histogram(words: []) -> {}: """ histogram = {} for index in range(1, len(words)): - combinedWords = words[index - 1] + words[index] - if histogram.get(combinedWords): - histogram[combinedWords] += 1 + combined_words = words[index - 1] + words[index] + if histogram.get(combined_words): + histogram[combined_words] += 1 else: - histogram[combinedWords] = 1 + histogram[combined_words] = 1 return histogram def _words_similarity_words_list(content: str) -> []: """Returns a list of words for the given content """ - removePunctuation = ('.', ',', ';', '-', ':', '"') + remove_punctuation = ('.', ',', ';', '-', ':', '"') content = remove_html(content).lower() - for p in removePunctuation: - content = content.replace(p, ' ') + for punc in remove_punctuation: + content = content.replace(punc, ' ') content = content.replace(' ', ' ') return content.split(' ') -def words_similarity(content1: str, content2: str, minWords: int) -> int: +def words_similarity(content1: str, content2: str, min_words: int) -> int: """Returns percentage similarity """ if content1 == content2: return 100 words1 = _words_similarity_words_list(content1) - if len(words1) < minWords: + if len(words1) < min_words: return 0 words2 = _words_similarity_words_list(content2) - if len(words2) < minWords: + if len(words2) < min_words: return 0 histogram1 = _words_similarity_histogram(words1) histogram2 = _words_similarity_histogram(words2) diff = 0 - for combinedWords, hits in histogram1.items(): - if not histogram2.get(combinedWords): + for combined_words, _ in histogram1.items(): + if not histogram2.get(combined_words): diff += 1 else: - diff += abs(histogram2[combinedWords] - histogram1[combinedWords]) + diff += \ + abs(histogram2[combined_words] - histogram1[combined_words]) return 100 - int(diff * 100 / len(histogram1.items())) def contains_invalid_local_links(content: str) -> bool: """Returns true if the given content has invalid links """ - invalidStrings = ( - 'mute', 'unmute', 'editeventpost', 'notifypost', - 'delete', 'options', 'page', 'repeat', - 'bm', 'tl', 'actor', 'unrepeat', 'eventid', - 'unannounce', 'like', 'unlike', 'bookmark', - 'unbookmark', 'likedBy', 'time', - 'year', 
'month', 'day', 'editnewpost', - 'graph', 'showshare', 'category', 'showwanted', - 'rmshare', 'rmwanted', 'repeatprivate', - 'unrepeatprivate', 'replyto', - 'replyfollowers', 'replydm', 'editblogpost', - 'handle', 'blockdomain' - ) - for invStr in invalidStrings: - if '?' + invStr + '=' in content: + for inv_str in INVALID_CONTENT_STRINGS: + if '?' + inv_str + '=' in content: return True return False diff --git a/tests.py b/tests.py index 3e16e1c71..3ed8e9b29 100644 --- a/tests.py +++ b/tests.py @@ -142,7 +142,7 @@ from content import add_html_tags from content import remove_long_words from content import replace_content_duplicates from content import remove_text_formatting -from content import remove_htmlTag +from content import remove_html_tag from theme import update_default_themes_list from theme import set_cs_sparam from theme import scan_themes_for_scripts @@ -4015,7 +4015,7 @@ def _test_strip_html_tag(): print('testRemoveHtmlTag') testStr = "
<p><img width=\"864\" height=\"486\" src=\"https://somesiteorother.com/image.jpg\"></p>"
-    resultStr = remove_htmlTag(testStr, 'width')
+    resultStr = remove_html_tag(testStr, 'width')
     assert resultStr == "<p><img height=\"486\" src=\"https://somesiteorother.com/image.jpg\"></p>
" diff --git a/webapp_post.py b/webapp_post.py index b4af4b096..73b93efc4 100644 --- a/webapp_post.py +++ b/webapp_post.py @@ -412,7 +412,9 @@ def _get_reply_icon_html(base_dir: str, nickname: str, domain: str, '?mention=' + post_json_object['object']['attributedTo'] content = get_base_content_from_post(post_json_object, system_language) if content: - mentionedActors = get_mentions_from_html(content) + mentionedActors = \ + get_mentions_from_html(content, + "