mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Unit test for safe html
							parent
							
								
									bdf1c77408
								
							
						
					
					
						commit
						9a0185ef3c
					
				
							
								
								
									
										16
									
								
								content.py
								
								
								
								
							
							
						
						
									
										16
									
								
								content.py
								
								
								
								
							|  | @ -486,6 +486,22 @@ def add_web_links(content: str) -> str: | |||
|     return content | ||||
| 
 | ||||
| 
 | ||||
def safe_web_text(arbitrary_html: str) -> str:
    """Converts arbitrary html into text which is safe to display.
    All markup is stripped out, so that the tags of any attack scripts
    are removed, and links are then recreated for urls within the text
    """
    # strip the markup first, leaving only plain text
    plain_text = remove_html(arbitrary_html)
    if plain_text:
        # drop spurious character sequences seen in podcast descriptions
        for spurious in ('Œ', 'â€', 'ğŸ', '<EFBFBD>', ']]'):
            plain_text = plain_text.replace(spurious, '')
        # rebuild links for any urls in a safe manner
        return add_web_links(plain_text)
    # nothing remained after the markup was removed
    return ''
| 
 | ||||
| 
 | ||||
| def _add_hash_tags(word_str: str, http_prefix: str, domain: str, | ||||
|                    replace_hashtags: {}, post_hashtags: {}) -> bool: | ||||
|     """Detects hashtags and adds them to the replacements dict | ||||
|  |  | |||
							
								
								
									
										26
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										26
									
								
								tests.py
								
								
								
								
							|  | @ -128,6 +128,7 @@ from inbox import json_post_allows_comments | |||
| from inbox import valid_inbox | ||||
| from inbox import valid_inbox_filenames | ||||
| from categories import guess_hashtag_category | ||||
| from content import safe_web_text | ||||
| from content import words_similarity | ||||
| from content import get_price_from_string | ||||
| from content import limit_repeated_words | ||||
|  | @ -6488,6 +6489,30 @@ def _test_get_link_from_rss_item() -> None: | |||
|     assert link.startswith('https://test.link/creativecommons') | ||||
| 
 | ||||
| 
 | ||||
def _test_safe_webtext() -> None:
    """Checks that safe_web_text removes markup and recreates url links
    """
    print('test_safe_webtext')

    def _show_mismatch(original: str, expected: str, actual: str) -> None:
        # diagnostics shown just before an assertion is about to fail
        print('Original html: ' + original)
        print('Expected html: ' + expected)
        print('Actual html: ' + actual)

    # paragraph tags are removed and the url is turned into a link
    html_str = '<p>Some text including a link https://some.site/some-path</p>'
    expected_str = \
        'Some text including a link <a href="https://some.site/some-path"'
    result = safe_web_text(html_str)
    if expected_str not in result:
        _show_mismatch(html_str, expected_str, result)
    assert expected_str in result
    for tag in ('<p>', '</p>'):
        assert tag not in result

    # script tags are removed, leaving only the text between them
    html_str = 'Some text with <script>some script</script>'
    expected_str = 'Some text with some script'
    result = safe_web_text(html_str)
    if result != expected_str:
        _show_mismatch(html_str, expected_str, result)
    assert result == expected_str
| 
 | ||||
| 
 | ||||
| def run_all_tests(): | ||||
|     base_dir = os.getcwd() | ||||
|     print('Running tests...') | ||||
|  | @ -6504,6 +6529,7 @@ def run_all_tests(): | |||
|                             'message_json', 'liked_post_json']) | ||||
|     _test_checkbox_names() | ||||
|     _test_functions() | ||||
|     _test_safe_webtext() | ||||
|     _test_get_link_from_rss_item() | ||||
|     _test_xml_podcast_dict() | ||||
|     _test_get_actor_from_in_reply_to() | ||||
|  |  | |||
|  | @ -14,7 +14,7 @@ from shutil import copyfile | |||
| from utils import get_config_param | ||||
| from utils import remove_html | ||||
| from media import path_is_audio | ||||
| from content import add_web_links | ||||
| from content import safe_web_text | ||||
| from webapp_utils import get_broken_link_substitute | ||||
| from webapp_utils import html_header_with_external_style | ||||
| from webapp_utils import html_footer | ||||
|  | @ -210,15 +210,8 @@ def html_podcast_episode(css_cache: {}, translate: {}, | |||
|     if newswire_item[4]: | ||||
|         podcast_description = \ | ||||
|             html.unescape(urllib.parse.unquote_plus(newswire_item[4])) | ||||
|         # Why remove html? Potentially podcast descriptions could contain | ||||
|         # arbitrary html with attack scripts, etc | ||||
|         podcast_description = remove_html(podcast_description) | ||||
|         podcast_description = safe_web_text(podcast_description) | ||||
|         if podcast_description: | ||||
|             remove_chars = ('Œ', 'â€', 'ğŸ', '<EFBFBD>', ']]') | ||||
|             for remchar in remove_chars: | ||||
|                 podcast_description = podcast_description.replace(remchar, '') | ||||
|             # recreate any url links safely | ||||
|             podcast_description = add_web_links(podcast_description) | ||||
|             podcast_str += '<p>' + podcast_description + '</p>\n' | ||||
| 
 | ||||
|     # donate button | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue