mirror of https://gitlab.com/bashrc2/epicyon
Tidying
parent
4308f7501a
commit
32a08bddca
38
newswire.py
38
newswire.py
|
@ -116,7 +116,7 @@ def get_newswire_tags(text: str, max_tags: int) -> []:
|
|||
return tags
|
||||
|
||||
|
||||
def limit_word_lengths(text: str, maxWordLength: int) -> str:
|
||||
def limit_word_lengths(text: str, max_word_length: int) -> str:
|
||||
"""Limits the maximum length of words so that the newswire
|
||||
column cannot become too wide
|
||||
"""
|
||||
|
@ -125,8 +125,8 @@ def limit_word_lengths(text: str, maxWordLength: int) -> str:
|
|||
words = text.split(' ')
|
||||
result = ''
|
||||
for wrd in words:
|
||||
if len(wrd) > maxWordLength:
|
||||
wrd = wrd[:maxWordLength]
|
||||
if len(wrd) > max_word_length:
|
||||
wrd = wrd[:max_word_length]
|
||||
if result:
|
||||
result += ' '
|
||||
result += wrd
|
||||
|
@ -381,12 +381,12 @@ def load_hashtag_categories(base_dir: str, language: str) -> None:
|
|||
|
||||
|
||||
def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
|
||||
max_categories_feedItem_size_kb: int,
|
||||
max_categories_feed_item_size_kb: int,
|
||||
force: bool = False) -> None:
|
||||
"""Updates hashtag categories based upon an rss feed
|
||||
"""
|
||||
rss_items = xml_str.split('<item>')
|
||||
max_bytes = max_categories_feedItem_size_kb * 1024
|
||||
max_bytes = max_categories_feed_item_size_kb * 1024
|
||||
for rss_item in rss_items:
|
||||
if not rss_item:
|
||||
continue
|
||||
|
@ -488,7 +488,7 @@ def _valid_podcast_entry(base_dir: str, key: str, entry: {}) -> bool:
|
|||
post_url = entry['text']
|
||||
if '://' not in post_url:
|
||||
return False
|
||||
post_domain, post_port = get_domain_from_actor(post_url)
|
||||
post_domain, _ = get_domain_from_actor(post_url)
|
||||
if not post_domain:
|
||||
return False
|
||||
if is_blocked_domain(base_dir, post_domain):
|
||||
|
@ -720,7 +720,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
max_posts_per_source: int,
|
||||
max_feed_item_size_kb: int,
|
||||
max_categories_feedItem_size_kb: int,
|
||||
max_categories_feed_item_size_kb: int,
|
||||
session, debug: bool,
|
||||
preferred_podcast_formats: []) -> {}:
|
||||
"""Converts an xml RSS 2.0 string to a dictionary
|
||||
|
@ -732,7 +732,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
# is this an rss feed containing hashtag categories?
|
||||
if '<title>#categories</title>' in xml_str:
|
||||
_xml2str_to_hashtag_categories(base_dir, xml_str,
|
||||
max_categories_feedItem_size_kb)
|
||||
max_categories_feed_item_size_kb)
|
||||
return {}
|
||||
|
||||
rss_items = xml_str.split('<item>')
|
||||
|
@ -824,7 +824,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
max_posts_per_source: int,
|
||||
max_feed_item_size_kb: int,
|
||||
max_categories_feedItem_size_kb: int,
|
||||
max_categories_feed_item_size_kb: int,
|
||||
session, debug: bool,
|
||||
preferred_podcast_formats: []) -> {}:
|
||||
"""Converts an xml RSS 1.0 string to a dictionary
|
||||
|
@ -838,7 +838,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
# is this an rss feed containing hashtag categories?
|
||||
if '<title>#categories</title>' in xml_str:
|
||||
_xml2str_to_hashtag_categories(base_dir, xml_str,
|
||||
max_categories_feedItem_size_kb)
|
||||
max_categories_feed_item_size_kb)
|
||||
return {}
|
||||
|
||||
rss_items = xml_str.split(item_str)
|
||||
|
@ -1226,7 +1226,7 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
max_posts_per_source: int,
|
||||
max_feed_item_size_kb: int,
|
||||
max_categories_feedItem_size_kb: int,
|
||||
max_categories_feed_item_size_kb: int,
|
||||
session, debug: bool,
|
||||
preferred_podcast_formats: []) -> {}:
|
||||
"""Converts an xml string to a dictionary
|
||||
|
@ -1242,14 +1242,14 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
return _xml2str_to_dict(base_dir, domain,
|
||||
xml_str, moderated, mirrored,
|
||||
max_posts_per_source, max_feed_item_size_kb,
|
||||
max_categories_feedItem_size_kb,
|
||||
max_categories_feed_item_size_kb,
|
||||
session, debug,
|
||||
preferred_podcast_formats)
|
||||
if '<?xml version="1.0"' in xml_str:
|
||||
return _xml1str_to_dict(base_dir, domain,
|
||||
xml_str, moderated, mirrored,
|
||||
max_posts_per_source, max_feed_item_size_kb,
|
||||
max_categories_feedItem_size_kb,
|
||||
max_categories_feed_item_size_kb,
|
||||
session, debug, preferred_podcast_formats)
|
||||
if 'xmlns="http://www.w3.org/2005/Atom"' in xml_str:
|
||||
return _atom_feed_to_dict(base_dir, domain,
|
||||
|
@ -1281,7 +1281,7 @@ def get_rss(base_dir: str, domain: str, session, url: str,
|
|||
moderated: bool, mirrored: bool,
|
||||
max_posts_per_source: int, max_feed_size_kb: int,
|
||||
max_feed_item_size_kb: int,
|
||||
max_categories_feedItem_size_kb: int, debug: bool,
|
||||
max_categories_feed_item_size_kb: int, debug: bool,
|
||||
preferred_podcast_formats: [],
|
||||
timeout_sec: int) -> {}:
|
||||
"""Returns an RSS url as a dict
|
||||
|
@ -1318,7 +1318,7 @@ def get_rss(base_dir: str, domain: str, session, url: str,
|
|||
moderated, mirrored,
|
||||
max_posts_per_source,
|
||||
max_feed_item_size_kb,
|
||||
max_categories_feedItem_size_kb,
|
||||
max_categories_feed_item_size_kb,
|
||||
session, debug,
|
||||
preferred_podcast_formats)
|
||||
print('WARN: feed is too large, ' +
|
||||
|
@ -1445,7 +1445,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
if os.path.isfile(moderated_filename):
|
||||
moderated = True
|
||||
|
||||
with open(index_filename, 'r') as index_file:
|
||||
with open(index_filename, 'r', encoding='utf-8') as index_file:
|
||||
post_filename = 'start'
|
||||
ctr = 0
|
||||
while post_filename:
|
||||
|
@ -1563,7 +1563,7 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
|
|||
max_posts_per_source: int, max_feed_size_kb: int,
|
||||
max_tags: int, max_feed_item_size_kb: int,
|
||||
max_newswire_posts: int,
|
||||
max_categories_feedItem_size_kb: int,
|
||||
max_categories_feed_item_size_kb: int,
|
||||
system_language: str, debug: bool,
|
||||
preferred_podcast_formats: [],
|
||||
timeout_sec: int) -> {}:
|
||||
|
@ -1577,7 +1577,7 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
|
|||
|
||||
# add rss feeds
|
||||
rss_feed = []
|
||||
with open(subscriptions_filename, 'r') as fp_sub:
|
||||
with open(subscriptions_filename, 'r', encoding='utf-8') as fp_sub:
|
||||
rss_feed = fp_sub.readlines()
|
||||
result = {}
|
||||
for url in rss_feed:
|
||||
|
@ -1607,7 +1607,7 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
|
|||
moderated, mirrored,
|
||||
max_posts_per_source, max_feed_size_kb,
|
||||
max_feed_item_size_kb,
|
||||
max_categories_feedItem_size_kb, debug,
|
||||
max_categories_feed_item_size_kb, debug,
|
||||
preferred_podcast_formats,
|
||||
timeout_sec)
|
||||
if items_list:
|
||||
|
|
Loading…
Reference in New Issue