mirror of https://gitlab.com/bashrc2/epicyon
main
commit
7f5e591f15
96
content.py
96
content.py
|
@ -585,6 +585,73 @@ def _shorten_linked_urls(content: str) -> str:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_doi_reference(wrd: str, replace_dict: {}) -> bool:
|
||||||
|
"""Handle DOI scientific references
|
||||||
|
"""
|
||||||
|
if not wrd.startswith('doi:') and \
|
||||||
|
not wrd.startswith('DOI:'):
|
||||||
|
return False
|
||||||
|
|
||||||
|
doi_ref_str = wrd.split(':', 1)[1]
|
||||||
|
doi_site = 'https://sci-hub.ru'
|
||||||
|
markup = '<a href="' + doi_site + '/' + \
|
||||||
|
doi_ref_str + '" tabindex="10" ' + \
|
||||||
|
'rel="nofollow noopener noreferrer" ' + \
|
||||||
|
'target="_blank">' + \
|
||||||
|
'<span class="ellipsis">doi:' + doi_ref_str + \
|
||||||
|
'</span></a>'
|
||||||
|
replace_dict[wrd] = markup
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_arxiv_reference(wrd: str, replace_dict: {}) -> bool:
|
||||||
|
"""Handle arxiv scientific references
|
||||||
|
"""
|
||||||
|
if not wrd.startswith('arXiv:') and \
|
||||||
|
not wrd.startswith('arx:') and \
|
||||||
|
not wrd.startswith('arxiv:'):
|
||||||
|
return False
|
||||||
|
|
||||||
|
arxiv_ref_str = wrd.split(':', 1)[1].lower()
|
||||||
|
if '.' in arxiv_ref_str:
|
||||||
|
arxiv_ref = arxiv_ref_str.split('.')
|
||||||
|
elif ':' in arxiv_ref_str:
|
||||||
|
arxiv_ref = arxiv_ref_str.split(':')
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
if len(arxiv_ref) != 2:
|
||||||
|
return False
|
||||||
|
if not arxiv_ref[0].isdigit():
|
||||||
|
return False
|
||||||
|
arxiv_day = arxiv_ref[1]
|
||||||
|
if 'v' in arxiv_day:
|
||||||
|
arxiv_day = arxiv_day.split('v')[0]
|
||||||
|
if not arxiv_day.isdigit():
|
||||||
|
return False
|
||||||
|
ref_str = arxiv_ref[0] + '.' + arxiv_ref[1]
|
||||||
|
markup = '<a href="https://arxiv.org/abs/' + \
|
||||||
|
ref_str + '" tabindex="10" ' + \
|
||||||
|
'rel="nofollow noopener noreferrer" ' + \
|
||||||
|
'target="_blank">' + \
|
||||||
|
'<span class="ellipsis">arXiv:' + ref_str + \
|
||||||
|
'</span></a>'
|
||||||
|
replace_dict[wrd] = markup
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_academic_references(content: str) -> bool:
|
||||||
|
"""Does the given content contain academic references
|
||||||
|
"""
|
||||||
|
prefixes = (
|
||||||
|
'arXiv:', 'arx:', 'arxiv:',
|
||||||
|
'doi:', 'DOI:'
|
||||||
|
)
|
||||||
|
for reference in prefixes:
|
||||||
|
if reference in content:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def add_web_links(content: str) -> str:
|
def add_web_links(content: str) -> str:
|
||||||
"""Adds markup for web links
|
"""Adds markup for web links
|
||||||
"""
|
"""
|
||||||
|
@ -604,7 +671,7 @@ def add_web_links(content: str) -> str:
|
||||||
|
|
||||||
# if there are no prefixes then just keep the content we have
|
# if there are no prefixes then just keep the content we have
|
||||||
if not prefix_found:
|
if not prefix_found:
|
||||||
if 'arXiv:' in content or 'arx:' in content or 'arxiv:' in content:
|
if _contains_academic_references(content):
|
||||||
prefix_found = True
|
prefix_found = True
|
||||||
else:
|
else:
|
||||||
return content
|
return content
|
||||||
|
@ -615,30 +682,9 @@ def add_web_links(content: str) -> str:
|
||||||
for wrd in words:
|
for wrd in words:
|
||||||
if ':' not in wrd:
|
if ':' not in wrd:
|
||||||
continue
|
continue
|
||||||
# handle arxiv scientific references
|
if _contains_arxiv_reference(wrd, replace_dict):
|
||||||
if wrd.startswith('arXiv:') or \
|
continue
|
||||||
wrd.startswith('arx:') or \
|
if _contains_doi_reference(wrd, replace_dict):
|
||||||
wrd.startswith('arxiv:'):
|
|
||||||
arxiv_ref_str = wrd.split(':', 1)[1].lower()
|
|
||||||
if '.' in arxiv_ref_str:
|
|
||||||
arxiv_ref = arxiv_ref_str.split('.')
|
|
||||||
elif ':' in arxiv_ref_str:
|
|
||||||
arxiv_ref = arxiv_ref_str.split(':')
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
if len(arxiv_ref) == 2:
|
|
||||||
arxiv_day = arxiv_ref[1]
|
|
||||||
if 'v' in arxiv_day:
|
|
||||||
arxiv_day = arxiv_day.split('v')[0]
|
|
||||||
if arxiv_ref[0].isdigit() and arxiv_day.isdigit():
|
|
||||||
ref_str = arxiv_ref[0] + '.' + arxiv_ref[1]
|
|
||||||
markup = '<a href="https://arxiv.org/abs/' + \
|
|
||||||
ref_str + '" tabindex="10" ' + \
|
|
||||||
'rel="nofollow noopener noreferrer" ' + \
|
|
||||||
'target="_blank">' + \
|
|
||||||
'<span class="ellipsis">arXiv:' + ref_str + \
|
|
||||||
'</span></a>'
|
|
||||||
replace_dict[wrd] = markup
|
|
||||||
continue
|
continue
|
||||||
# does the word begin with a prefix?
|
# does the word begin with a prefix?
|
||||||
prefix_found = False
|
prefix_found = False
|
||||||
|
|
19
tests.py
19
tests.py
|
@ -3623,6 +3623,11 @@ def _test_web_links():
|
||||||
print(linked_text)
|
print(linked_text)
|
||||||
assert expected_text in linked_text
|
assert expected_text in linked_text
|
||||||
|
|
||||||
|
# NOTE: it is difficult to find academic studies of the fediverse which
|
||||||
|
# do not in some way violate consent or embody an arrogant status
|
||||||
|
# quo attitude. Did all those scraped accounts agree to be part of
|
||||||
|
# an academic study? Did they even consider consent as an issue?
|
||||||
|
# It seems doubtful. We are just like algae under a microscope to them.
|
||||||
example_text = \
|
example_text = \
|
||||||
'This post has an arxiv link arXiv:2203.15752 some other text'
|
'This post has an arxiv link arXiv:2203.15752 some other text'
|
||||||
linked_text = add_web_links(example_text)
|
linked_text = add_web_links(example_text)
|
||||||
|
@ -3635,6 +3640,20 @@ def _test_web_links():
|
||||||
print(linked_text)
|
print(linked_text)
|
||||||
assert expected_text in linked_text
|
assert expected_text in linked_text
|
||||||
|
|
||||||
|
example_text = \
|
||||||
|
'This post has an doi link ' + \
|
||||||
|
'doi:10.1109/INFCOMW.2019.8845221 some other text'
|
||||||
|
linked_text = add_web_links(example_text)
|
||||||
|
expected_text = \
|
||||||
|
'<a href="https://sci-hub.ru/10.1109/INFCOMW.2019.8845221" ' + \
|
||||||
|
'tabindex="10" rel="nofollow noopener noreferrer"' + \
|
||||||
|
' target="_blank"><span class="ellipsis">' + \
|
||||||
|
'doi:10.1109/INFCOMW.2019.8845221</span></a>'
|
||||||
|
if expected_text not in linked_text:
|
||||||
|
print(expected_text + '\n')
|
||||||
|
print(linked_text)
|
||||||
|
assert expected_text in linked_text
|
||||||
|
|
||||||
example_text = \
|
example_text = \
|
||||||
'This post has a very long web link\n\nhttp://' + \
|
'This post has a very long web link\n\nhttp://' + \
|
||||||
'cbwebewuvfuftdiudbqd33dddbbyuef23fyug3bfhcyu2fct2' + \
|
'cbwebewuvfuftdiudbqd33dddbbyuef23fyug3bfhcyu2fct2' + \
|
||||||
|
|
|
@ -299,7 +299,7 @@ def html_podcast_episode(translate: {},
|
||||||
text_mode_banner: str, access_keys: {},
|
text_mode_banner: str, access_keys: {},
|
||||||
session, session_onion, session_i2p,
|
session, session_onion, session_i2p,
|
||||||
http_prefix: str, debug: bool) -> str:
|
http_prefix: str, debug: bool) -> str:
|
||||||
"""Returns html for a podcast episode, giebn an item from the newswire
|
"""Returns html for a podcast episode, an item from the newswire
|
||||||
"""
|
"""
|
||||||
css_filename = base_dir + '/epicyon-podcast.css'
|
css_filename = base_dir + '/epicyon-podcast.css'
|
||||||
if os.path.isfile(base_dir + '/podcast.css'):
|
if os.path.isfile(base_dir + '/podcast.css'):
|
||||||
|
|
Loading…
Reference in New Issue