Link to DOI scientific references

2023-01-04 13:33:05 +00:00 · 2023-01-04 13:33:05 +00:00 · f25cf2121a
parent bcd0b5f30b
commit f25cf2121a
3 changed files with 55 additions and 2 deletions
--- a/content.py
+++ b/content.py
@ -585,6 +585,25 @@ def _shorten_linked_urls(content: str) -> str:
    return content


+def _contains_doi_reference(wrd: str, replace_dict: {}) -> bool:
+    """Handle DOI scientific references
+    If the given word has the form doi:<reference> or DOI:<reference>
+    then html link markup for the reference is stored within
+    replace_dict, keyed by the original word, and True is returned.
+    Returns False, leaving replace_dict unchanged, if the word is not
+    a DOI reference or if nothing follows the prefix
+    """
+    if not wrd.startswith(('doi:', 'DOI:')):
+        return False
+
+    doi_ref_str = wrd.split(':', 1)[1]
+    if not doi_ref_str:
+        # a bare "doi:" (eg. written as "doi: 10.1000/182") has no
+        # reference attached, so there is nothing to link to
+        return False
+
+    doi_site = 'https://sci-hub.ru'
+    # the visible link text is always lower case doi: regardless of
+    # the case of the prefix on the original word
+    markup = '<a href="' + doi_site + '/' + \
+        doi_ref_str + '" tabindex="10" ' + \
+        'rel="nofollow noopener noreferrer" ' + \
+        'target="_blank">' + \
+        '<span class="ellipsis">doi:' + doi_ref_str + \
+        '</span></a>'
+    replace_dict[wrd] = markup
+    return True
+
+
 def _contains_arxiv_reference(wrd: str, replace_dict: {}) -> bool:
    """Handle arxiv scientific references
    """
@ -620,6 +639,19 @@ def _contains_arxiv_reference(wrd: str, replace_dict: {}) -> bool:
    return True


+def _contains_academic_references(content: str) -> bool:
+    """Returns true if any of the known arXiv or DOI reference
+    prefixes appears anywhere within the given content
+    """
+    academic_prefixes = ('arXiv:', 'arx:', 'arxiv:', 'doi:', 'DOI:')
+    return any(marker in content for marker in academic_prefixes)
+
+
 def add_web_links(content: str) -> str:
    """Adds markup for web links
    """
@ -639,7 +671,7 @@ def add_web_links(content: str) -> str:

    # if there are no prefixes then just keep the content we have
    if not prefix_found:
-        if 'arXiv:' in content or 'arx:' in content or 'arxiv:' in content:
+        if _contains_academic_references(content):
            prefix_found = True
        else:
            return content
@ -652,6 +684,8 @@ def add_web_links(content: str) -> str:
            continue
        if _contains_arxiv_reference(wrd, replace_dict):
            continue
+        if _contains_doi_reference(wrd, replace_dict):
+            continue
        # does the word begin with a prefix?
        prefix_found = False
        for prefix in prefixes:
--- a/tests.py
+++ b/tests.py
@ -3623,6 +3623,11 @@ def _test_web_links():
        print(linked_text)
    assert expected_text in linked_text

+    # NOTE: it is difficult to find academic studies of the fediverse which
+    # do not in some way violate consent or embody an arrogant status
+    # quo attitude. Did all those scraped accounts agree to be part of
+    # an academic study? Did they even consider consent as an issue?
+    # It seems doubtful. We are just like algae under a microscope to them.
    example_text = \
        'This post has an arxiv link arXiv:2203.15752 some other text'
    linked_text = add_web_links(example_text)
@ -3635,6 +3640,20 @@ def _test_web_links():
        print(linked_text)
    assert expected_text in linked_text

+    example_text = \
+        'This post has an doi link ' + \
+        'doi:10.1109/INFCOMW.2019.8845221 some other text'
+    linked_text = add_web_links(example_text)
+    expected_text = \
+        '<a href="https://sci-hub.ru/10.1109/INFCOMW.2019.8845221" ' + \
+        'tabindex="10" rel="nofollow noopener noreferrer"' + \
+        ' target="_blank"><span class="ellipsis">' + \
+        'doi:10.1109/INFCOMW.2019.8845221</span></a>'
+    if expected_text not in linked_text:
+        print(expected_text + '\n')
+        print(linked_text)
+    assert expected_text in linked_text
+
    example_text = \
        'This post has a very long web link\n\nhttp://' + \
        'cbwebewuvfuftdiudbqd33dddbbyuef23fyug3bfhcyu2fct2' + \
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@ -299,7 +299,7 @@ def html_podcast_episode(translate: {},
                         text_mode_banner: str, access_keys: {},
                         session, session_onion, session_i2p,
                         http_prefix: str, debug: bool) -> str:
-    """Returns html for a podcast episode, giebn an item from the newswire
+    """Returns html for a podcast episode, given an item from the newswire
    """
    css_filename = base_dir + '/epicyon-podcast.css'
    if os.path.isfile(base_dir + '/podcast.css'):