Support arXiv scientific publication references

2023-01-04 11:53:15 +00:00 · 2023-01-04 11:53:15 +00:00 · 9a7b95eca4
parent 8716759208
commit 9a7b95eca4
2 changed files with 42 additions and 2 deletions
--- a/content.py
+++ b/content.py
@ -604,7 +604,10 @@ def add_web_links(content: str) -> str:

    # if there are no prefixes then just keep the content we have
    if not prefix_found:
-        return content
+        if 'arXiv:' in content or 'arx:' in content or 'arxiv:' in content:
+            prefix_found = True
+        else:
+            return content

    content = content.replace('\r', '')
    words = content.replace('\n', ' --linebreak-- ').split(' ')
@ -612,6 +615,31 @@ def add_web_links(content: str) -> str:
    for wrd in words:
        if ':' not in wrd:
            continue
+        # handle arxiv scientific references
+        if wrd.startswith('arXiv:') or \
+           wrd.startswith('arx:') or \
+           wrd.startswith('arxiv:'):
+            arxiv_ref_str = wrd.split(':', 1)[1].lower()
+            if '.' in arxiv_ref_str:
+                arxiv_ref = arxiv_ref_str.split('.')
+            elif ':' in arxiv_ref_str:
+                arxiv_ref = arxiv_ref_str.split(':')
+            else:
+                continue
+            if len(arxiv_ref) == 2:
+                arxiv_day = arxiv_ref[1]
+                if 'v' in arxiv_day:
+                    arxiv_day = arxiv_day.split('v')[0]
+                if arxiv_ref[0].isdigit() and arxiv_day.isdigit():
+                    ref_str = arxiv_ref[0] + '.' + arxiv_ref[1]
+                    markup = '<a href="https://arxiv.org/abs/' + \
+                        ref_str + '" tabindex="10" ' + \
+                        'rel="nofollow noopener noreferrer" ' + \
+                        'target="_blank">' + \
+                        '<span class="ellipsis">arXiv:' + ref_str + \
+                        '</span></a>'
+                    replace_dict[wrd] = markup
+            continue
        # does the word begin with a prefix?
        prefix_found = False
        for prefix in prefixes:
--- a/tests.py
+++ b/tests.py
@ -3617,7 +3617,19 @@ def _test_web_links():
        '<a href="https://somesite.net" tabindex="10" ' + \
        'rel="nofollow noopener noreferrer"' + \
        ' target="_blank"><span class="invisible">https://' + \
-        '</span><span class="ellipsis">somesite.net</span></a'
+        '</span><span class="ellipsis">somesite.net</span></a>'
+    if expected_text not in linked_text:
+        print(expected_text + '\n')
+        print(linked_text)
+    assert expected_text in linked_text
+
+    example_text = \
+        'This post has an arxiv link arXiv:2212.14672 some other text'
+    linked_text = add_web_links(example_text)
+    expected_text = \
+        '<a href="https://arxiv.org/abs/2212.14672" tabindex="10" ' + \
+        'rel="nofollow noopener noreferrer"' + \
+        ' target="_blank"><span class="ellipsis">arXiv:2212.14672</span></a>'
    if expected_text not in linked_text:
        print(expected_text + '\n')
        print(linked_text)