diff --git a/content.py b/content.py
index a1320d4b7..042827fb1 100644
--- a/content.py
+++ b/content.py
@@ -604,7 +604,10 @@ def add_web_links(content: str) -> str:
# if there are no prefixes then just keep the content we have
if not prefix_found:
- return content
+ if 'arXiv:' in content or 'arx:' in content or 'arxiv:' in content:
+ prefix_found = True
+ else:
+ return content
content = content.replace('\r', '')
words = content.replace('\n', ' --linebreak-- ').split(' ')
@@ -612,6 +615,31 @@ def add_web_links(content: str) -> str:
for wrd in words:
if ':' not in wrd:
continue
+ # handle arXiv references such as arXiv:2212.14672 or arx:2212.14672v2
+ if wrd.startswith('arXiv:') or \
+ wrd.startswith('arx:') or \
+ wrd.startswith('arxiv:'):
+ arxiv_ref_str = wrd.split(':', 1)[1].lower()
+ if '.' in arxiv_ref_str:
+ arxiv_ref = arxiv_ref_str.split('.')
+ elif ':' in arxiv_ref_str:
+ arxiv_ref = arxiv_ref_str.split(':')
+ else:
+ continue
+ if len(arxiv_ref) == 2:
+ arxiv_day = arxiv_ref[1]
+ if 'v' in arxiv_day:
+ arxiv_day = arxiv_day.split('v')[0]
+ if arxiv_ref[0].isdigit() and arxiv_day.isdigit():
+ ref_str = arxiv_ref[0] + '.' + arxiv_ref[1]
+ markup = '' + \
+ 'arXiv:' + ref_str + \
+ ''
+ replace_dict[wrd] = markup
+ continue
# does the word begin with a prefix?
prefix_found = False
for prefix in prefixes:
diff --git a/tests.py b/tests.py
index da05e6d37..a15bf87f2 100644
--- a/tests.py
+++ b/tests.py
@@ -3617,7 +3617,19 @@ def _test_web_links():
'https://' + \
- 'somesite.netsomesite.net'
+ if expected_text not in linked_text:
+ print(expected_text + '\n')
+ print(linked_text)
+ assert expected_text in linked_text
+
+ example_text = \
+ 'This post has an arxiv link arXiv:2212.14672 some other text'
+ linked_text = add_web_links(example_text)
+ expected_text = \
+ 'arXiv:2212.14672'
if expected_text not in linked_text:
print(expected_text + '\n')
print(linked_text)