From bcd0b5f30b0ab17b976cdf11a060f0a6bd71a26e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 4 Jan 2023 12:45:36 +0000 Subject: [PATCH] Tidying --- content.py | 60 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/content.py b/content.py index 042827fb1..31d86978e 100644 --- a/content.py +++ b/content.py @@ -585,6 +585,41 @@ def _shorten_linked_urls(content: str) -> str: return content +def _contains_arxiv_reference(wrd: str, replace_dict: {}) -> bool: + """Handle arxiv scientific references + """ + if not wrd.startswith('arXiv:') and \ + not wrd.startswith('arx:') and \ + not wrd.startswith('arxiv:'): + return False + + arxiv_ref_str = wrd.split(':', 1)[1].lower() + if '.' in arxiv_ref_str: + arxiv_ref = arxiv_ref_str.split('.') + elif ':' in arxiv_ref_str: + arxiv_ref = arxiv_ref_str.split(':') + else: + return False + if len(arxiv_ref) != 2: + return False + if not arxiv_ref[0].isdigit(): + return False + arxiv_day = arxiv_ref[1] + if 'v' in arxiv_day: + arxiv_day = arxiv_day.split('v')[0] + if not arxiv_day.isdigit(): + return False + ref_str = arxiv_ref[0] + '.' + arxiv_ref[1] + markup = '' + \ + 'arXiv:' + ref_str + \ + '' + replace_dict[wrd] = markup + return True + + def add_web_links(content: str) -> str: """Adds markup for web links """ @@ -615,30 +650,7 @@ def add_web_links(content: str) -> str: for wrd in words: if ':' not in wrd: continue - # handle arxiv scientific references - if wrd.startswith('arXiv:') or \ - wrd.startswith('arx:') or \ - wrd.startswith('arxiv:'): - arxiv_ref_str = wrd.split(':', 1)[1].lower() - if '.' in arxiv_ref_str: - arxiv_ref = arxiv_ref_str.split('.') - elif ':' in arxiv_ref_str: - arxiv_ref = arxiv_ref_str.split(':') - else: - continue - if len(arxiv_ref) == 2: - arxiv_day = arxiv_ref[1] - if 'v' in arxiv_day: - arxiv_day = arxiv_day.split('v')[0] - if arxiv_ref[0].isdigit() and arxiv_day.isdigit(): - ref_str = arxiv_ref[0] + '.' + arxiv_ref[1] - markup = '' + \ - 'arXiv:' + ref_str + \ - '' - replace_dict[wrd] = markup + if _contains_arxiv_reference(wrd, replace_dict): continue # does the word begin with a prefix? prefix_found = False