From 85780153dc0ca51ccb3e299d36e564fde90b458f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 5 Jul 2022 20:35:38 +0100 Subject: [PATCH] Wildcard in middle --- content.py | 75 +++++++++++++++++++++++++++++++++--------------------- tests.py | 5 +++- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/content.py b/content.py index 1cc074111..06ec4ed06 100644 --- a/content.py +++ b/content.py @@ -1052,36 +1052,53 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}: } else: result[whistle]['count'] += 1 - else: - if whistle.lower().endswith('-x'): - whistle = whistle[:len(whistle)-2] - starting = True - elif (whistle.endswith('*') or - whistle.endswith('~') or - whistle.endswith('-')): - whistle = whistle[:len(whistle)-1] - starting = True + continue - if starting: - for wrd in words: - if wrd.startswith(whistle): - if not result.get(whistle): - result[whistle] = { - "count": 1, - "category": category - } - else: - result[whistle]['count'] += 1 - else: - for wrd in words: - if wrd == whistle: - if not result.get(whistle): - result[whistle] = { - "count": 1, - "category": category - } - else: - result[whistle]['count'] += 1 + if whistle.lower().endswith('-x'): + whistle = whistle[:len(whistle)-2] + starting = True + elif (whistle.endswith('*') or + whistle.endswith('~') or + whistle.endswith('-')): + whistle = whistle[:len(whistle)-1] + starting = True + + if starting: + for wrd in words: + if wrd.startswith(whistle): + if not result.get(whistle): + result[whistle] = { + "count": 1, + "category": category + } + else: + result[whistle]['count'] += 1 + continue + + if '*' in whistle: + whistle_start = whistle.split('*', 1)[0] + whistle_end = whistle.split('*', 1)[1] + for wrd in words: + if wrd.startswith(whistle_start) and \ + wrd.endswith(whistle_end): + if not result.get(whistle): + result[whistle] = { + "count": 1, + "category": category + } + else: + result[whistle]['count'] += 1 + continue + + for wrd in words: + if wrd == whistle: + if not result.get(whistle): + result[whistle] = { + "count": 1, + "category": category + } + else: + result[whistle]['count'] += 1 return result diff --git a/tests.py b/tests.py index a84b4aee8..990299132 100644 --- a/tests.py +++ b/tests.py @@ -7332,7 +7332,8 @@ def _test_dogwhistles(): dogwhistles = { "X-hamstered": "hamsterism", "gerbil": "rodent", - "*snake": "slither" + "*snake": "slither", + "start*end": "something" } content = 'This text does not contain any dogwhistles' assert not detect_dogwhistles(content, dogwhistles) @@ -7340,6 +7341,8 @@ def _test_dogwhistles(): assert detect_dogwhistles(content, dogwhistles) content = 'A rattlesnake.' assert detect_dogwhistles(content, dogwhistles) + content = 'A startthingend.' + assert detect_dogwhistles(content, dogwhistles) content = 'This content is unhamstered and yhamstered.' result = detect_dogwhistles(content, dogwhistles) assert result