Include hashtags within cw lists

main
Bob Mottram 2024-02-23 10:32:46 +00:00
parent 781db96120
commit f32333953c
2 changed files with 154 additions and 114 deletions

View File

@ -31,7 +31,9 @@ def load_cw_lists(base_dir: str, verbose: bool) -> {}:
continue
if not list_json.get('name'):
continue
if not list_json.get('words') and not list_json.get('domains'):
if not list_json.get('words') and \
not list_json.get('hashtags') and \
not list_json.get('domains'):
continue
name = list_json['name']
if verbose:
@ -59,6 +61,12 @@ def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
languages_understood, "content")
if not content:
return
post_tags = []
if post_json_object['object'].get('tag'):
if isinstance(post_json_object['object']['tag'], list):
post_tags = post_json_object['object']['tag']
for name, item in cw_lists.items():
if name not in lists_enabled:
continue
@ -76,6 +84,35 @@ def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
matched = False
# match hashtags within the post
if post_tags and item.get('hashtags'):
for tag in item['hashtags']:
tag = tag.strip()
if not tag:
continue
if not tag.startswith('#'):
tag = '#' + tag
tag = tag.lower()
for tag_dict in post_tags:
if not isinstance(tag_dict, dict):
continue
if not tag_dict.get('Hashtag'):
continue
if not tag_dict.get('name'):
continue
if tag_dict['name'].lower() == tag:
if cw_text:
cw_text = warning + ' / ' + cw_text
else:
cw_text = warning
matched = True
break
if matched:
break
if matched:
continue
# match domains within the content
if item.get('domains'):
for domain in item['domains']:

View File

@ -3,119 +3,122 @@
"warning": "Satire",
"description": "Intended to be humorous. Not real news stories.",
"words": [],
"hashtags": [
"satire"
],
"domains": [
"alhudood.net",
"adobochronicles.com",
"alternativelyfacts.com",
"alternative-science.com",
"americaslastlineofdefense.com",
"babylonbee.com",
"bluenewsnetwork.com",
"borowitzreport.com",
"breakingburgh.com",
"bullshitnews.org",
"bustatroll.org",
"burrardstreetjournal.com",
"clickhole.com",
"confederacyofdrones.com",
"conservativetears.com",
"cracked.com",
"dailybonnet.com",
"dailysquib.co.uk",
"dailyworldupdate.us",
"dailysnark.com",
"der-postillon.com",
"derfmagazine.com",
"elchiguirebipolar.net",
"elmundotoday.com",
"speld.nl",
"duffelblog.com",
"duhprogressive.com",
"elkoshary.com",
"empirenews.net",
"empiresports.co",
"eveningharold.com",
"fark.com",
"fmobserver.com",
"fognews.ru",
"frankmag.ca",
"framleyexaminer.com",
"freedomcrossroads.com",
"freedomfictions.com",
"genesiustimes.com",
"gishgallop.com",
"gomerblog.com",
"harddawn.com",
"huzlers.com",
"www.imao.us",
"infobattle.org",
"islamicanews.com",
"chronicle.su",
"landoverbaptist.org",
"larknews.com",
"legorafi.fr",
"lercio.it",
"madhousemagazine.com",
"mcsweeneys.net",
"moronmajority.com",
"nationalreport.net",
"newsbiscuit.com",
"newsmutiny.com",
"newsthump.com",
"npcdaily.com",
"prettycoolsite.com",
"private-eye.co.uk",
"realnewsrightnow.com",
"realrawnews.com",
"reductress.com",
"sanctumnews.com",
"satirev.org",
"sportspickle.com",
"stiltonsplace.blogspot.com",
"stubhillnews.com",
"stuppid.com",
"suffolkgazette.com",
"sundaysportonline.co.uk",
"thatsprettygoodscience.com",
"atlbanana.com",
"thebeaverton.com",
"betootaadvocate.com",
"chaser.com.au",
"dailydiscord.com",
"thedailymash.co.uk",
"halfwaypost.com",
"thehardtimes.net",
"humortimes.com",
"satirewire.com",
"scrappleface.com",
"thelemonpress.co.uk",
"themideastbeast.com",
"theneedling.com",
"theonion.com",
"theoxymoron.co.uk",
"thepeoplescube.com",
"thepoke.co.uk",
"therightists.com",
"rochdaleherald.co.uk",
"politicalgarbagechute.com",
"the-postillon.com",
"thecivilian.co.nz",
"thedailyer.com",
"thedailywtf.com",
"theredshtick.com",
"thesciencepost.com",
"theshovel.com.au",
"thespoof.com",
"thestonkmarket.com",
"thereisnews.com",
"tittletattle365.com",
"truenorthtimes.ca",
"truthbrary.org",
"walkingeaglenews.com",
"waterfordwhispersnews.com",
"weeklyworldnews.com",
"wokennews.com",
"worldnewsdailyreport.com",
"zaytung.com"
"alhudood.net",
"adobochronicles.com",
"alternativelyfacts.com",
"alternative-science.com",
"americaslastlineofdefense.com",
"babylonbee.com",
"bluenewsnetwork.com",
"borowitzreport.com",
"breakingburgh.com",
"bullshitnews.org",
"bustatroll.org",
"burrardstreetjournal.com",
"clickhole.com",
"confederacyofdrones.com",
"conservativetears.com",
"cracked.com",
"dailybonnet.com",
"dailysquib.co.uk",
"dailyworldupdate.us",
"dailysnark.com",
"der-postillon.com",
"derfmagazine.com",
"elchiguirebipolar.net",
"elmundotoday.com",
"speld.nl",
"duffelblog.com",
"duhprogressive.com",
"elkoshary.com",
"empirenews.net",
"empiresports.co",
"eveningharold.com",
"fark.com",
"fmobserver.com",
"fognews.ru",
"frankmag.ca",
"framleyexaminer.com",
"freedomcrossroads.com",
"freedomfictions.com",
"genesiustimes.com",
"gishgallop.com",
"gomerblog.com",
"harddawn.com",
"huzlers.com",
"www.imao.us",
"infobattle.org",
"islamicanews.com",
"chronicle.su",
"landoverbaptist.org",
"larknews.com",
"legorafi.fr",
"lercio.it",
"madhousemagazine.com",
"mcsweeneys.net",
"moronmajority.com",
"nationalreport.net",
"newsbiscuit.com",
"newsmutiny.com",
"newsthump.com",
"npcdaily.com",
"prettycoolsite.com",
"private-eye.co.uk",
"realnewsrightnow.com",
"realrawnews.com",
"reductress.com",
"sanctumnews.com",
"satirev.org",
"sportspickle.com",
"stiltonsplace.blogspot.com",
"stubhillnews.com",
"stuppid.com",
"suffolkgazette.com",
"sundaysportonline.co.uk",
"thatsprettygoodscience.com",
"atlbanana.com",
"thebeaverton.com",
"betootaadvocate.com",
"chaser.com.au",
"dailydiscord.com",
"thedailymash.co.uk",
"halfwaypost.com",
"thehardtimes.net",
"humortimes.com",
"satirewire.com",
"scrappleface.com",
"thelemonpress.co.uk",
"themideastbeast.com",
"theneedling.com",
"theonion.com",
"theoxymoron.co.uk",
"thepeoplescube.com",
"thepoke.co.uk",
"therightists.com",
"rochdaleherald.co.uk",
"politicalgarbagechute.com",
"the-postillon.com",
"thecivilian.co.nz",
"thedailyer.com",
"thedailywtf.com",
"theredshtick.com",
"thesciencepost.com",
"theshovel.com.au",
"thespoof.com",
"thestonkmarket.com",
"thereisnews.com",
"tittletattle365.com",
"truenorthtimes.ca",
"truthbrary.org",
"walkingeaglenews.com",
"waterfordwhispersnews.com",
"weeklyworldnews.com",
"wokennews.com",
"worldnewsdailyreport.com",
"zaytung.com"
]
}