mirror of https://gitlab.com/bashrc2/epicyon
Include hashtags within cw lists
parent
781db96120
commit
f32333953c
39
cwlists.py
39
cwlists.py
|
@ -31,7 +31,9 @@ def load_cw_lists(base_dir: str, verbose: bool) -> {}:
|
||||||
continue
|
continue
|
||||||
if not list_json.get('name'):
|
if not list_json.get('name'):
|
||||||
continue
|
continue
|
||||||
if not list_json.get('words') and not list_json.get('domains'):
|
if not list_json.get('words') and \
|
||||||
|
not list_json.get('hashtags') and \
|
||||||
|
not list_json.get('domains'):
|
||||||
continue
|
continue
|
||||||
name = list_json['name']
|
name = list_json['name']
|
||||||
if verbose:
|
if verbose:
|
||||||
|
@ -59,6 +61,12 @@ def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
|
||||||
languages_understood, "content")
|
languages_understood, "content")
|
||||||
if not content:
|
if not content:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
post_tags = []
|
||||||
|
if post_json_object['object'].get('tag'):
|
||||||
|
if isinstance(post_json_object['object']['tag'], list):
|
||||||
|
post_tags = post_json_object['object']['tag']
|
||||||
|
|
||||||
for name, item in cw_lists.items():
|
for name, item in cw_lists.items():
|
||||||
if name not in lists_enabled:
|
if name not in lists_enabled:
|
||||||
continue
|
continue
|
||||||
|
@ -76,6 +84,35 @@ def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
|
||||||
|
|
||||||
matched = False
|
matched = False
|
||||||
|
|
||||||
|
# match hashtags within the post
|
||||||
|
if post_tags and item.get('hashtags'):
|
||||||
|
for tag in item['hashtags']:
|
||||||
|
tag = tag.strip()
|
||||||
|
if not tag:
|
||||||
|
continue
|
||||||
|
if not tag.startswith('#'):
|
||||||
|
tag = '#' + tag
|
||||||
|
tag = tag.lower()
|
||||||
|
for tag_dict in post_tags:
|
||||||
|
if not isinstance(tag_dict, dict):
|
||||||
|
continue
|
||||||
|
if not tag_dict.get('Hashtag'):
|
||||||
|
continue
|
||||||
|
if not tag_dict.get('name'):
|
||||||
|
continue
|
||||||
|
if tag_dict['name'].lower() == tag:
|
||||||
|
if cw_text:
|
||||||
|
cw_text = warning + ' / ' + cw_text
|
||||||
|
else:
|
||||||
|
cw_text = warning
|
||||||
|
matched = True
|
||||||
|
break
|
||||||
|
if matched:
|
||||||
|
break
|
||||||
|
|
||||||
|
if matched:
|
||||||
|
continue
|
||||||
|
|
||||||
# match domains within the content
|
# match domains within the content
|
||||||
if item.get('domains'):
|
if item.get('domains'):
|
||||||
for domain in item['domains']:
|
for domain in item['domains']:
|
||||||
|
|
|
@ -3,119 +3,122 @@
|
||||||
"warning": "Satire",
|
"warning": "Satire",
|
||||||
"description": "Intended to be humorous. Not real news stories.",
|
"description": "Intended to be humorous. Not real news stories.",
|
||||||
"words": [],
|
"words": [],
|
||||||
|
"hashtags": [
|
||||||
|
"satire"
|
||||||
|
],
|
||||||
"domains": [
|
"domains": [
|
||||||
"alhudood.net",
|
"alhudood.net",
|
||||||
"adobochronicles.com",
|
"adobochronicles.com",
|
||||||
"alternativelyfacts.com",
|
"alternativelyfacts.com",
|
||||||
"alternative-science.com",
|
"alternative-science.com",
|
||||||
"americaslastlineofdefense.com",
|
"americaslastlineofdefense.com",
|
||||||
"babylonbee.com",
|
"babylonbee.com",
|
||||||
"bluenewsnetwork.com",
|
"bluenewsnetwork.com",
|
||||||
"borowitzreport.com",
|
"borowitzreport.com",
|
||||||
"breakingburgh.com",
|
"breakingburgh.com",
|
||||||
"bullshitnews.org",
|
"bullshitnews.org",
|
||||||
"bustatroll.org",
|
"bustatroll.org",
|
||||||
"burrardstreetjournal.com",
|
"burrardstreetjournal.com",
|
||||||
"clickhole.com",
|
"clickhole.com",
|
||||||
"confederacyofdrones.com",
|
"confederacyofdrones.com",
|
||||||
"conservativetears.com",
|
"conservativetears.com",
|
||||||
"cracked.com",
|
"cracked.com",
|
||||||
"dailybonnet.com",
|
"dailybonnet.com",
|
||||||
"dailysquib.co.uk",
|
"dailysquib.co.uk",
|
||||||
"dailyworldupdate.us",
|
"dailyworldupdate.us",
|
||||||
"dailysnark.com",
|
"dailysnark.com",
|
||||||
"der-postillon.com",
|
"der-postillon.com",
|
||||||
"derfmagazine.com",
|
"derfmagazine.com",
|
||||||
"elchiguirebipolar.net",
|
"elchiguirebipolar.net",
|
||||||
"elmundotoday.com",
|
"elmundotoday.com",
|
||||||
"speld.nl",
|
"speld.nl",
|
||||||
"duffelblog.com",
|
"duffelblog.com",
|
||||||
"duhprogressive.com",
|
"duhprogressive.com",
|
||||||
"elkoshary.com",
|
"elkoshary.com",
|
||||||
"empirenews.net",
|
"empirenews.net",
|
||||||
"empiresports.co",
|
"empiresports.co",
|
||||||
"eveningharold.com",
|
"eveningharold.com",
|
||||||
"fark.com",
|
"fark.com",
|
||||||
"fmobserver.com",
|
"fmobserver.com",
|
||||||
"fognews.ru",
|
"fognews.ru",
|
||||||
"frankmag.ca",
|
"frankmag.ca",
|
||||||
"framleyexaminer.com",
|
"framleyexaminer.com",
|
||||||
"freedomcrossroads.com",
|
"freedomcrossroads.com",
|
||||||
"freedomfictions.com",
|
"freedomfictions.com",
|
||||||
"genesiustimes.com",
|
"genesiustimes.com",
|
||||||
"gishgallop.com",
|
"gishgallop.com",
|
||||||
"gomerblog.com",
|
"gomerblog.com",
|
||||||
"harddawn.com",
|
"harddawn.com",
|
||||||
"huzlers.com",
|
"huzlers.com",
|
||||||
"www.imao.us",
|
"www.imao.us",
|
||||||
"infobattle.org",
|
"infobattle.org",
|
||||||
"islamicanews.com",
|
"islamicanews.com",
|
||||||
"chronicle.su",
|
"chronicle.su",
|
||||||
"landoverbaptist.org",
|
"landoverbaptist.org",
|
||||||
"larknews.com",
|
"larknews.com",
|
||||||
"legorafi.fr",
|
"legorafi.fr",
|
||||||
"lercio.it",
|
"lercio.it",
|
||||||
"madhousemagazine.com",
|
"madhousemagazine.com",
|
||||||
"mcsweeneys.net",
|
"mcsweeneys.net",
|
||||||
"moronmajority.com",
|
"moronmajority.com",
|
||||||
"nationalreport.net",
|
"nationalreport.net",
|
||||||
"newsbiscuit.com",
|
"newsbiscuit.com",
|
||||||
"newsmutiny.com",
|
"newsmutiny.com",
|
||||||
"newsthump.com",
|
"newsthump.com",
|
||||||
"npcdaily.com",
|
"npcdaily.com",
|
||||||
"prettycoolsite.com",
|
"prettycoolsite.com",
|
||||||
"private-eye.co.uk",
|
"private-eye.co.uk",
|
||||||
"realnewsrightnow.com",
|
"realnewsrightnow.com",
|
||||||
"realrawnews.com",
|
"realrawnews.com",
|
||||||
"reductress.com",
|
"reductress.com",
|
||||||
"sanctumnews.com",
|
"sanctumnews.com",
|
||||||
"satirev.org",
|
"satirev.org",
|
||||||
"sportspickle.com",
|
"sportspickle.com",
|
||||||
"stiltonsplace.blogspot.com",
|
"stiltonsplace.blogspot.com",
|
||||||
"stubhillnews.com",
|
"stubhillnews.com",
|
||||||
"stuppid.com",
|
"stuppid.com",
|
||||||
"suffolkgazette.com",
|
"suffolkgazette.com",
|
||||||
"sundaysportonline.co.uk",
|
"sundaysportonline.co.uk",
|
||||||
"thatsprettygoodscience.com",
|
"thatsprettygoodscience.com",
|
||||||
"atlbanana.com",
|
"atlbanana.com",
|
||||||
"thebeaverton.com",
|
"thebeaverton.com",
|
||||||
"betootaadvocate.com",
|
"betootaadvocate.com",
|
||||||
"chaser.com.au",
|
"chaser.com.au",
|
||||||
"dailydiscord.com",
|
"dailydiscord.com",
|
||||||
"thedailymash.co.uk",
|
"thedailymash.co.uk",
|
||||||
"halfwaypost.com",
|
"halfwaypost.com",
|
||||||
"thehardtimes.net",
|
"thehardtimes.net",
|
||||||
"humortimes.com",
|
"humortimes.com",
|
||||||
"satirewire.com",
|
"satirewire.com",
|
||||||
"scrappleface.com",
|
"scrappleface.com",
|
||||||
"thelemonpress.co.uk",
|
"thelemonpress.co.uk",
|
||||||
"themideastbeast.com",
|
"themideastbeast.com",
|
||||||
"theneedling.com",
|
"theneedling.com",
|
||||||
"theonion.com",
|
"theonion.com",
|
||||||
"theoxymoron.co.uk",
|
"theoxymoron.co.uk",
|
||||||
"thepeoplescube.com",
|
"thepeoplescube.com",
|
||||||
"thepoke.co.uk",
|
"thepoke.co.uk",
|
||||||
"therightists.com",
|
"therightists.com",
|
||||||
"rochdaleherald.co.uk",
|
"rochdaleherald.co.uk",
|
||||||
"politicalgarbagechute.com",
|
"politicalgarbagechute.com",
|
||||||
"the-postillon.com",
|
"the-postillon.com",
|
||||||
"thecivilian.co.nz",
|
"thecivilian.co.nz",
|
||||||
"thedailyer.com",
|
"thedailyer.com",
|
||||||
"thedailywtf.com",
|
"thedailywtf.com",
|
||||||
"theredshtick.com",
|
"theredshtick.com",
|
||||||
"thesciencepost.com",
|
"thesciencepost.com",
|
||||||
"theshovel.com.au",
|
"theshovel.com.au",
|
||||||
"thespoof.com",
|
"thespoof.com",
|
||||||
"thestonkmarket.com",
|
"thestonkmarket.com",
|
||||||
"thereisnews.com",
|
"thereisnews.com",
|
||||||
"tittletattle365.com",
|
"tittletattle365.com",
|
||||||
"truenorthtimes.ca",
|
"truenorthtimes.ca",
|
||||||
"truthbrary.org",
|
"truthbrary.org",
|
||||||
"walkingeaglenews.com",
|
"walkingeaglenews.com",
|
||||||
"waterfordwhispersnews.com",
|
"waterfordwhispersnews.com",
|
||||||
"weeklyworldnews.com",
|
"weeklyworldnews.com",
|
||||||
"wokennews.com",
|
"wokennews.com",
|
||||||
"worldnewsdailyreport.com",
|
"worldnewsdailyreport.com",
|
||||||
"zaytung.com"
|
"zaytung.com"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue