More robust domain detection

main
Bob Mottram 2025-02-28 12:48:43 +00:00
parent 0a790a8598
commit 252142b70f
1 changed files with 21 additions and 3 deletions

View File

@ -5,7 +5,9 @@
"words": [], "words": [],
"domains": [ "domains": [
"nytimes.com", "nytimes.com",
"ft.com", ".ft.com",
"/ft.com",
">ft.com",
"wsj.com", "wsj.com",
"theathletic.com", "theathletic.com",
"washingtonpost.com", "washingtonpost.com",
@ -15,13 +17,17 @@
"aftonbladet.se", "aftonbladet.se",
"telegraph.co.uk", "telegraph.co.uk",
"clarin.com", "clarin.com",
"bild.de", ".bild.de",
"/bild.de",
">bild.de",
"latimes.com", "latimes.com",
"theatlantic.com", "theatlantic.com",
"thetimes.co.uk", "thetimes.co.uk",
"lemonde.fr", "lemonde.fr",
"corriere.it", "corriere.it",
"dn.se", ">dn.se",
"/dn.se",
".dn.se",
"lanacion.com", "lanacion.com",
"wyborcza.pl", "wyborcza.pl",
"wired.com", "wired.com",
@ -29,21 +35,33 @@
"newscorp.com", "newscorp.com",
"medium.com", "medium.com",
".lee.net", ".lee.net",
">lee.net",
"/lee.net",
"which.co.uk", "which.co.uk",
"readly.com", "readly.com",
"nineforbrands.com.au", "nineforbrands.com.au",
"newyorker.com", "newyorker.com",
"nationalgeographic.com", "nationalgeographic.com",
"bostonglobe.com", "bostonglobe.com",
">time.com",
".time.com",
"/time.com", "/time.com",
"/fortune.com", "/fortune.com",
".fortune.com",
">fortune.com",
">hbr.com",
".hbr.com",
"/hbr.com", "/hbr.com",
"nzherald.co.nz", "nzherald.co.nz",
"startribune.com", "startribune.com",
"independent.co.uk", "independent.co.uk",
"thesun.co.uk", "thesun.co.uk",
"theintercept.com", "theintercept.com",
">zeit.de",
"/zeit.de",
".zeit.de", ".zeit.de",
">stern.de",
"/stern.de",
".stern.de" ".stern.de"
] ]
} }