From 67e06f65c9ef03950b327e7229a12a9b992f4537 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 11:11:32 +0000 Subject: [PATCH 01/11] Guess hashtag categorisations --- inbox.py | 37 +++++++++++++++++++++++++++++++++++++ tests.py | 15 +++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/inbox.py b/inbox.py index cf681eb4e..d0c2a21dc 100644 --- a/inbox.py +++ b/inbox.py @@ -30,6 +30,8 @@ from utils import loadJson from utils import saveJson from utils import updateLikesCollection from utils import undoLikesCollectionEntry +from utils import getHashtagCategories +from utils import setHashtagCategory from httpsig import verifyPostHeaders from session import createSession from session import getJson @@ -68,6 +70,31 @@ from happening import saveEventPost from delete import removeOldHashtags +def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str: + """Tries to guess a category for the given hashtag. + This works by trying to find the longest similar hashtag + """ + categoryMatched = '' + tagMatched = '' + tagMatchedLen = 0 + + for categoryStr, hashtagList in hashtagCategories.items(): + for hashtag in hashtagList: + if hashtag in tagName: + if not tagMatched: + tagMatched = hashtag + tagMatchedLen = len(tagMatched) + categoryMatched = categoryStr + else: + # match the longest tag + if len(hashtag) > tagMatchedLen: + tagMatched = hashtag + categoryMatched = categoryStr + if not categoryMatched: + return + return categoryMatched + + def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: """Extracts hashtags from an incoming post and updates the relevant tags files. @@ -91,6 +118,8 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: print('Creating tags directory') os.mkdir(tagsDir) + hashtagCategories = getHashtagCategories(baseDir) + for tag in postJsonObject['object']['tag']: if not tag.get('type'): continue @@ -122,6 +151,14 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: tagsFilename + ' ' + str(e)) removeOldHashtags(baseDir, 3) + # automatically assign a category to the tag if possible + categoryFilename = tagsDir + '/' + tagName + '.category' + if not os.path.isfile(categoryFilename): + categoryStr = \ + guessHashtagCategory(tagName, hashtagCategories) + if categoryStr: + setHashtagCategory(baseDir, tagName, categoryStr) + def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int, translate: {}, diff --git a/tests.py b/tests.py index aae2345b3..be85000b3 100644 --- a/tests.py +++ b/tests.py @@ -71,6 +71,7 @@ from delete import sendDeleteViaServer from inbox import jsonPostAllowsComments from inbox import validInbox from inbox import validInboxFilenames +from inbox import guessHashtagCategory from content import htmlReplaceEmailQuote from content import htmlReplaceQuoteMarks from content import dangerousMarkup @@ -2421,8 +2422,22 @@ def testValidNickname(): assert not validNickname(domain, nickname) +def testGuessHashtagCategory() -> None: + print('testGuessHashtagCategory') + hashtagCategories = { + "foo": ["swan", "goose"], + "bar": ["cat", "mouse"] + } + guess = guessHashtagCategory("unspecifiedgoose", hashtagCategories) + assert guess == "foo" + + guess = guessHashtagCategory("catpic", hashtagCategories) + assert guess == "bar" + + def runAllTests(): print('Running tests...') + testGuessHashtagCategory() testValidNickname() testParseFeedDate() testFirstParagraphFromString() From 4d4681735019df10af73eb386b756f935f8beef3 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 11:58:03 +0000 Subject: [PATCH 02/11] Simplify --- inbox.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/inbox.py b/inbox.py index d0c2a21dc..5bcabcbe3 100644 --- a/inbox.py +++ b/inbox.py @@ -75,20 +75,17 @@ def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str: This works by trying to find the longest similar hashtag """ categoryMatched = '' - tagMatched = '' tagMatchedLen = 0 for categoryStr, hashtagList in hashtagCategories.items(): for hashtag in hashtagList: - if hashtag in tagName: - if not tagMatched: - tagMatched = hashtag - tagMatchedLen = len(tagMatched) + if hashtag in tagName or tagName in hashtag: + if not categoryMatched: + tagMatchedLen = len(hashtag) categoryMatched = categoryStr else: # match the longest tag if len(hashtag) > tagMatchedLen: - tagMatched = hashtag categoryMatched = categoryStr if not categoryMatched: return From 931068d4c259e265895ae184f0f95c270c20efc7 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 13:38:07 +0000 Subject: [PATCH 03/11] Default hashtag categories per language --- daemon.py | 4 + defaultcategories/en.xml | 390 +++++++++++++++++++++++++++++++++++++++ newswire.py | 19 +- 3 files changed, 411 insertions(+), 2 deletions(-) create mode 100644 defaultcategories/en.xml diff --git a/daemon.py b/daemon.py index 2ab60cb3c..aee21d837 100644 --- a/daemon.py +++ b/daemon.py @@ -231,6 +231,7 @@ from devices import E2EEaddDevice from newswire import getRSSfromDict from newswire import rss2Header from newswire import rss2Footer +from newswire import loadHashtagCategories from newsdaemon import runNewswireWatchdog from newsdaemon import runNewswireDaemon from filters import isFiltered @@ -13149,6 +13150,9 @@ def runDaemon(maxNewswirePosts: int, httpd.restartInboxQueueInProgress = False httpd.restartInboxQueue = False + print('Adding hashtag categories for language ' + httpd.systemLanguage) + loadHashtagCategories(baseDir, httpd.systemLanguage) + if not unitTest: print('Creating inbox queue watchdog') httpd.thrWatchdog = \ diff --git a/defaultcategories/en.xml b/defaultcategories/en.xml new file mode 100644 index 000000000..8f4ccbd45 --- /dev/null +++ b/defaultcategories/en.xml @@ -0,0 +1,390 @@ + + + + #categories + + gafam + zuckerberg apple youtube facebook amazon amazonring microsoft twitter skype degoogled google dotcoms deleteyoutube fascistbook FuckGoogle degoogle ring gafam fuckoffgoogle deletefacebook bigtech + + Sat, 05 Dec 2020 13:30:17 UT + + + food + vitamind bolognese sourdough vegan tea bread soysauce baking foodwaste coffee + + Sat, 05 Dec 2020 13:30:17 UT + + + cycling + bicycle bike Snowbike + + Sat, 05 Dec 2020 13:30:17 UT + + + phones + mobileapp fdroid plasmamobile smartphone pinephone mobile ubuntutouch osmand vodafone postmarketos + + Sat, 05 Dec 2020 13:30:17 UT + + + software + app freedombox windows libre nginx Framasoft drm kubernetes jami FuckOffZoom docker freesoftware foss nextcloud wechat ikiwiki outreachy selfhosting lyft nitter opensource diaspora cabal conferencing accessibility owncast emacs gemini email chatapps floss deltachat bittorrent zoom gpl FriendofGNOME obnam cryptpad OwnStream mumble grsync irssi mutt backup apps ffmpeg lemmy OSM win10 jitsi libreoffice dino openoffice + + Sat, 05 Dec 2020 13:30:17 UT + + + security + encrypt password cryptography infosec gchq cryptowars UseAMaskUseTor cyberattack security tor vpn openssh openssl crypto opsec nsa protonvpn nitrokey openpgp gpg cybersecurity signal noscript openvpn + + Sat, 05 Dec 2020 13:30:17 UT + + + countries + chile italy morocco russia belarus uk catalonia greece nigeria american iran scotland turkey spain europe ireland iraq mexico nz france argentina wales kenya eu india europeanunion chinese + + Sat, 05 Dec 2020 13:30:17 UT + + + politics + cia wageslavery liberation fascism rojava leftists Socialism ukpol freedom anarchism DefundThePolice bjp election generalstrike digitalfreedom hatespeech fascists peerproduction corporations commons wageslave softwarefreedom socialecology politics nzpol totalitarianism TyskySour Labour decolonization surveillance elections borisjohnson mutuality whitehouse decolonize decenterwhiteness ChineseAppBan modi surveillancecapitalism leftist Revolution ukpolitics migration mutualaid fascist uselection + + Sat, 05 Dec 2020 13:30:17 UT + + + conferences + debconf talk FreedomBoxSummit summit minidebconf flossevent conf rC3 flossconf + + Sat, 05 Dec 2020 13:30:17 UT + + + photos + nikon photography photo tokyocameraclub photoshop camera picture + + Sat, 05 Dec 2020 13:30:17 UT + + + programming + programming css rustlang typescript adventofcode scripting fedidev sourcecode django tuskydev lisp javascript code elisp html rust clojurescript racket python + + Sat, 05 Dec 2020 13:30:17 UT + + + activitypub + activitypub pleroma fedilab mastotips misskey siskin followers pixelfed monal tusky peertubers feditips fedizens epicyon mastomagic pixeldev fediverse mastodon peertube mobilizon + + Sat, 05 Dec 2020 13:30:17 UT + + + news + news + + Sat, 05 Dec 2020 13:30:17 UT + + + games + minecraft TetrisGore gaming + + Sat, 05 Dec 2020 13:30:17 UT + + + cats + dailycatpic DailyCatVid + + Sat, 05 Dec 2020 13:30:17 UT + + + music + punk bandcamp musicians mp3 thecure vaporwave dubstep synthwave experimentalmusic dj newwave dorkwave producing NowPlaying libremusicproduction MusicAdvent synth music fediversemusic cyberpunkmusic BandcampFriday + + Sat, 05 Dec 2020 13:30:17 UT + + + indymedia + visionontv indymediaback omn 4opens + + Sat, 05 Dec 2020 13:30:17 UT + + + places + dublin hannover hamburg ipswich paris tokyo london oakland nürnberg munich essex minsk nyc montreal lesbos sahara + + Sat, 05 Dec 2020 13:30:17 UT + + + questions + askmastodon askfosstodon + + Sat, 05 Dec 2020 13:30:17 UT + + + birds + RainbowBeeEater bird + + Sat, 05 Dec 2020 13:30:17 UT + + + ethics + digitalethics + + Sat, 05 Dec 2020 13:30:17 UT + + + internet + redecentralize rtmp decentralization decentralize w3c torrent data icann dns openstandards oauth SmallWeb xmpp semanticweb ntp socialnetworks jabber decentralized darknet cookies darkweb server browser p2p web twitch domain browsers openculture internet openweb socialweb cloudflare + + Sat, 05 Dec 2020 13:30:17 UT + + + retro + atarist teletext floppydisk retrocomputing 80s z80 amiga + + Sat, 05 Dec 2020 13:30:17 UT + + + years + Year2020 + + Sat, 05 Dec 2020 13:30:17 UT + + + pets + catofmastodon catbehaviour cats kittens dog caturday catsofmastodon cute dogs cat catcontent + + Sat, 05 Dec 2020 13:30:17 UT + + + podcasts + IntergalacticWasabiHour podcast tilderadio + + Sat, 05 Dec 2020 13:30:17 UT + + + events + live followfriday livestream InternationalCheetahDay + + Sat, 05 Dec 2020 13:30:17 UT + + + gender + transwomen transcrowdfund female trans women estradiol woman transrights + + Sat, 05 Dec 2020 13:30:17 UT + + + hardware + keyboards modem keyboard cybredeck thinkpad + + Sat, 05 Dec 2020 13:30:17 UT + + + linux + mobian openwrt distros wireguard linuxaudio gtk debian trisquel ubuntu xubuntu fedora distro qubesos linux btrfs + + Sat, 05 Dec 2020 13:30:17 UT + + + fiction + cyberpunk thehobbit fiction + + Sat, 05 Dec 2020 13:30:17 UT + + + audio + audioproduction audiofeedback + + Sat, 05 Dec 2020 13:30:17 UT + + + bots + bot + + Sat, 05 Dec 2020 13:30:17 UT + + + climate + clouds weather + + Sat, 05 Dec 2020 13:30:17 UT + + + books + earthsea ebooks ebook epub + + Sat, 05 Dec 2020 13:30:17 UT + + + scifi + startrek starwars + + Sat, 05 Dec 2020 13:30:17 UT + + + pandemic + CoronaWarnApp facemasks vaccines vaccine covid Lockdown codid19 COVID19 COVID + + Sat, 05 Dec 2020 13:30:17 UT + + + religion + pagan + + Sat, 05 Dec 2020 13:30:17 UT + + + science + supercollider paleontology + + Sat, 05 Dec 2020 13:30:17 UT + + + nature + trees nature + + Sat, 05 Dec 2020 13:30:17 UT + + + fashion + bras fashion patches + + Sat, 05 Dec 2020 13:30:17 UT + + + art + krita adultcolouring collage MastoArt digitalart mandala concretepoetry artwithopensource + + Sat, 05 Dec 2020 13:30:17 UT + + + techbros + hackernews reddit + + Sat, 05 Dec 2020 13:30:17 UT + + + moderation + fedblock + + Sat, 05 Dec 2020 13:30:17 UT + + + election + voted vote + + Sat, 05 Dec 2020 13:30:17 UT + + + #music + trance + + Sat, 05 Dec 2020 13:30:17 UT + + + activism + fsfe xr eff openrightsgroup fsf conservancy + + Sat, 05 Dec 2020 13:30:17 UT + + + crafts + knitting makers + + Sat, 05 Dec 2020 13:30:17 UT + + + gardening + DailyFlowers flowers gardening + + Sat, 05 Dec 2020 13:30:17 UT + + + education + education tutorial + + Sat, 05 Dec 2020 13:30:17 UT + + + seasons + winter + + Sat, 05 Dec 2020 13:30:17 UT + + + radio + hamradio + + Sat, 05 Dec 2020 13:30:17 UT + + + microcontroller + microcontroller + + Sat, 05 Dec 2020 13:30:17 UT + + + comedy + satire irony + + Sat, 05 Dec 2020 13:30:17 UT + + + privacy + privacymatters dataprivacy privacy + + Sat, 05 Dec 2020 13:30:17 UT + + + environment + climatechange climatechaos + + Sat, 05 Dec 2020 13:30:17 UT + + + #software + flatpak + + Sat, 05 Dec 2020 13:30:17 UT + + + scotland + highlands + + Sat, 05 Dec 2020 13:30:17 UT + + + microcontrollers + esp8266 esp32 + + Sat, 05 Dec 2020 13:30:17 UT + + + health + meds + + Sat, 05 Dec 2020 13:30:17 UT + + + astronomy + moon milkyway + + Sat, 05 Dec 2020 13:30:17 UT + + + travel + travel taxi + + Sat, 05 Dec 2020 13:30:17 UT + + + help + help + + Sat, 05 Dec 2020 13:30:17 UT + + + funding + patreon + + Sat, 05 Dec 2020 13:30:17 UT + + + diff --git a/newswire.py b/newswire.py index 7ee8ef784..d0da7bb35 100644 --- a/newswire.py +++ b/newswire.py @@ -203,7 +203,22 @@ def parseFeedDate(pubDate: str) -> str: return pubDateStr -def xml2StrToHashtagCategories(baseDir: str, domain: str, xmlStr: str, +def loadHashtagCategories(baseDir: str, language: str) -> None: + """Loads an rss file containing hashtag categories + """ + hashtagCategoriesFilename = baseDir + '/categories.xml' + if not os.path.isfile(hashtagCategoriesFilename): + hashtagCategoriesFilename = \ + baseDir + '/defaultcategories/' + language + '.xml' + if not os.path.isfile(hashtagCategoriesFilename): + return + + with open(hashtagCategoriesFilename, 'r') as fp: + xmlStr = fp.read() + xml2StrToHashtagCategories(baseDir, xmlStr, 1024) + + +def xml2StrToHashtagCategories(baseDir: str, xmlStr: str, maxCategoriesFeedItemSizeKb: int) -> None: """Updates hashtag categories based upon an rss feed """ @@ -252,7 +267,7 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, return {} result = {} if '#categories' in xmlStr: - xml2StrToHashtagCategories(baseDir, domain, xmlStr, + xml2StrToHashtagCategories(baseDir, xmlStr, maxCategoriesFeedItemSizeKb) return {} rssItems = xmlStr.split('') From dc951e6286eacd8bb2e3e325c2e4ca6b0aad8a2e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 13:53:06 +0000 Subject: [PATCH 04/11] More countries --- defaultcategories/en.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/defaultcategories/en.xml b/defaultcategories/en.xml index 8f4ccbd45..c216c7fdf 100644 --- a/defaultcategories/en.xml +++ b/defaultcategories/en.xml @@ -40,7 +40,7 @@ countries - chile italy morocco russia belarus uk catalonia greece nigeria american iran scotland turkey spain europe ireland iraq mexico nz france argentina wales kenya eu india europeanunion chinese + wales scotland eu europe europeanunion chinese afghanistan albania algeria andorra angola antigua argentina armenia australia austria azerbaijan bahamas bahrain bangladesh barbados belarus belgium belize benin bhutan bolivia bosnia botswana brazil brunei bulgaria burkina burundi cambodia cameroon canada capeverde chad chile china colombia comoros congo costarica croatia cuba cyprus czech denmark djibouti dominica dominican easttimor ecuador egypt elsalvador guinea eritrea estonia ethiopia fiji finland france gabon gambia georgia germany ghana greece grenada guatemala guinea guyana haiti honduras hungary iceland india indonesia iran iraq ireland israel italy ivorycoast jamaica japan jordan kazakhstan kenya kiribati koreanorth koreasouth kosovo kuwait kyrgyzstan laos latvia lebanon lesotho liberia libya liechtenstein lithuania luxembourg macedonia madagascar malawi malaysia maldives mali malta marshall mauritania mauritius mexico micronesia moldova monaco mongolia montenegro morocco mozambique myanmar burma namibia nauru nepal netherlands newzealand nz nicaragua niger nigeria norway oman pakistan palau panama papua paraguay peru philippines poland Portugal qatar romania russian rwanda stkitts stlucia saintvincent samoa sanmarino saudi senegal serbia seychelles sierraleone singapore slovakia slovenia solomon somalia southafrica sudan spain srilanka sudan suriname swaziland sweden switzerland syria taiwan tajikistan tanzania thailand togo tonga trinidad tunisia turkey turkmenistan tuvalu uganda ukraine uae uk usa us unitedstates uruguay uzbekistan vanuatu vaticancity venezuela vietnam yemen zambia zimbabwe Sat, 05 Dec 2020 13:30:17 UT From 4bd0cc2e903d73a8bb3982b23a285447cb79f284 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 13:58:19 +0000 Subject: [PATCH 05/11] More foods --- defaultcategories/en.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/defaultcategories/en.xml b/defaultcategories/en.xml index c216c7fdf..eca304a02 100644 --- a/defaultcategories/en.xml +++ b/defaultcategories/en.xml @@ -10,7 +10,7 @@ food - vitamind bolognese sourdough vegan tea bread soysauce baking foodwaste coffee + vitamind bolognese sourdough vegan soysauce bakery baking foodwaste aroma bagel batter beans beer biscuit bread broth burger butter cake candy caramel caviar cheese chili chocolate cider cobbler cocoa coffee cookie cream croissant crumble cuisine curd dessert dish drink eggs entree filet fish flour foie gras food glaze grill hamburger juice ketchup kitchen lard liquor margarine marinade mayo mayonnaise meat milk mousse muffin mushroom noodle nuts oil olive omelette pan pasta paste pastry pie pizza plate pot poutine pudding raclette recipe rice salad salsa sandwich sauce seasoning skillet soda soup soy spice steak stew syrup tartar taste tea toast vinegar waffle wheat wine wok yeast yogurt cookery cooking Sat, 05 Dec 2020 13:30:17 UT From a0b404774bc41d3a10cbd8a9c4457185c1d44480 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 14:27:50 +0000 Subject: [PATCH 06/11] More places --- defaultcategories/en.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/defaultcategories/en.xml b/defaultcategories/en.xml index eca304a02..37b1040fe 100644 --- a/defaultcategories/en.xml +++ b/defaultcategories/en.xml @@ -16,7 +16,7 @@ cycling - bicycle bike Snowbike + bicycle bike Snowbike cycling Sat, 05 Dec 2020 13:30:17 UT @@ -94,7 +94,7 @@ music - punk bandcamp musicians mp3 thecure vaporwave dubstep synthwave experimentalmusic dj newwave dorkwave producing NowPlaying libremusicproduction MusicAdvent synth music fediversemusic cyberpunkmusic BandcampFriday + punk rap hiphop ipod rave bandcamp musicians mp3 thecure vaporwave dubstep synthwave experimentalmusic dj newwave dorkwave producing NowPlaying libremusicproduction MusicAdvent synth music fediversemusic cyberpunkmusic BandcampFriday Sat, 05 Dec 2020 13:30:17 UT @@ -106,7 +106,7 @@ places - dublin hannover hamburg ipswich paris tokyo london oakland nürnberg munich essex minsk nyc montreal lesbos sahara + hannover hamburg ipswich oakland nürnberg munich essex minsk nyc montreal lesbos sahara abidjan abudhabi abuja accra adamstown addis aden algiers alofi amman amsterdam thehague andorralavella ankara antananarivo apia ashgabat asmara asunción athens avarua baghdad baku bamako bandar bangkok bangui banjul basseterre beijing beirut belgrade belmopan berlin bern bishkek bissau bloemfontein capetown pretoria bogotá bradesestate plymouth brasília bratislava brazzaville bridgetown brussels bucharest budapest buenosaires cairo canberra caracas castries cetinje podgorica charlotteamalie chișinău cockburntown colombo conakry copenhagen cotonou portonovo dakar damascus daressalaam dodoma dhaka dili djibouti doha douglas dublin dushanbe elaaiún tifariti flyingfishcove freetown funafuti gaborone georgetown gibraltar gitega bujumbura guatemalacity gustavia hagåtña hamilton hanoi harare hargeisa havana helsinki honiara islamabad jakarta jamestown jerusalem ramallah juba kabul kampala kathmandu khartoum kigali kingedwardpoint kingston kingston kingstown kinshasa kualalumpur putrajaya kuwaitcity kyiv lapaz sucre libreville lilongwe lima lisbon ljubljana lobamba mbabane lomé london luanda lusaka luxembourg madrid majuro malabo malé managua manama manila maputo mariehamn marigot maseru matautu mexicocity minsk mogadishu monaco monrovia montevideo moroni moscow muscat nairobi nassau naypyidaw ndjamena newdelhi delhi ngerulmud niamey nicosia nouakchott nouméa nukualofa nursultan nuuk oranjestad oslo ottawa ouagadougou pagopago palikir panamacity papeete paramaribo paris philipsburg phnompenh portlouis portmoresby portvila portauprince portofspain prague praia pristina pyongyang quito rabat reykjavík riga riyadh rome roseau saipan sanjosé sanjuan sanmarino sansalvador santiago valparaíso santodomingo sãotomé sarajevo seoul singapore skopje sofia southtarawa stanley stepanakert stockholm sukhumi suva taipei tallinn tashkent tbilisi tegucigalpa tehran thimphu tirana tokyo tórshavn tripoli tskhinvali tunis ulaanbaatar vaduz valletta vatican victoria vienna vientiane vilnius warsaw washington wellington westisland willemstad windhoek yaoundé yaren yerevan zagreb Sat, 05 Dec 2020 13:30:17 UT From c20f29b3afaa6aff93bcce7a164c35c3d82f553f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 14:32:13 +0000 Subject: [PATCH 07/11] Increase max item size --- daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index aee21d837..eb46e9e29 100644 --- a/daemon.py +++ b/daemon.py @@ -12983,7 +12983,7 @@ def runDaemon(maxNewswirePosts: int, httpd.maxFeedItemSizeKb = maxFeedItemSizeKb # maximum size of a hashtag category, in K - httpd.maxCategoriesFeedItemSizeKb = 256 + httpd.maxCategoriesFeedItemSizeKb = 1024 if registration == 'open': httpd.registration = True From 09e980da24e17be85795350cf42b9074e4698d94 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 14:43:29 +0000 Subject: [PATCH 08/11] Setting categories from defaults --- newswire.py | 7 ++++--- utils.py | 18 ++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/newswire.py b/newswire.py index d0da7bb35..0bad590f7 100644 --- a/newswire.py +++ b/newswire.py @@ -215,11 +215,12 @@ def loadHashtagCategories(baseDir: str, language: str) -> None: with open(hashtagCategoriesFilename, 'r') as fp: xmlStr = fp.read() - xml2StrToHashtagCategories(baseDir, xmlStr, 1024) + xml2StrToHashtagCategories(baseDir, xmlStr, 1024, True) def xml2StrToHashtagCategories(baseDir: str, xmlStr: str, - maxCategoriesFeedItemSizeKb: int) -> None: + maxCategoriesFeedItemSizeKb: int, + force=False) -> None: """Updates hashtag categories based upon an rss feed """ rssItems = xmlStr.split('') @@ -253,7 +254,7 @@ def xml2StrToHashtagCategories(baseDir: str, xmlStr: str, hashtagList = hashtagListStr.split(' ') if not isBlockedHashtag(baseDir, categoryStr): for hashtag in hashtagList: - setHashtagCategory(baseDir, hashtag, categoryStr) + setHashtagCategory(baseDir, hashtag, categoryStr, force) def xml2StrToDict(baseDir: str, domain: str, xmlStr: str, diff --git a/utils.py b/utils.py index 414f46bd8..4a7b16e7c 100644 --- a/utils.py +++ b/utils.py @@ -112,23 +112,29 @@ def validHashtagCategory(category: str) -> bool: return True -def setHashtagCategory(baseDir: str, hashtag: str, category: str) -> bool: +def setHashtagCategory(baseDir: str, hashtag: str, category: str, + force=False) -> bool: """Sets the category for the hashtag """ if not validHashtagCategory(category): return False - hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' - if not os.path.isfile(hashtagFilename): - hashtag = hashtag.title() + if not force: hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' if not os.path.isfile(hashtagFilename): - hashtag = hashtag.upper() + hashtag = hashtag.title() hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' if not os.path.isfile(hashtagFilename): - return False + hashtag = hashtag.upper() + hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' + if not os.path.isfile(hashtagFilename): + return False categoryFilename = baseDir + '/tags/' + hashtag + '.category' + if force: + # don't overwrite any existing categories + if os.path.isfile(categoryFilename): + return False with open(categoryFilename, 'w+') as fp: fp.write(category) updateHashtagCategories(baseDir) From d2f8d916f7467aa08ee222591c74dde6d0a637ec Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 16:06:36 +0000 Subject: [PATCH 09/11] Filter for recent hashtags on category screen --- utils.py | 20 +++++++++++++++++++- webapp_hashtagswarm.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index 4a7b16e7c..a4b07ea7f 100644 --- a/utils.py +++ b/utils.py @@ -38,11 +38,16 @@ def getHashtagCategory(baseDir: str, hashtag: str) -> str: return '' -def getHashtagCategories(baseDir: str, category=None) -> None: +def getHashtagCategories(baseDir: str, recent=False, category=None) -> None: """Returns a dictionary containing hashtag categories """ hashtagCategories = {} + if recent: + currTime = datetime.datetime.utcnow() + daysSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days + recently = daysSinceEpoch - 1 + for subdir, dirs, files in os.walk(baseDir + '/tags'): for f in files: if not f.endswith('.category'): @@ -62,6 +67,19 @@ def getHashtagCategories(baseDir: str, category=None) -> None: if categoryStr != category: continue + if recent: + tagsFilename = baseDir + '/tags/' + hashtag + '.txt' + if os.path.isfile(tagsFilename): + modTimesinceEpoc = \ + os.path.getmtime(tagsFilename) + lastModifiedDate = \ + datetime.datetime.fromtimestamp(modTimesinceEpoc) + fileDaysSinceEpoch = \ + (lastModifiedDate - + datetime.datetime(1970, 1, 1)).days + if fileDaysSinceEpoch < recently: + continue + if not hashtagCategories.get(categoryStr): hashtagCategories[categoryStr] = [hashtag] else: diff --git a/webapp_hashtagswarm.py b/webapp_hashtagswarm.py index 8fa03b5cb..37e51060a 100644 --- a/webapp_hashtagswarm.py +++ b/webapp_hashtagswarm.py @@ -279,7 +279,7 @@ def htmlSearchHashtagCategory(cssCache: {}, translate: {}, htmlStr += '

' htmlStr += translate['Category'] + ': ' + categoryStr + '

' - hashtagsDict = getHashtagCategories(baseDir, categoryStr) + hashtagsDict = getHashtagCategories(baseDir, True, categoryStr) if hashtagsDict: for categoryStr2, hashtagList in hashtagsDict.items(): hashtagList.sort() From f45f6235d0263a547a95571b6e2da76e13574953 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 16:11:11 +0000 Subject: [PATCH 10/11] Only show tags that exist --- utils.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils.py b/utils.py index a4b07ea7f..aa82ea054 100644 --- a/utils.py +++ b/utils.py @@ -69,16 +69,17 @@ def getHashtagCategories(baseDir: str, recent=False, category=None) -> None: if recent: tagsFilename = baseDir + '/tags/' + hashtag + '.txt' - if os.path.isfile(tagsFilename): - modTimesinceEpoc = \ - os.path.getmtime(tagsFilename) - lastModifiedDate = \ - datetime.datetime.fromtimestamp(modTimesinceEpoc) - fileDaysSinceEpoch = \ - (lastModifiedDate - - datetime.datetime(1970, 1, 1)).days - if fileDaysSinceEpoch < recently: - continue + if not os.path.isfile(tagsFilename): + continue + modTimesinceEpoc = \ + os.path.getmtime(tagsFilename) + lastModifiedDate = \ + datetime.datetime.fromtimestamp(modTimesinceEpoc) + fileDaysSinceEpoch = \ + (lastModifiedDate - + datetime.datetime(1970, 1, 1)).days + if fileDaysSinceEpoch < recently: + continue if not hashtagCategories.get(categoryStr): hashtagCategories[categoryStr] = [hashtag] From 2808f2acb2b0e26cb46c8fbdc0c268fdf5bc7ac2 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 5 Dec 2020 17:07:35 +0000 Subject: [PATCH 11/11] Default hashtag categories --- defaultcategories/en.xml | 50 +++++++++++++++------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/defaultcategories/en.xml b/defaultcategories/en.xml index 37b1040fe..d66ef2907 100644 --- a/defaultcategories/en.xml +++ b/defaultcategories/en.xml @@ -34,7 +34,7 @@
security - encrypt password cryptography infosec gchq cryptowars UseAMaskUseTor cyberattack security tor vpn openssh openssl crypto opsec nsa protonvpn nitrokey openpgp gpg cybersecurity signal noscript openvpn + password encrypt password cryptography infosec gchq cryptowars UseAMaskUseTor cyberattack security tor vpn openssh openssl crypto opsec nsa protonvpn nitrokey openpgp gpg cybersecurity signal noscript openvpn Sat, 05 Dec 2020 13:30:17 UT @@ -52,7 +52,7 @@ conferences - debconf talk FreedomBoxSummit summit minidebconf flossevent conf rC3 flossconf + schmoocon defcon C3 fossdem debconf talk FreedomBoxSummit summit minidebconf flossevent conf rC3 flossconf conference Sat, 05 Dec 2020 13:30:17 UT @@ -82,13 +82,7 @@ games - minecraft TetrisGore gaming - - Sat, 05 Dec 2020 13:30:17 UT - - - cats - dailycatpic DailyCatVid + mud dnd rpg minetest game chess minecraft TetrisGore gaming Sat, 05 Dec 2020 13:30:17 UT @@ -124,31 +118,31 @@ ethics - digitalethics + digitalethics ethics ethical Sat, 05 Dec 2020 13:30:17 UT internet - redecentralize rtmp decentralization decentralize w3c torrent data icann dns openstandards oauth SmallWeb xmpp semanticweb ntp socialnetworks jabber decentralized darknet cookies darkweb server browser p2p web twitch domain browsers openculture internet openweb socialweb cloudflare + cyberspace www w3c redecentralize rtmp decentralization decentralize w3c torrent data icann dns openstandards oauth SmallWeb xmpp semanticweb ntp socialnetworks jabber decentralized darknet cookies darkweb server browser p2p web twitch domain browsers openculture internet openweb socialweb cloudflare Sat, 05 Dec 2020 13:30:17 UT retro - atarist teletext floppydisk retrocomputing 80s z80 amiga + microcomputer microcomputing commodore C64 A500 8bit retro bbcmicro atari atarist teletext floppydisk retrocomputing 80s z80 amiga Sat, 05 Dec 2020 13:30:17 UT years - Year2020 + year Year2020 Sat, 05 Dec 2020 13:30:17 UT pets - catofmastodon catbehaviour cats kittens dog caturday catsofmastodon cute dogs cat catcontent + dailycatpic DailyCatVid dogsofmastodon catofmastodon catbehaviour cats kittens dog caturday catsofmastodon cute dogs cat catcontent pet Sat, 05 Dec 2020 13:30:17 UT @@ -160,7 +154,7 @@ events - live followfriday livestream InternationalCheetahDay + meetup live followfriday livestream festival concert InternationalCheetahDay Sat, 05 Dec 2020 13:30:17 UT @@ -172,13 +166,13 @@ hardware - keyboards modem keyboard cybredeck thinkpad + keyboards modem keyboard cyberdeck cybredeck thinkpad lenovo arm Sat, 05 Dec 2020 13:30:17 UT linux - mobian openwrt distros wireguard linuxaudio gtk debian trisquel ubuntu xubuntu fedora distro qubesos linux btrfs + mobian openwrt distros wireguard linuxaudio gtk debian trisquel gentoo archlinux ubuntu xubuntu fedora systemd distro qubesos linux btrfs Sat, 05 Dec 2020 13:30:17 UT @@ -202,19 +196,19 @@ climate - clouds weather + clouds weather climate Sat, 05 Dec 2020 13:30:17 UT books - earthsea ebooks ebook epub + earthsea ebooks ebook epub book Sat, 05 Dec 2020 13:30:17 UT scifi - startrek starwars + startrek starwars cyberpunk Sat, 05 Dec 2020 13:30:17 UT @@ -268,13 +262,7 @@ election - voted vote - - Sat, 05 Dec 2020 13:30:17 UT - - - #music - trance + voted vote election Sat, 05 Dec 2020 13:30:17 UT @@ -286,7 +274,7 @@ crafts - knitting makers + knitting makers jewelry quilt Sat, 05 Dec 2020 13:30:17 UT @@ -304,19 +292,19 @@ seasons - winter + winter summer spring autumn Sat, 05 Dec 2020 13:30:17 UT radio - hamradio + hamradio radio Sat, 05 Dec 2020 13:30:17 UT microcontroller - microcontroller + microcontroller arduino esp32 Sat, 05 Dec 2020 13:30:17 UT