Merge branch 'main' of ssh://code.freedombone.net:2222/bashrc/epicyon into main

merge-requests/30/head
Bob Mottram 2020-12-05 19:11:06 +00:00
commit ed87c969f5
7 changed files with 485 additions and 13 deletions

View File

@ -231,6 +231,7 @@ from devices import E2EEaddDevice
from newswire import getRSSfromDict
from newswire import rss2Header
from newswire import rss2Footer
from newswire import loadHashtagCategories
from newsdaemon import runNewswireWatchdog
from newsdaemon import runNewswireDaemon
from filters import isFiltered
@ -12982,7 +12983,7 @@ def runDaemon(maxNewswirePosts: int,
httpd.maxFeedItemSizeKb = maxFeedItemSizeKb
# maximum size of a hashtag category, in K
httpd.maxCategoriesFeedItemSizeKb = 256
httpd.maxCategoriesFeedItemSizeKb = 1024
if registration == 'open':
httpd.registration = True
@ -13149,6 +13150,9 @@ def runDaemon(maxNewswirePosts: int,
httpd.restartInboxQueueInProgress = False
httpd.restartInboxQueue = False
print('Adding hashtag categories for language ' + httpd.systemLanguage)
loadHashtagCategories(baseDir, httpd.systemLanguage)
if not unitTest:
print('Creating inbox queue watchdog')
httpd.thrWatchdog = \

View File

@ -0,0 +1,378 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>#categories</title>
<item>
<title>gafam</title>
<description>zuckerberg apple youtube facebook amazon amazonring microsoft twitter skype degoogled google dotcoms deleteyoutube fascistbook FuckGoogle degoogle ring gafam fuckoffgoogle deletefacebook bigtech</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>food</title>
<description>vitamind bolognese sourdough vegan soysauce bakery baking foodwaste aroma bagel batter beans beer biscuit bread broth burger butter cake candy caramel caviar cheese chili chocolate cider cobbler cocoa coffee cookie cream croissant crumble cuisine curd dessert dish drink eggs entree filet fish flour foie gras food glaze grill hamburger juice ketchup kitchen lard liquor margarine marinade mayo mayonnaise meat milk mousse muffin mushroom noodle nuts oil olive omelette pan pasta paste pastry pie pizza plate pot poutine pudding raclette recipe rice salad salsa sandwich sauce seasoning skillet soda soup soy spice steak stew syrup tartar taste tea toast vinegar waffle wheat wine wok yeast yogurt cookery cooking</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>cycling</title>
<description>bicycle bike Snowbike cycling</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>phones</title>
<description>mobileapp fdroid plasmamobile smartphone pinephone mobile ubuntutouch osmand vodafone postmarketos</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>software</title>
<description>app freedombox windows libre nginx Framasoft drm kubernetes jami FuckOffZoom docker freesoftware foss nextcloud wechat ikiwiki outreachy selfhosting lyft nitter opensource diaspora cabal conferencing accessibility owncast emacs gemini email chatapps floss deltachat bittorrent zoom gpl FriendofGNOME obnam cryptpad OwnStream mumble grsync irssi mutt backup apps ffmpeg lemmy OSM win10 jitsi libreoffice dino openoffice</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>security</title>
<description>password encrypt password cryptography infosec gchq cryptowars UseAMaskUseTor cyberattack security tor vpn openssh openssl crypto opsec nsa protonvpn nitrokey openpgp gpg cybersecurity signal noscript openvpn</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>countries</title>
<description>wales scotland eu europe europeanunion chinese afghanistan albania algeria andorra angola antigua argentina armenia australia austria azerbaijan bahamas bahrain bangladesh barbados belarus belgium belize benin bhutan bolivia bosnia botswana brazil brunei bulgaria burkina burundi cambodia cameroon canada capeverde chad chile china colombia comoros congo costarica croatia cuba cyprus czech denmark djibouti dominica dominican easttimor ecuador egypt elsalvador guinea eritrea estonia ethiopia fiji finland france gabon gambia georgia germany ghana greece grenada guatemala guinea guyana haiti honduras hungary iceland india indonesia iran iraq ireland israel italy ivorycoast jamaica japan jordan kazakhstan kenya kiribati koreanorth koreasouth kosovo kuwait kyrgyzstan laos latvia lebanon lesotho liberia libya liechtenstein lithuania luxembourg macedonia madagascar malawi malaysia maldives mali malta marshall mauritania mauritius mexico micronesia moldova monaco mongolia montenegro morocco mozambique myanmar burma namibia nauru nepal netherlands newzealand nz nicaragua niger nigeria norway oman pakistan palau panama papua paraguay peru philippines poland Portugal qatar romania russian rwanda stkitts stlucia saintvincent samoa sanmarino saudi senegal serbia seychelles sierraleone singapore slovakia slovenia solomon somalia southafrica sudan spain srilanka sudan suriname swaziland sweden switzerland syria taiwan tajikistan tanzania thailand togo tonga trinidad tunisia turkey turkmenistan tuvalu uganda ukraine uae uk usa us unitedstates uruguay uzbekistan vanuatu vaticancity venezuela vietnam yemen zambia zimbabwe</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>politics</title>
<description>cia wageslavery liberation fascism rojava leftists Socialism ukpol freedom anarchism DefundThePolice bjp election generalstrike digitalfreedom hatespeech fascists peerproduction corporations commons wageslave softwarefreedom socialecology politics nzpol totalitarianism TyskySour Labour decolonization surveillance elections borisjohnson mutuality whitehouse decolonize decenterwhiteness ChineseAppBan modi surveillancecapitalism leftist Revolution ukpolitics migration mutualaid fascist uselection</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>conferences</title>
<description>schmoocon defcon C3 fossdem debconf talk FreedomBoxSummit summit minidebconf flossevent conf rC3 flossconf conference</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>photos</title>
<description>nikon photography photo tokyocameraclub photoshop camera picture</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>programming</title>
<description>programming css rustlang typescript adventofcode scripting fedidev sourcecode django tuskydev lisp javascript code elisp html rust clojurescript racket python</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>activitypub</title>
<description>activitypub pleroma fedilab mastotips misskey siskin followers pixelfed monal tusky peertubers feditips fedizens epicyon mastomagic pixeldev fediverse mastodon peertube mobilizon</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>news</title>
<description>news</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>games</title>
<description>mud dnd rpg minetest game chess minecraft TetrisGore gaming</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>music</title>
<description>punk rap hiphop ipod rave bandcamp musicians mp3 thecure vaporwave dubstep synthwave experimentalmusic dj newwave dorkwave producing NowPlaying libremusicproduction MusicAdvent synth music fediversemusic cyberpunkmusic BandcampFriday</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>indymedia</title>
<description>visionontv indymediaback omn 4opens</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>places</title>
<description>hannover hamburg ipswich oakland nürnberg munich essex minsk nyc montreal lesbos sahara abidjan abudhabi abuja accra adamstown addis aden algiers alofi amman amsterdam thehague andorralavella ankara antananarivo apia ashgabat asmara asunción athens avarua baghdad baku bamako bandar bangkok bangui banjul basseterre beijing beirut belgrade belmopan berlin bern bishkek bissau bloemfontein capetown pretoria bogotá bradesestate plymouth brasília bratislava brazzaville bridgetown brussels bucharest budapest buenosaires cairo canberra caracas castries cetinje podgorica charlotteamalie chișinău cockburntown colombo conakry copenhagen cotonou portonovo dakar damascus daressalaam dodoma dhaka dili djibouti doha douglas dublin dushanbe elaaiún tifariti flyingfishcove freetown funafuti gaborone georgetown gibraltar gitega bujumbura guatemalacity gustavia hagåtña hamilton hanoi harare hargeisa havana helsinki honiara islamabad jakarta jamestown jerusalem ramallah juba kabul kampala kathmandu khartoum kigali kingedwardpoint kingston kingston kingstown kinshasa kualalumpur putrajaya kuwaitcity kyiv lapaz sucre libreville lilongwe lima lisbon ljubljana lobamba mbabane lomé london luanda lusaka luxembourg madrid majuro malabo malé managua manama manila maputo mariehamn marigot maseru matautu mexicocity minsk mogadishu monaco monrovia montevideo moroni moscow muscat nairobi nassau naypyidaw ndjamena newdelhi delhi ngerulmud niamey nicosia nouakchott nouméa nukualofa nursultan nuuk oranjestad oslo ottawa ouagadougou pagopago palikir panamacity papeete paramaribo paris philipsburg phnompenh portlouis portmoresby portvila portauprince portofspain prague praia pristina pyongyang quito rabat reykjavík riga riyadh rome roseau saipan sanjosé sanjuan sanmarino sansalvador santiago valparaíso santodomingo sãotomé sarajevo seoul singapore skopje sofia southtarawa stanley stepanakert stockholm sukhumi suva taipei tallinn tashkent tbilisi tegucigalpa tehran thimphu tirana tokyo 
tórshavn tripoli tskhinvali tunis ulaanbaatar vaduz valletta vatican victoria vienna vientiane vilnius warsaw washington wellington westisland willemstad windhoek yaoundé yaren yerevan zagreb</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>questions</title>
<description>askmastodon askfosstodon</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>birds</title>
<description>RainbowBeeEater bird</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>ethics</title>
<description>digitalethics ethics ethical</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>internet</title>
<description>cyberspace www w3c redecentralize rtmp decentralization decentralize w3c torrent data icann dns openstandards oauth SmallWeb xmpp semanticweb ntp socialnetworks jabber decentralized darknet cookies darkweb server browser p2p web twitch domain browsers openculture internet openweb socialweb cloudflare</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>retro</title>
<description>microcomputer microcomputing commodore C64 A500 8bit retro bbcmicro atari atarist teletext floppydisk retrocomputing 80s z80 amiga</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>years</title>
<description>year Year2020</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>pets</title>
<description>dailycatpic DailyCatVid dogsofmastodon catofmastodon catbehaviour cats kittens dog caturday catsofmastodon cute dogs cat catcontent pet</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>podcasts</title>
<description>IntergalacticWasabiHour podcast tilderadio</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>events</title>
<description>meetup live followfriday livestream festival concert InternationalCheetahDay</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>gender</title>
<description>transwomen transcrowdfund female trans women estradiol woman transrights</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>hardware</title>
<description>keyboards modem keyboard cyberdeck cybredeck thinkpad lenovo arm</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>linux</title>
<description>mobian openwrt distros wireguard linuxaudio gtk debian trisquel gentoo archlinux ubuntu xubuntu fedora systemd distro qubesos linux btrfs</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>fiction</title>
<description>cyberpunk thehobbit fiction</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>audio</title>
<description>audioproduction audiofeedback</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>bots</title>
<description>bot</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>climate</title>
<description>clouds weather climate</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>books</title>
<description>earthsea ebooks ebook epub book</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>scifi</title>
<description>startrek starwars cyberpunk</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>pandemic</title>
<description>CoronaWarnApp facemasks vaccines vaccine covid Lockdown codid19 COVID19 COVID</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>religion</title>
<description>pagan</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>science</title>
<description>supercollider paleontology</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>nature</title>
<description>trees nature</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>fashion</title>
<description>bras fashion patches</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>art</title>
<description>krita adultcolouring collage MastoArt digitalart mandala concretepoetry artwithopensource</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>techbros</title>
<description>hackernews reddit</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>moderation</title>
<description>fedblock</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>election</title>
<description>voted vote election</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>activism</title>
<description>fsfe xr eff openrightsgroup fsf conservancy</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>crafts</title>
<description>knitting makers jewelry quilt</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>gardening</title>
<description>DailyFlowers flowers gardening</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>education</title>
<description>education tutorial</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>seasons</title>
<description>winter summer spring autumn</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>radio</title>
<description>hamradio radio</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>microcontroller</title>
<description>microcontroller arduino esp32</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>comedy</title>
<description>satire irony</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>privacy</title>
<description>privacymatters dataprivacy privacy</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>environment</title>
<description>climatechange climatechaos</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>#software</title>
<description>flatpak</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>scotland</title>
<description>highlands</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>microcontrollers</title>
<description>esp8266 esp32</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>health</title>
<description>meds</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>astronomy</title>
<description>moon milkyway</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>travel</title>
<description>travel taxi</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>help</title>
<description>help</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
<item>
<title>funding</title>
<description>patreon</description>
<link/>
<pubDate>Sat, 05 Dec 2020 13:30:17 UT</pubDate>
</item>
</channel>
</rss>

View File

@ -30,6 +30,8 @@ from utils import loadJson
from utils import saveJson
from utils import updateLikesCollection
from utils import undoLikesCollectionEntry
from utils import getHashtagCategories
from utils import setHashtagCategory
from httpsig import verifyPostHeaders
from session import createSession
from session import getJson
@ -68,6 +70,28 @@ from happening import saveEventPost
from delete import removeOldHashtags
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
    """Tries to guess a category for the given hashtag.
    This works by trying to find the longest similar hashtag,
    where similar means that one of the two tag names contains
    the other.

    tagName is the hashtag to be categorised.
    hashtagCategories is a dictionary mapping each category name
    to the list of hashtags belonging to it.

    Returns the category of the longest similar hashtag, or None
    if no similar hashtag was found.
    """
    if not tagName:
        # an empty string is contained within every hashtag,
        # so it can't be meaningfully matched
        return None

    categoryMatched = ''
    tagMatchedLen = 0
    for categoryStr, hashtagList in hashtagCategories.items():
        for hashtag in hashtagList:
            if not hashtag:
                # an empty hashtag would match any tag name
                continue
            if hashtag not in tagName and tagName not in hashtag:
                continue
            # match the longest tag. The matched length has to be
            # updated each time, otherwise a later shorter hashtag
            # could replace a longer match
            if len(hashtag) > tagMatchedLen:
                tagMatchedLen = len(hashtag)
                categoryMatched = categoryStr
    if not categoryMatched:
        return None
    return categoryMatched
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
"""Extracts hashtags from an incoming post and updates the
relevant tags files.
@ -91,6 +115,8 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
print('Creating tags directory')
os.mkdir(tagsDir)
hashtagCategories = getHashtagCategories(baseDir)
for tag in postJsonObject['object']['tag']:
if not tag.get('type'):
continue
@ -122,6 +148,14 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
tagsFilename + ' ' + str(e))
removeOldHashtags(baseDir, 3)
# automatically assign a category to the tag if possible
categoryFilename = tagsDir + '/' + tagName + '.category'
if not os.path.isfile(categoryFilename):
categoryStr = \
guessHashtagCategory(tagName, hashtagCategories)
if categoryStr:
setHashtagCategory(baseDir, tagName, categoryStr)
def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int,
translate: {},

View File

@ -203,8 +203,24 @@ def parseFeedDate(pubDate: str) -> str:
return pubDateStr
def xml2StrToHashtagCategories(baseDir: str, domain: str, xmlStr: str,
maxCategoriesFeedItemSizeKb: int) -> None:
def loadHashtagCategories(baseDir: str, language: str) -> None:
    """Loads an rss file containing hashtag categories.

    baseDir/categories.xml is used if it exists, otherwise
    baseDir/defaultcategories/<language>.xml is tried.
    If neither file exists then nothing happens.
    The file contents are passed to xml2StrToHashtagCategories
    with a maximum item size of 1024 and with force set to True.
    """
    hashtagCategoriesFilename = baseDir + '/categories.xml'
    if not os.path.isfile(hashtagCategoriesFilename):
        hashtagCategoriesFilename = \
            baseDir + '/defaultcategories/' + language + '.xml'
        if not os.path.isfile(hashtagCategoriesFilename):
            return

    # The categories feed declares UTF-8 and contains non-ASCII
    # hashtags, so don't depend upon the default locale encoding
    with open(hashtagCategoriesFilename, 'r', encoding='utf-8') as fp:
        xmlStr = fp.read()
    # the file can be closed before the categories are updated
    xml2StrToHashtagCategories(baseDir, xmlStr, 1024, True)
def xml2StrToHashtagCategories(baseDir: str, xmlStr: str,
maxCategoriesFeedItemSizeKb: int,
force=False) -> None:
"""Updates hashtag categories based upon an rss feed
"""
rssItems = xmlStr.split('<item>')
@ -238,7 +254,7 @@ def xml2StrToHashtagCategories(baseDir: str, domain: str, xmlStr: str,
hashtagList = hashtagListStr.split(' ')
if not isBlockedHashtag(baseDir, categoryStr):
for hashtag in hashtagList:
setHashtagCategory(baseDir, hashtag, categoryStr)
setHashtagCategory(baseDir, hashtag, categoryStr, force)
def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
@ -252,7 +268,7 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
return {}
result = {}
if '<title>#categories</title>' in xmlStr:
xml2StrToHashtagCategories(baseDir, domain, xmlStr,
xml2StrToHashtagCategories(baseDir, xmlStr,
maxCategoriesFeedItemSizeKb)
return {}
rssItems = xmlStr.split('<item>')

View File

@ -71,6 +71,7 @@ from delete import sendDeleteViaServer
from inbox import jsonPostAllowsComments
from inbox import validInbox
from inbox import validInboxFilenames
from inbox import guessHashtagCategory
from content import htmlReplaceEmailQuote
from content import htmlReplaceQuoteMarks
from content import dangerousMarkup
@ -2421,8 +2422,22 @@ def testValidNickname():
assert not validNickname(domain, nickname)
def testGuessHashtagCategory() -> None:
    """Checks that a hashtag gets assigned the category of a
    known hashtag which is contained within it
    """
    print('testGuessHashtagCategory')
    knownCategories = {
        "foo": ["swan", "goose"],
        "bar": ["cat", "mouse"]
    }
    # pairs of (hashtag to be guessed, expected category)
    expectedGuesses = (
        ("unspecifiedgoose", "foo"),
        ("catpic", "bar")
    )
    for tagName, expectedCategory in expectedGuesses:
        guessedCategory = guessHashtagCategory(tagName, knownCategories)
        assert guessedCategory == expectedCategory
def runAllTests():
print('Running tests...')
testGuessHashtagCategory()
testValidNickname()
testParseFeedDate()
testFirstParagraphFromString()

View File

@ -38,11 +38,16 @@ def getHashtagCategory(baseDir: str, hashtag: str) -> str:
return ''
def getHashtagCategories(baseDir: str, category=None) -> None:
def getHashtagCategories(baseDir: str, recent=False, category=None) -> None:
"""Returns a dictionary containing hashtag categories
"""
hashtagCategories = {}
if recent:
currTime = datetime.datetime.utcnow()
daysSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days
recently = daysSinceEpoch - 1
for subdir, dirs, files in os.walk(baseDir + '/tags'):
for f in files:
if not f.endswith('.category'):
@ -62,6 +67,20 @@ def getHashtagCategories(baseDir: str, category=None) -> None:
if categoryStr != category:
continue
if recent:
tagsFilename = baseDir + '/tags/' + hashtag + '.txt'
if not os.path.isfile(tagsFilename):
continue
modTimesinceEpoc = \
os.path.getmtime(tagsFilename)
lastModifiedDate = \
datetime.datetime.fromtimestamp(modTimesinceEpoc)
fileDaysSinceEpoch = \
(lastModifiedDate -
datetime.datetime(1970, 1, 1)).days
if fileDaysSinceEpoch < recently:
continue
if not hashtagCategories.get(categoryStr):
hashtagCategories[categoryStr] = [hashtag]
else:
@ -112,23 +131,29 @@ def validHashtagCategory(category: str) -> bool:
return True
def setHashtagCategory(baseDir: str, hashtag: str, category: str) -> bool:
def setHashtagCategory(baseDir: str, hashtag: str, category: str,
force=False) -> bool:
"""Sets the category for the hashtag
"""
if not validHashtagCategory(category):
return False
hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
if not os.path.isfile(hashtagFilename):
hashtag = hashtag.title()
if not force:
hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
if not os.path.isfile(hashtagFilename):
hashtag = hashtag.upper()
hashtag = hashtag.title()
hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
if not os.path.isfile(hashtagFilename):
return False
hashtag = hashtag.upper()
hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
if not os.path.isfile(hashtagFilename):
return False
categoryFilename = baseDir + '/tags/' + hashtag + '.category'
if force:
# don't overwrite any existing categories
if os.path.isfile(categoryFilename):
return False
with open(categoryFilename, 'w+') as fp:
fp.write(category)
updateHashtagCategories(baseDir)

View File

@ -279,7 +279,7 @@ def htmlSearchHashtagCategory(cssCache: {}, translate: {},
htmlStr += '<h1><a href="' + actor + '/search"><b>'
htmlStr += translate['Category'] + ': ' + categoryStr + '</b></a></h1>'
hashtagsDict = getHashtagCategories(baseDir, categoryStr)
hashtagsDict = getHashtagCategories(baseDir, True, categoryStr)
if hashtagsDict:
for categoryStr2, hashtagList in hashtagsDict.items():
hashtagList.sort()