mirror of https://gitlab.com/bashrc2/epicyon
Calculate word frequencies for account info
parent
914160ef05
commit
345145927a
|
@ -564,12 +564,14 @@ if args.postDomains:
|
||||||
args.port = 80
|
args.port = 80
|
||||||
elif args.gnunet:
|
elif args.gnunet:
|
||||||
proxyType = 'gnunet'
|
proxyType = 'gnunet'
|
||||||
|
wordFrequency = {}
|
||||||
domainList = []
|
domainList = []
|
||||||
domainList = getPublicPostDomains(None,
|
domainList = getPublicPostDomains(None,
|
||||||
baseDir, nickname, domain,
|
baseDir, nickname, domain,
|
||||||
proxyType, args.port,
|
proxyType, args.port,
|
||||||
httpPrefix, debug,
|
httpPrefix, debug,
|
||||||
__version__, domainList)
|
__version__,
|
||||||
|
wordFrequency, domainList)
|
||||||
for postDomain in domainList:
|
for postDomain in domainList:
|
||||||
print(postDomain)
|
print(postDomain)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
@ -602,12 +604,14 @@ if args.postDomainsBlocked:
|
||||||
args.port = 80
|
args.port = 80
|
||||||
elif args.gnunet:
|
elif args.gnunet:
|
||||||
proxyType = 'gnunet'
|
proxyType = 'gnunet'
|
||||||
|
wordFrequency = {}
|
||||||
domainList = []
|
domainList = []
|
||||||
domainList = getPublicPostDomainsBlocked(None,
|
domainList = getPublicPostDomainsBlocked(None,
|
||||||
baseDir, nickname, domain,
|
baseDir, nickname, domain,
|
||||||
proxyType, args.port,
|
proxyType, args.port,
|
||||||
httpPrefix, debug,
|
httpPrefix, debug,
|
||||||
__version__, domainList)
|
__version__,
|
||||||
|
wordFrequency, domainList)
|
||||||
for postDomain in domainList:
|
for postDomain in domainList:
|
||||||
print(postDomain)
|
print(postDomain)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
52
posts.py
52
posts.py
|
@ -469,6 +469,27 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
|
||||||
return personPosts
|
return personPosts
|
||||||
|
|
||||||
|
|
||||||
|
def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
    """Updates a dictionary of word counts using the content of a post

    The content is first passed through removeHtml (defined elsewhere
    in this project; presumably it strips markup), then full stops and
    semicolons are treated as word separators. Every remaining word
    increments its counter within wordFrequency, which is modified in
    place. Nothing is returned.

    Words shorter than three characters are ignored, and three character
    words are only counted when they are entirely upper case
    (presumably so that acronyms are kept - confirm with the author).
    """
    plainText = removeHtml(content)
    for separator in ('.', ';'):
        plainText = plainText.replace(separator, ' ')
    # split() with no arguments separates on any run of whitespace, so
    # words either side of a newline or tab are counted individually
    # rather than being merged into a single bogus entry
    for word in plainText.split():
        wordLen = len(word)
        if wordLen < 3:
            continue
        if wordLen == 3 and word.upper() != word:
            continue
        wordFrequency[word] = wordFrequency.get(word, 0) + 1
|
||||||
|
|
||||||
|
|
||||||
def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
||||||
maxMentions: int,
|
maxMentions: int,
|
||||||
maxEmoji: int, maxAttachments: int,
|
maxEmoji: int, maxAttachments: int,
|
||||||
|
@ -476,7 +497,9 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
||||||
personCache: {},
|
personCache: {},
|
||||||
debug: bool,
|
debug: bool,
|
||||||
projectVersion: str, httpPrefix: str,
|
projectVersion: str, httpPrefix: str,
|
||||||
domain: str, domainList=[]) -> []:
|
domain: str,
|
||||||
|
wordFrequency: {},
|
||||||
|
domainList=[]) -> []:
|
||||||
"""Returns a list of domains referenced within public posts
|
"""Returns a list of domains referenced within public posts
|
||||||
"""
|
"""
|
||||||
if not outboxUrl:
|
if not outboxUrl:
|
||||||
|
@ -503,6 +526,9 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
||||||
continue
|
continue
|
||||||
if not isinstance(item['object'], dict):
|
if not isinstance(item['object'], dict):
|
||||||
continue
|
continue
|
||||||
|
if item['object'].get('content'):
|
||||||
|
_updateWordFrequency(item['object']['content'],
|
||||||
|
wordFrequency)
|
||||||
if item['object'].get('inReplyTo'):
|
if item['object'].get('inReplyTo'):
|
||||||
if isinstance(item['object']['inReplyTo'], str):
|
if isinstance(item['object']['inReplyTo'], str):
|
||||||
postDomain, postPort = \
|
postDomain, postPort = \
|
||||||
|
@ -3334,7 +3360,7 @@ def getPublicPostsOfPerson(baseDir: str, nickname: str, domain: str,
|
||||||
def getPublicPostDomains(session, baseDir: str, nickname: str, domain: str,
|
def getPublicPostDomains(session, baseDir: str, nickname: str, domain: str,
|
||||||
proxyType: str, port: int, httpPrefix: str,
|
proxyType: str, port: int, httpPrefix: str,
|
||||||
debug: bool, projectVersion: str,
|
debug: bool, projectVersion: str,
|
||||||
domainList=[]) -> []:
|
wordFrequency: {}, domainList=[]) -> []:
|
||||||
""" Returns a list of domains referenced within public posts
|
""" Returns a list of domains referenced within public posts
|
||||||
"""
|
"""
|
||||||
if not session:
|
if not session:
|
||||||
|
@ -3371,7 +3397,8 @@ def getPublicPostDomains(session, baseDir: str, nickname: str, domain: str,
|
||||||
getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
|
getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
|
||||||
maxAttachments, federationList,
|
maxAttachments, federationList,
|
||||||
personCache, debug,
|
personCache, debug,
|
||||||
projectVersion, httpPrefix, domain, domainList)
|
projectVersion, httpPrefix, domain,
|
||||||
|
wordFrequency, domainList)
|
||||||
postDomains.sort()
|
postDomains.sort()
|
||||||
return postDomains
|
return postDomains
|
||||||
|
|
||||||
|
@ -3412,7 +3439,8 @@ def downloadFollowCollection(followType: str,
|
||||||
|
|
||||||
def getPublicPostInfo(session, baseDir: str, nickname: str, domain: str,
|
def getPublicPostInfo(session, baseDir: str, nickname: str, domain: str,
|
||||||
proxyType: str, port: int, httpPrefix: str,
|
proxyType: str, port: int, httpPrefix: str,
|
||||||
debug: bool, projectVersion: str) -> []:
|
debug: bool, projectVersion: str,
|
||||||
|
wordFrequency: {}) -> []:
|
||||||
""" Returns a dict of domains referenced within public posts
|
""" Returns a dict of domains referenced within public posts
|
||||||
"""
|
"""
|
||||||
if not session:
|
if not session:
|
||||||
|
@ -3450,7 +3478,8 @@ def getPublicPostInfo(session, baseDir: str, nickname: str, domain: str,
|
||||||
getPostDomains(session, personUrl, maxPosts, maxMentions, maxEmoji,
|
getPostDomains(session, personUrl, maxPosts, maxMentions, maxEmoji,
|
||||||
maxAttachments, federationList,
|
maxAttachments, federationList,
|
||||||
personCache, debug,
|
personCache, debug,
|
||||||
projectVersion, httpPrefix, domain, [])
|
projectVersion, httpPrefix, domain,
|
||||||
|
wordFrequency, [])
|
||||||
postDomains.sort()
|
postDomains.sort()
|
||||||
domainsInfo = {}
|
domainsInfo = {}
|
||||||
for d in postDomains:
|
for d in postDomains:
|
||||||
|
@ -3476,7 +3505,7 @@ def getPublicPostDomainsBlocked(session, baseDir: str,
|
||||||
nickname: str, domain: str,
|
nickname: str, domain: str,
|
||||||
proxyType: str, port: int, httpPrefix: str,
|
proxyType: str, port: int, httpPrefix: str,
|
||||||
debug: bool, projectVersion: str,
|
debug: bool, projectVersion: str,
|
||||||
domainList=[]) -> []:
|
wordFrequency: {}, domainList=[]) -> []:
|
||||||
""" Returns a list of domains referenced within public posts which
|
""" Returns a list of domains referenced within public posts which
|
||||||
are globally blocked on this instance
|
are globally blocked on this instance
|
||||||
"""
|
"""
|
||||||
|
@ -3484,7 +3513,7 @@ def getPublicPostDomainsBlocked(session, baseDir: str,
|
||||||
getPublicPostDomains(session, baseDir, nickname, domain,
|
getPublicPostDomains(session, baseDir, nickname, domain,
|
||||||
proxyType, port, httpPrefix,
|
proxyType, port, httpPrefix,
|
||||||
debug, projectVersion,
|
debug, projectVersion,
|
||||||
domainList)
|
wordFrequency, domainList)
|
||||||
if not postDomains:
|
if not postDomains:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@ -3532,9 +3561,10 @@ def checkDomains(session, baseDir: str,
|
||||||
nickname: str, domain: str,
|
nickname: str, domain: str,
|
||||||
proxyType: str, port: int, httpPrefix: str,
|
proxyType: str, port: int, httpPrefix: str,
|
||||||
debug: bool, projectVersion: str,
|
debug: bool, projectVersion: str,
|
||||||
maxBlockedDomains: int, singleCheck: bool):
|
maxBlockedDomains: int, singleCheck: bool) -> None:
|
||||||
"""Checks follower accounts for references to globally blocked domains
|
"""Checks follower accounts for references to globally blocked domains
|
||||||
"""
|
"""
|
||||||
|
wordFrequency = {}
|
||||||
nonMutuals = _getNonMutualsOfPerson(baseDir, nickname, domain)
|
nonMutuals = _getNonMutualsOfPerson(baseDir, nickname, domain)
|
||||||
if not nonMutuals:
|
if not nonMutuals:
|
||||||
print('No non-mutual followers were found')
|
print('No non-mutual followers were found')
|
||||||
|
@ -3558,7 +3588,8 @@ def checkDomains(session, baseDir: str,
|
||||||
nonMutualNickname,
|
nonMutualNickname,
|
||||||
nonMutualDomain,
|
nonMutualDomain,
|
||||||
proxyType, port, httpPrefix,
|
proxyType, port, httpPrefix,
|
||||||
debug, projectVersion, [])
|
debug, projectVersion,
|
||||||
|
wordFrequency, [])
|
||||||
if blockedDomains:
|
if blockedDomains:
|
||||||
if len(blockedDomains) > maxBlockedDomains:
|
if len(blockedDomains) > maxBlockedDomains:
|
||||||
followerWarningStr += handle + '\n'
|
followerWarningStr += handle + '\n'
|
||||||
|
@ -3577,7 +3608,8 @@ def checkDomains(session, baseDir: str,
|
||||||
nonMutualNickname,
|
nonMutualNickname,
|
||||||
nonMutualDomain,
|
nonMutualDomain,
|
||||||
proxyType, port, httpPrefix,
|
proxyType, port, httpPrefix,
|
||||||
debug, projectVersion, [])
|
debug, projectVersion,
|
||||||
|
wordFrequency, [])
|
||||||
if blockedDomains:
|
if blockedDomains:
|
||||||
print(handle)
|
print(handle)
|
||||||
for d in blockedDomains:
|
for d in blockedDomains:
|
||||||
|
|
|
@ -67,11 +67,13 @@ def instancesGraph(baseDir: str, handles: str,
|
||||||
projectVersion, httpPrefix,
|
projectVersion, httpPrefix,
|
||||||
nickname, domain, 'outbox',
|
nickname, domain, 'outbox',
|
||||||
27261)
|
27261)
|
||||||
|
wordFrequency = {}
|
||||||
postDomains = \
|
postDomains = \
|
||||||
getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
|
getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
|
||||||
maxAttachments, federationList,
|
maxAttachments, federationList,
|
||||||
personCache, debug,
|
personCache, debug,
|
||||||
projectVersion, httpPrefix, domain, [])
|
projectVersion, httpPrefix, domain,
|
||||||
|
wordFrequency, [])
|
||||||
postDomains.sort()
|
postDomains.sort()
|
||||||
for fedDomain in postDomains:
|
for fedDomain in postDomains:
|
||||||
dotLineStr = ' "' + domain + '" -> "' + fedDomain + '";\n'
|
dotLineStr = ' "' + domain + '" -> "' + fedDomain + '";\n'
|
||||||
|
|
|
@ -97,11 +97,12 @@ def htmlAccountInfo(cssCache: {}, translate: {},
|
||||||
|
|
||||||
session = createSession(proxyType)
|
session = createSession(proxyType)
|
||||||
|
|
||||||
|
wordFrequency = {}
|
||||||
domainDict = getPublicPostInfo(session,
|
domainDict = getPublicPostInfo(session,
|
||||||
baseDir, searchNickname, searchDomain,
|
baseDir, searchNickname, searchDomain,
|
||||||
proxyType, searchPort,
|
proxyType, searchPort,
|
||||||
httpPrefix, debug,
|
httpPrefix, debug,
|
||||||
__version__)
|
__version__, wordFrequency)
|
||||||
|
|
||||||
# get a list of any blocked followers
|
# get a list of any blocked followers
|
||||||
followersList = \
|
followersList = \
|
||||||
|
|
Loading…
Reference in New Issue