mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Calculate word frequencies for account info
							parent
							
								
									914160ef05
								
							
						
					
					
						commit
						345145927a
					
				| 
						 | 
				
			
			@ -564,12 +564,14 @@ if args.postDomains:
 | 
			
		|||
            args.port = 80
 | 
			
		||||
    elif args.gnunet:
 | 
			
		||||
        proxyType = 'gnunet'
 | 
			
		||||
    wordFrequency = {}
 | 
			
		||||
    domainList = []
 | 
			
		||||
    domainList = getPublicPostDomains(None,
 | 
			
		||||
                                      baseDir, nickname, domain,
 | 
			
		||||
                                      proxyType, args.port,
 | 
			
		||||
                                      httpPrefix, debug,
 | 
			
		||||
                                      __version__, domainList)
 | 
			
		||||
                                      __version__,
 | 
			
		||||
                                      wordFrequency, domainList)
 | 
			
		||||
    for postDomain in domainList:
 | 
			
		||||
        print(postDomain)
 | 
			
		||||
    sys.exit()
 | 
			
		||||
| 
						 | 
				
			
			@ -602,12 +604,14 @@ if args.postDomainsBlocked:
 | 
			
		|||
            args.port = 80
 | 
			
		||||
    elif args.gnunet:
 | 
			
		||||
        proxyType = 'gnunet'
 | 
			
		||||
    wordFrequency = {}
 | 
			
		||||
    domainList = []
 | 
			
		||||
    domainList = getPublicPostDomainsBlocked(None,
 | 
			
		||||
                                             baseDir, nickname, domain,
 | 
			
		||||
                                             proxyType, args.port,
 | 
			
		||||
                                             httpPrefix, debug,
 | 
			
		||||
                                             __version__, domainList)
 | 
			
		||||
                                             __version__,
 | 
			
		||||
                                             wordFrequency, domainList)
 | 
			
		||||
    for postDomain in domainList:
 | 
			
		||||
        print(postDomain)
 | 
			
		||||
    sys.exit()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										52
									
								
								posts.py
								
								
								
								
							
							
						
						
									
										52
									
								
								posts.py
								
								
								
								
							| 
						 | 
				
			
			@ -469,6 +469,27 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
 | 
			
		|||
    return personPosts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
    """Updates a dictionary containing words and the number of times
    that they appear

    content: text of a post, passed through removeHtml before counting
    wordFrequency: dictionary of word -> count, which is updated in place

    Full stops and semicolons are treated as word separators.
    Words shorter than three characters are ignored, and three
    character words are only counted if upper-casing leaves them
    unchanged (presumably so that acronyms are kept while common
    short words are dropped).
    """
    plainText = removeHtml(content)
    # these characters separate words rather than being part of them
    plainText = plainText.replace('.', ' ').replace(';', ' ')
    # split on any run of whitespace, so that words separated by
    # newlines or tabs are not merged into a single entry
    for word in plainText.split():
        wordLen = len(word)
        if wordLen < 3:
            continue
        # three character words are only kept if already upper case
        if wordLen < 4 and word.upper() != word:
            continue
        wordFrequency[word] = wordFrequency.get(word, 0) + 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def getPostDomains(session, outboxUrl: str, maxPosts: int,
 | 
			
		||||
                   maxMentions: int,
 | 
			
		||||
                   maxEmoji: int, maxAttachments: int,
 | 
			
		||||
| 
						 | 
				
			
			@ -476,7 +497,9 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
 | 
			
		|||
                   personCache: {},
 | 
			
		||||
                   debug: bool,
 | 
			
		||||
                   projectVersion: str, httpPrefix: str,
 | 
			
		||||
                   domain: str, domainList=[]) -> []:
 | 
			
		||||
                   domain: str,
 | 
			
		||||
                   wordFrequency: {},
 | 
			
		||||
                   domainList=[]) -> []:
 | 
			
		||||
    """Returns a list of domains referenced within public posts
 | 
			
		||||
    """
 | 
			
		||||
    if not outboxUrl:
 | 
			
		||||
| 
						 | 
				
			
			@ -503,6 +526,9 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
 | 
			
		|||
            continue
 | 
			
		||||
        if not isinstance(item['object'], dict):
 | 
			
		||||
            continue
 | 
			
		||||
        if item['object'].get('content'):
 | 
			
		||||
            _updateWordFrequency(item['object']['content'],
 | 
			
		||||
                                 wordFrequency)
 | 
			
		||||
        if item['object'].get('inReplyTo'):
 | 
			
		||||
            if isinstance(item['object']['inReplyTo'], str):
 | 
			
		||||
                postDomain, postPort = \
 | 
			
		||||
| 
						 | 
				
			
			@ -3334,7 +3360,7 @@ def getPublicPostsOfPerson(baseDir: str, nickname: str, domain: str,
 | 
			
		|||
def getPublicPostDomains(session, baseDir: str, nickname: str, domain: str,
 | 
			
		||||
                         proxyType: str, port: int, httpPrefix: str,
 | 
			
		||||
                         debug: bool, projectVersion: str,
 | 
			
		||||
                         domainList=[]) -> []:
 | 
			
		||||
                         wordFrequency: {}, domainList=[]) -> []:
 | 
			
		||||
    """ Returns a list of domains referenced within public posts
 | 
			
		||||
    """
 | 
			
		||||
    if not session:
 | 
			
		||||
| 
						 | 
				
			
			@ -3371,7 +3397,8 @@ def getPublicPostDomains(session, baseDir: str, nickname: str, domain: str,
 | 
			
		|||
        getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
 | 
			
		||||
                       maxAttachments, federationList,
 | 
			
		||||
                       personCache, debug,
 | 
			
		||||
                       projectVersion, httpPrefix, domain, domainList)
 | 
			
		||||
                       projectVersion, httpPrefix, domain,
 | 
			
		||||
                       wordFrequency, domainList)
 | 
			
		||||
    postDomains.sort()
 | 
			
		||||
    return postDomains
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3412,7 +3439,8 @@ def downloadFollowCollection(followType: str,
 | 
			
		|||
 | 
			
		||||
def getPublicPostInfo(session, baseDir: str, nickname: str, domain: str,
 | 
			
		||||
                      proxyType: str, port: int, httpPrefix: str,
 | 
			
		||||
                      debug: bool, projectVersion: str) -> []:
 | 
			
		||||
                      debug: bool, projectVersion: str,
 | 
			
		||||
                      wordFrequency: {}) -> []:
 | 
			
		||||
    """ Returns a dict of domains referenced within public posts
 | 
			
		||||
    """
 | 
			
		||||
    if not session:
 | 
			
		||||
| 
						 | 
				
			
			@ -3450,7 +3478,8 @@ def getPublicPostInfo(session, baseDir: str, nickname: str, domain: str,
 | 
			
		|||
        getPostDomains(session, personUrl, maxPosts, maxMentions, maxEmoji,
 | 
			
		||||
                       maxAttachments, federationList,
 | 
			
		||||
                       personCache, debug,
 | 
			
		||||
                       projectVersion, httpPrefix, domain, [])
 | 
			
		||||
                       projectVersion, httpPrefix, domain,
 | 
			
		||||
                       wordFrequency, [])
 | 
			
		||||
    postDomains.sort()
 | 
			
		||||
    domainsInfo = {}
 | 
			
		||||
    for d in postDomains:
 | 
			
		||||
| 
						 | 
				
			
			@ -3476,7 +3505,7 @@ def getPublicPostDomainsBlocked(session, baseDir: str,
 | 
			
		|||
                                nickname: str, domain: str,
 | 
			
		||||
                                proxyType: str, port: int, httpPrefix: str,
 | 
			
		||||
                                debug: bool, projectVersion: str,
 | 
			
		||||
                                domainList=[]) -> []:
 | 
			
		||||
                                wordFrequency: {}, domainList=[]) -> []:
 | 
			
		||||
    """ Returns a list of domains referenced within public posts which
 | 
			
		||||
    are globally blocked on this instance
 | 
			
		||||
    """
 | 
			
		||||
| 
						 | 
				
			
			@ -3484,7 +3513,7 @@ def getPublicPostDomainsBlocked(session, baseDir: str,
 | 
			
		|||
        getPublicPostDomains(session, baseDir, nickname, domain,
 | 
			
		||||
                             proxyType, port, httpPrefix,
 | 
			
		||||
                             debug, projectVersion,
 | 
			
		||||
                             domainList)
 | 
			
		||||
                             wordFrequency, domainList)
 | 
			
		||||
    if not postDomains:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3532,9 +3561,10 @@ def checkDomains(session, baseDir: str,
 | 
			
		|||
                 nickname: str, domain: str,
 | 
			
		||||
                 proxyType: str, port: int, httpPrefix: str,
 | 
			
		||||
                 debug: bool, projectVersion: str,
 | 
			
		||||
                 maxBlockedDomains: int, singleCheck: bool):
 | 
			
		||||
                 maxBlockedDomains: int, singleCheck: bool) -> None:
 | 
			
		||||
    """Checks follower accounts for references to globally blocked domains
 | 
			
		||||
    """
 | 
			
		||||
    wordFrequency = {}
 | 
			
		||||
    nonMutuals = _getNonMutualsOfPerson(baseDir, nickname, domain)
 | 
			
		||||
    if not nonMutuals:
 | 
			
		||||
        print('No non-mutual followers were found')
 | 
			
		||||
| 
						 | 
				
			
			@ -3558,7 +3588,8 @@ def checkDomains(session, baseDir: str,
 | 
			
		|||
                                            nonMutualNickname,
 | 
			
		||||
                                            nonMutualDomain,
 | 
			
		||||
                                            proxyType, port, httpPrefix,
 | 
			
		||||
                                            debug, projectVersion, [])
 | 
			
		||||
                                            debug, projectVersion,
 | 
			
		||||
                                            wordFrequency, [])
 | 
			
		||||
            if blockedDomains:
 | 
			
		||||
                if len(blockedDomains) > maxBlockedDomains:
 | 
			
		||||
                    followerWarningStr += handle + '\n'
 | 
			
		||||
| 
						 | 
				
			
			@ -3577,7 +3608,8 @@ def checkDomains(session, baseDir: str,
 | 
			
		|||
                                            nonMutualNickname,
 | 
			
		||||
                                            nonMutualDomain,
 | 
			
		||||
                                            proxyType, port, httpPrefix,
 | 
			
		||||
                                            debug, projectVersion, [])
 | 
			
		||||
                                            debug, projectVersion,
 | 
			
		||||
                                            wordFrequency, [])
 | 
			
		||||
            if blockedDomains:
 | 
			
		||||
                print(handle)
 | 
			
		||||
                for d in blockedDomains:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -67,11 +67,13 @@ def instancesGraph(baseDir: str, handles: str,
 | 
			
		|||
                                                projectVersion, httpPrefix,
 | 
			
		||||
                                                nickname, domain, 'outbox',
 | 
			
		||||
                                                27261)
 | 
			
		||||
        wordFrequency = {}
 | 
			
		||||
        postDomains = \
 | 
			
		||||
            getPostDomains(session, personUrl, 64, maxMentions, maxEmoji,
 | 
			
		||||
                           maxAttachments, federationList,
 | 
			
		||||
                           personCache, debug,
 | 
			
		||||
                           projectVersion, httpPrefix, domain, [])
 | 
			
		||||
                           projectVersion, httpPrefix, domain,
 | 
			
		||||
                           wordFrequency, [])
 | 
			
		||||
        postDomains.sort()
 | 
			
		||||
        for fedDomain in postDomains:
 | 
			
		||||
            dotLineStr = '    "' + domain + '" -> "' + fedDomain + '";\n'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -97,11 +97,12 @@ def htmlAccountInfo(cssCache: {}, translate: {},
 | 
			
		|||
 | 
			
		||||
    session = createSession(proxyType)
 | 
			
		||||
 | 
			
		||||
    wordFrequency = {}
 | 
			
		||||
    domainDict = getPublicPostInfo(session,
 | 
			
		||||
                                   baseDir, searchNickname, searchDomain,
 | 
			
		||||
                                   proxyType, searchPort,
 | 
			
		||||
                                   httpPrefix, debug,
 | 
			
		||||
                                   __version__)
 | 
			
		||||
                                   __version__, wordFrequency)
 | 
			
		||||
 | 
			
		||||
    # get a list of any blocked followers
 | 
			
		||||
    followersList = \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue