mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Extract hashtags from newswire feeds
							parent
							
								
									a5d21852bb
								
							
						
					
					
						commit
						af3b7baf12
					
				| 
						 | 
				
			
			@ -374,7 +374,7 @@ def addHashTags(wordStr: str, httpPrefix: str, domain: str,
 | 
			
		|||
    hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag
 | 
			
		||||
    postHashtags[hashtag] = {
 | 
			
		||||
        'href': hashtagUrl,
 | 
			
		||||
        'name': '#'+hashtag,
 | 
			
		||||
        'name': '#' + hashtag,
 | 
			
		||||
        'type': 'Hashtag'
 | 
			
		||||
    }
 | 
			
		||||
    replaceHashTags[wordStr] = "<a href=\"" + hashtagUrl + \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										37
									
								
								newswire.py
								
								
								
								
							
							
						
						
									
										37
									
								
								newswire.py
								
								
								
								
							| 
						 | 
				
			
			@ -75,13 +75,15 @@ def getNewswireTags(text: str) -> []:
 | 
			
		|||
def addNewswireDictEntry(newswire: {}, dateStr: str,
                         title: str, link: str,
                         votesStatus: str, postFilename: str,
                         description: str, moderated: bool,
                         tags=None) -> None:
    """Update the newswire dictionary

    Stores an entry in newswire keyed by dateStr. The entry is the list
    [title, link, votesStatus, postFilename, description, moderated, tags].
    If no tags are supplied (None or an empty list) they are obtained by
    calling getNewswireTags on the title and description text.
    """
    # None rather than a shared mutable [] default; any falsy value
    # triggers extraction of tags from the text
    if not tags:
        tags = getNewswireTags(title + ' ' + description)
    newswire[dateStr] = [title, link,
                         votesStatus, postFilename,
                         description, moderated, tags]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool,
 | 
			
		||||
| 
						 | 
				
			
			@ -340,6 +342,32 @@ def isaBlogPost(postJsonObject: {}) -> bool:
 | 
			
		|||
    return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def getHashtagsFromPost(postJsonObject: {}) -> []:
    """Returns a list of any hashtags within a post

    The 'tag' property of the post's 'object' is read as a list of tag
    dicts; a lone tag dict is also accepted and treated as a list of one.
    The 'name' of each entry whose 'type' is 'Hashtag' is returned, in
    order of first appearance and without duplicates. An empty list is
    returned if the post has no dict 'object' or no usable 'tag'.
    """
    postObject = postJsonObject.get('object')
    if not isinstance(postObject, dict):
        return []
    tagList = postObject.get('tag')
    if isinstance(tagList, dict):
        # a single tag which was not wrapped in a list
        tagList = [tagList]
    if not isinstance(tagList, list):
        return []
    tags = []
    # iterate over the tag entries themselves; iterating dict.items()
    # would yield (key, value) tuples and so never match a tag dict
    for tg in tagList:
        if not isinstance(tg, dict):
            continue
        if not tg.get('name'):
            continue
        if tg.get('type') != 'Hashtag':
            continue
        if tg['name'] not in tags:
            tags.append(tg['name'])
    return tags
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
 | 
			
		||||
                              newswire: {},
 | 
			
		||||
                              maxBlogsPerAccount: int,
 | 
			
		||||
| 
						 | 
				
			
			@ -401,7 +429,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
 | 
			
		|||
                                         postJsonObject['object']['summary'],
 | 
			
		||||
                                         postJsonObject['object']['url'],
 | 
			
		||||
                                         votes, fullPostFilename,
 | 
			
		||||
                                         description, moderated)
 | 
			
		||||
                                         description, moderated,
 | 
			
		||||
                                         getHashtagsFromPost(postJsonObject))
 | 
			
		||||
 | 
			
		||||
            ctr += 1
 | 
			
		||||
            if ctr >= maxBlogsPerAccount:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue