epicyon/video.py

# Module metadata: file name, authorship, licence, version, maintainer
# contact, release status and the module group this file is filed under.
__filename__ = "video.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"

from utils import get_url_from_post
from utils import remove_html
from utils import get_full_domain
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
from utils import remove_id_ending
from utils import get_attributed_to
from utils import get_content_from_post
from utils import dangerous_markup
from utils import license_link_from_name
from utils import get_media_url_from_video
from utils import resembles_url
from blocking import is_blocked
from filters import is_filtered


def convert_video_to_note(base_dir: str, nickname: str, domain: str,
                          system_language: str,
                          post_json_object: {}, blocked_cache: {},
                          languages_understood: []) -> {}:
    """Converts a PeerTube Video ActivityPub(ish) object into
    a Note, so that it can then be displayed in a timeline
    https://docs.joinpeertube.org/api/activitypub#video

    base_dir, nickname, domain: passed through to the is_blocked and
        is_filtered checks
    system_language: language used for the filter and content lookups,
        and for the contentMap key when the video declares no language
    post_json_object: the incoming Video object
    blocked_cache: passed through to is_blocked
    languages_understood: used when extracting the content and when
        choosing which subtitle tracks to attach

    Returns a Create activity wrapping a Note, or None if the video is
    malformed, comes from a blocked actor, contains filtered text or
    has no usable media url
    """
    if not isinstance(post_json_object, dict):
        return None

    # check that the required fields are present and non-empty
    required_fields = (
        'type', '@context', 'id', 'published', 'to', 'cc',
        'attributedTo', 'content', 'name', 'url'
    )
    for field_name in required_fields:
        if not post_json_object.get(field_name):
            return None
    # boolean fields only need to exist. False is a legitimate value
    # (not sensitive, comments disabled) and must not reject the video
    for field_name in ('commentsEnabled', 'sensitive'):
        if post_json_object.get(field_name) is None:
            return None

    if post_json_object['type'] != 'Video':
        return None

    # the title gets concatenated into html, so it must be a string
    video_name = post_json_object['name']
    if not isinstance(video_name, str):
        return None

    # who is this attributed to ?
    attributed_to = None
    if isinstance(post_json_object['attributedTo'], str):
        attributed_to = get_attributed_to(post_json_object['attributedTo'])
    if not attributed_to:
        return None

    # get the language of the video
    post_language = system_language
    language_json = post_json_object.get('language')
    if isinstance(language_json, dict):
        identifier = language_json.get('identifier')
        # the identifier becomes a dict key, so only accept strings
        if identifier and isinstance(identifier, str):
            post_language = identifier

    # check that the attributed actor is not blocked
    post_nickname = get_nickname_from_actor(attributed_to)
    if not post_nickname:
        return None
    post_domain, post_domain_port = get_domain_from_actor(attributed_to)
    if not post_domain:
        return None
    post_domain_full = get_full_domain(post_domain, post_domain_port)
    if is_blocked(base_dir, nickname, domain,
                  post_nickname, post_domain_full, blocked_cache):
        return None

    # check that the content is valid
    if is_filtered(base_dir, nickname, domain, video_name,
                   system_language):
        return None
    if is_filtered(base_dir, nickname, domain, post_json_object['content'],
                   system_language):
        return None

    # get the license name once, for both the content and the attachment
    license_name = None
    license_json = post_json_object.get('license')
    if isinstance(license_json, dict):
        if isinstance(license_json.get('name'), str):
            license_name = license_json['name']

    # get the content
    # NOTE(review): the title and license name arrive from a remote
    # server, so they are passed through remove_html (the helper already
    # applied to the id below) before being embedded into markup
    content = '<p><b>' + remove_html(video_name) + '</b></p>'
    if license_name:
        if is_filtered(base_dir, nickname, domain, license_name,
                       system_language):
            return None
        content += '<p>' + remove_html(license_name) + '</p>'
    content += \
        get_content_from_post(post_json_object, system_language,
                              languages_understood, "content")

    media_type, media_url, media_torrent, media_magnet = \
        get_media_url_from_video(post_json_object)

    if not media_url:
        return None

    attachment = [{
        'mediaType': media_type,
        'name': post_json_object['content'],
        'type': 'Document',
        'url': media_url
    }]

    # optional download links for the torrent and magnet variants
    if media_torrent or media_magnet:
        content += '<p>'
        if media_torrent:
            content += '<a href="' + media_torrent + '">⇓</a> '
        if media_magnet:
            content += '<a href="' + media_magnet + '">🧲</a>'
        content += '</p>'

    new_post_id2 = remove_html(post_json_object['id'])
    new_post_id = remove_id_ending(new_post_id2)
    # use the sanitised id for the conversation too, so that every
    # id-derived field of the note agrees
    conversation_id = new_post_id
    new_post = {
        '@context': post_json_object['@context'],
        'id': new_post_id + '/activity',
        'type': 'Create',
        'actor': attributed_to,
        'published': post_json_object['published'],
        'to': post_json_object['to'],
        'cc': post_json_object['cc'],
        'object': {
            'id': new_post_id,
            'conversation': conversation_id,
            'context': conversation_id,
            'type': 'Note',
            'summary': None,
            'inReplyTo': None,
            'published': post_json_object['published'],
            'url': new_post_id,
            'attributedTo': attributed_to,
            'to': post_json_object['to'],
            'cc': post_json_object['cc'],
            'sensitive': post_json_object['sensitive'],
            'atomUri': new_post_id,
            'inReplyToAtomUri': None,
            'commentsEnabled': post_json_object['commentsEnabled'],
            'rejectReplies': not post_json_object['commentsEnabled'],
            'mediaType': 'text/html',
            'content': content,
            'contentMap': {
                post_language: content
            },
            'attachment': attachment,
            'tag': [],
            'replies': {
                'id': new_post_id + '/replies',
                'type': 'Collection',
                'first': {
                    'type': 'CollectionPage',
                    'partOf': new_post_id + '/replies',
                    'items': []
                }
            }
        }
    }

    # optional support text, which may also be a link
    support_str = post_json_object.get('support')
    if support_str and isinstance(support_str, str):
        if not dangerous_markup(support_str, False, []):
            if not is_filtered(base_dir, nickname, domain, support_str,
                               system_language):
                new_post['object']['support'] = support_str
                # if this is a link
                if resembles_url(support_str):
                    # add a buy link
                    attachment.append({
                        'type': 'Link',
                        'mediaType': 'text/html',
                        'href': support_str,
                        'rel': 'support',
                        'name': 'Support'
                    })

    # attach a link to the license, if one is known for this name
    if license_name:
        content_license_url = license_link_from_name(license_name)
        if content_license_url:
            attachment.append({
                "type": "PropertyValue",
                "name": "license",
                "value": content_license_url
            })

    # attach subtitle tracks (.vtt) for languages which are understood
    subtitles = post_json_object.get('subtitleLanguage')
    if isinstance(subtitles, list):
        for lang in subtitles:
            if not isinstance(lang, dict):
                continue
            if not lang.get('identifier'):
                continue
            if not isinstance(lang['identifier'], str):
                continue
            if not lang.get('url'):
                continue
            url_str = get_url_from_post(lang['url'])
            if not url_str:
                continue
            if not url_str.endswith('.vtt'):
                continue
            for understood in languages_understood:
                if understood in lang['identifier']:
                    attachment.append({
                        "type": "Document",
                        "name": understood,
                        "mediaType": "text/vtt",
                        "url": url_str
                    })
                    break

    return new_post
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`__filename__ = "video.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
Version 1.5.0 2024-01-21 19:01:20 +00:00			`__version__ = "1.5.0"`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`__maintainer__ = "Bob Mottram"`
			`__email__ = "bob@libreserver.org"`
			`__status__ = "Production"`
			`__module_group__ = "Timeline"`

Handle situations where urls are lists 2023-12-09 14:18:24 +00:00			`from utils import get_url_from_post`
Sanitise links to avoid injection attacks in rendered html 2023-07-12 11:08:02 +00:00			`from utils import remove_html`
Snake case 2021-12-26 12:45:03 +00:00			`from utils import get_full_domain`
Snake case 2021-12-27 22:19:18 +00:00			`from utils import get_nickname_from_actor`
Snake case 2021-12-27 19:05:25 +00:00			`from utils import get_domain_from_actor`
Snake case 2021-12-27 11:20:57 +00:00			`from utils import remove_id_ending`
Function to get the attributedTo string 2023-09-26 20:25:53 +00:00			`from utils import get_attributed_to`
get content when converting video to note 2023-10-29 13:24:52 +00:00			`from utils import get_content_from_post`
Add support field for video 2023-10-29 18:08:35 +00:00			`from utils import dangerous_markup`
Get license when converting video to note 2023-10-29 20:20:55 +00:00			`from utils import license_link_from_name`
Tidying 2023-10-29 22:00:04 +00:00			`from utils import get_media_url_from_video`
Checking for url strings 2024-01-27 17:04:21 +00:00			`from utils import resembles_url`
Moving to snake case 2021-12-29 21:55:09 +00:00			`from blocking import is_blocked`
			`from filters import is_filtered`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00

Moving to snake case 2021-12-29 21:55:09 +00:00			`def convert_video_to_note(base_dir: str, nickname: str, domain: str,`
			`system_language: str,`
get content when converting video to note 2023-10-29 13:24:52 +00:00			`post_json_object: {}, blocked_cache: {},`
			`languages_understood: []) -> {}:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`"""Converts a PeerTube Video ActivityPub(ish) object into`
			`a Note, so that it can then be displayed in a timeline`
Add support field for video 2023-10-29 18:08:35 +00:00			`https://docs.joinpeertube.org/api/activitypub#video`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`"""`
			`# check that the required fields are present`
Snake case 2022-01-03 19:14:30 +00:00			`required_fields = (`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'type', '@context', 'id', 'published', 'to', 'cc',`
			`'attributedTo', 'commentsEnabled', 'content', 'sensitive',`
			`'name', 'url'`
			`)`
Snake case 2022-01-03 19:14:30 +00:00			`for field_name in required_fields:`
			`if not post_json_object.get(field_name):`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

Snake case 2021-12-25 22:09:19 +00:00			`if post_json_object['type'] != 'Video':`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

			`# who is this attributed to ?`
Snake case 2022-01-03 19:14:30 +00:00			`attributed_to = None`
Snake case 2021-12-25 22:09:19 +00:00			`if isinstance(post_json_object['attributedTo'], str):`
Function to get the attributedTo string 2023-09-26 20:25:53 +00:00			`attributed_to = get_attributed_to(post_json_object['attributedTo'])`
Snake case 2022-01-03 19:14:30 +00:00			`if not attributed_to:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

			`# get the language of the video`
Snake case 2022-01-03 19:14:30 +00:00			`post_language = system_language`
Snake case 2021-12-25 22:09:19 +00:00			`if post_json_object.get('language'):`
			`if isinstance(post_json_object['language'], dict):`
			`if post_json_object['language'].get('identifier'):`
Snake case 2022-01-03 19:14:30 +00:00			`post_language = post_json_object['language']['identifier']`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00
			`# check that the attributed actor is not blocked`
Snake case 2022-01-03 19:14:30 +00:00			`post_nickname = get_nickname_from_actor(attributed_to)`
			`if not post_nickname:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`
Snake case 2022-01-03 19:14:30 +00:00			`post_domain, post_domain_port = get_domain_from_actor(attributed_to)`
			`if not post_domain:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`
Snake case 2022-01-03 19:14:30 +00:00			`post_domain_full = get_full_domain(post_domain, post_domain_port)`
Moving to snake case 2021-12-29 21:55:09 +00:00			`if is_blocked(base_dir, nickname, domain,`
Snake case 2022-01-03 19:14:30 +00:00			`post_nickname, post_domain_full, blocked_cache):`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

			`# check that the content is valid`
Check for inverted text 2022-09-25 17:26:11 +00:00			`if is_filtered(base_dir, nickname, domain, post_json_object['name'],`
			`system_language):`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`
Check for inverted text 2022-09-25 17:26:11 +00:00			`if is_filtered(base_dir, nickname, domain, post_json_object['content'],`
			`system_language):`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

			`# get the content`
Snake case 2021-12-25 22:09:19 +00:00			`content = '<p><b>' + post_json_object['name'] + '</b></p>'`
			`if post_json_object.get('license'):`
			`if isinstance(post_json_object['license'], dict):`
			`if post_json_object['license'].get('name'):`
Moving to snake case 2021-12-29 21:55:09 +00:00			`if is_filtered(base_dir, nickname, domain,`
Check for inverted text 2022-09-25 17:26:11 +00:00			`post_json_object['license']['name'],`
			`system_language):`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`
Snake case 2021-12-25 22:09:19 +00:00			`content += '<p>' + post_json_object['license']['name'] + '</p>'`
Tidying 2023-10-29 14:54:00 +00:00			`content += \`
get content when converting video to note 2023-10-29 13:24:52 +00:00			`get_content_from_post(post_json_object, system_language,`
			`languages_understood, "content")`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00
Snake case 2022-01-03 19:14:30 +00:00			`conversation_id = remove_id_ending(post_json_object['id'])`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00
Tidying 2023-10-29 22:00:04 +00:00			`media_type, media_url, media_torrent, media_magnet = \`
			`get_media_url_from_video(post_json_object)`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00
Snake case 2022-01-03 19:14:30 +00:00			`if not media_url:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`return None`

			`attachment = [{`
Snake case 2022-01-03 19:14:30 +00:00			`'mediaType': media_type,`
Snake case 2021-12-25 22:09:19 +00:00			`'name': post_json_object['content'],`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'type': 'Document',`
Snake case 2022-01-03 19:14:30 +00:00			`'url': media_url`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`}]`

Snake case 2022-01-03 19:14:30 +00:00			`if media_torrent or media_magnet:`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`content += '<p>'`
Snake case 2022-01-03 19:14:30 +00:00			`if media_torrent:`
			`content += '<a href="' + media_torrent + '">⇓</a> '`
			`if media_magnet:`
			`content += '<a href="' + media_magnet + '">🧲</a>'`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`content += '</p>'`

Sanitise links to avoid injection attacks in rendered html 2023-07-12 11:08:02 +00:00			`new_post_id2 = remove_html(post_json_object['id'])`
			`new_post_id = remove_id_ending(new_post_id2)`
Snake case 2022-01-03 19:14:30 +00:00			`new_post = {`
Snake case 2021-12-25 22:09:19 +00:00			`'@context': post_json_object['@context'],`
Snake case 2022-01-03 19:14:30 +00:00			`'id': new_post_id + '/activity',`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'type': 'Create',`
Snake case 2022-01-03 19:14:30 +00:00			`'actor': attributed_to,`
Snake case 2021-12-25 22:09:19 +00:00			`'published': post_json_object['published'],`
			`'to': post_json_object['to'],`
			`'cc': post_json_object['cc'],`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'object': {`
Snake case 2022-01-03 19:14:30 +00:00			`'id': new_post_id,`
			`'conversation': conversation_id,`
'context' is sometimes used to mean the same thing as 'conversation' This does not appear to be part of the AP spec, but is used in the wild 2023-01-09 11:38:05 +00:00			`'context': conversation_id,`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'type': 'Note',`
			`'summary': None,`
			`'inReplyTo': None,`
Snake case 2021-12-25 22:09:19 +00:00			`'published': post_json_object['published'],`
Snake case 2022-01-03 19:14:30 +00:00			`'url': new_post_id,`
			`'attributedTo': attributed_to,`
Snake case 2021-12-25 22:09:19 +00:00			`'to': post_json_object['to'],`
			`'cc': post_json_object['cc'],`
			`'sensitive': post_json_object['sensitive'],`
Snake case 2022-01-03 19:14:30 +00:00			`'atomUri': new_post_id,`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'inReplyToAtomUri': None,`
Snake case 2021-12-25 22:09:19 +00:00			`'commentsEnabled': post_json_object['commentsEnabled'],`
			`'rejectReplies': not post_json_object['commentsEnabled'],`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'mediaType': 'text/html',`
			`'content': content,`
			`'contentMap': {`
Snake case 2022-01-03 19:14:30 +00:00			`post_language: content`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`},`
			`'attachment': attachment,`
			`'tag': [],`
			`'replies': {`
Snake case 2022-01-03 19:14:30 +00:00			`'id': new_post_id + '/replies',`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'type': 'Collection',`
			`'first': {`
			`'type': 'CollectionPage',`
Snake case 2022-01-03 19:14:30 +00:00			`'partOf': new_post_id + '/replies',`
Convert announced peertube videos to Note 2021-09-13 11:34:56 +00:00			`'items': []`
			`}`
			`}`
			`}`
			`}`

Add support field for video 2023-10-29 18:08:35 +00:00			`if post_json_object.get('support'):`
Add a buy button for support links on videos 2023-10-29 20:00:43 +00:00			`support_str = post_json_object['support']`
			`if isinstance(support_str, str):`
			`if not dangerous_markup(support_str, False, []):`
			`if not is_filtered(base_dir, nickname, domain, support_str,`
Filter video support text 2023-10-29 19:41:09 +00:00			`system_language):`
Add a buy button for support links on videos 2023-10-29 20:00:43 +00:00			`new_post['object']['support'] = support_str`
			`# if this is a link`
Checking for url strings 2024-01-27 17:04:21 +00:00			`if resembles_url(support_str):`
Add a buy button for support links on videos 2023-10-29 20:00:43 +00:00			`# add a buy link`
			`new_post['object']['attachment'].append({`
			`'type': 'Link',`
mime type 2023-10-29 20:01:40 +00:00			`'mediaType': 'text/html',`
Add a buy button for support links on videos 2023-10-29 20:00:43 +00:00			`'href': support_str,`
			`'rel': 'support',`
			`'name': 'Support'`
			`})`
Add support field for video 2023-10-29 18:08:35 +00:00
Get license when converting video to note 2023-10-29 20:20:55 +00:00			`if post_json_object.get('license'):`
			`if isinstance(post_json_object['license'], dict):`
			`if post_json_object['license'].get('name'):`
			`if isinstance(post_json_object['license']['name'], str):`
			`license_str = post_json_object['license']['name']`
			`content_license_url = \`
			`license_link_from_name(license_str)`
			`if content_license_url:`
			`new_post['object']['attachment'].append({`
			`"type": "PropertyValue",`
			`"name": "license",`
			`"value": content_license_url`
			`})`

Import subtitles from video when converting to note 2023-10-29 20:48:27 +00:00			`if post_json_object.get('subtitleLanguage'):`
			`if isinstance(post_json_object['subtitleLanguage'], list):`
			`for lang in post_json_object['subtitleLanguage']:`
			`if not isinstance(lang, dict):`
			`continue`
			`if not lang.get('identifier'):`
			`continue`
			`if not isinstance(lang['identifier'], str):`
			`continue`
			`if not lang.get('url'):`
			`continue`
Handle situations where urls are lists 2023-12-09 14:18:24 +00:00			`url_str = get_url_from_post(lang['url'])`
			`if not url_str:`
Import subtitles from video when converting to note 2023-10-29 20:48:27 +00:00			`continue`
Handle situations where urls are lists 2023-12-09 14:18:24 +00:00			`if not url_str.endswith('.vtt'):`
Import subtitles from video when converting to note 2023-10-29 20:48:27 +00:00			`continue`
			`for understood in languages_understood:`
			`if understood in lang['identifier']:`
			`new_post['object']['attachment'].append({`
Attachment format 2023-10-29 20:53:26 +00:00			`"type": "Document",`
			`"name": understood,`
Import subtitles from video when converting to note 2023-10-29 20:48:27 +00:00			`"mediaType": "text/vtt",`
Handle situations where urls are lists 2023-12-09 14:18:24 +00:00			`"url": url_str`
Import subtitles from video when converting to note 2023-10-29 20:48:27 +00:00			`})`
			`break`

Snake case 2022-01-03 19:14:30 +00:00			`return new_post`