epicyon/git.py

286 lines
9.8 KiB
Python
Raw Normal View History

2020-05-02 10:07:50 +00:00
__filename__ = "git.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
2020-05-02 10:07:50 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-05-02 10:07:50 +00:00
__status__ = "Production"
2021-06-26 11:16:41 +00:00
__module_group__ = "Profile Metadata"
2020-05-02 10:07:50 +00:00
import os
2020-05-02 18:09:54 +00:00
import html
2024-04-24 19:35:04 +00:00
from utils import remove_link_tracking
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2022-04-09 15:11:22 +00:00
from utils import has_object_string_type
2022-06-10 13:01:39 +00:00
from utils import text_in_file
2023-08-27 13:02:19 +00:00
from utils import get_attachment_property_value
from utils import remove_html
from utils import get_attributed_to
from utils import string_contains
2020-05-02 10:07:50 +00:00
2021-12-29 21:55:09 +00:00
def _git_format_content(content: str) -> str:
2020-05-02 17:06:13 +00:00
""" replace html formatting, so that it's more
like the original patch file
"""
2022-01-02 14:59:05 +00:00
patch_str = content.replace('<br>', '\n').replace('<br />', '\n')
patch_str = patch_str.replace('<p>', '').replace('</p>', '\n')
patch_str = html.unescape(patch_str)
if 'From ' in patch_str:
patch_str = 'From ' + patch_str.split('From ', 1)[1]
return patch_str
2020-05-02 17:06:13 +00:00
2021-12-29 21:55:09 +00:00
def _get_git_project_name(base_dir: str, nickname: str, domain: str,
                          subject: str) -> str:
    """Returns the project name for a git patch
    The project name should be contained within the subject line
    and should match against a list of projects which the account
    holder wants to receive, stored in gitprojects.txt within the
    account directory.
    Returns the first (lowercased) subject word found in that file,
    or None if the file does not exist or no word matches
    """
    git_projects_filename = \
        acct_dir(base_dir, nickname, domain) + '/gitprojects.txt'
    if not os.path.isfile(git_projects_filename):
        return None
    subject_line_words = subject.lower().split(' ')
    for word in subject_line_words:
        # Leading or repeated spaces in the subject produce empty
        # tokens. An empty token is never a project name, and if the
        # file search matched it an empty (falsy) name would be
        # returned before any real word had been examined
        if not word:
            continue
        if text_in_file(word, git_projects_filename):
            return word
    return None
2020-05-02 11:08:38 +00:00
2021-12-29 21:55:09 +00:00
def is_git_patch(base_dir: str, nickname: str, domain: str,
                 message_type: str,
                 subject: str, content: str,
                 check_project_name: bool = True) -> bool:
    """Decides whether the content of a post looks like a git patch.
    The post must be a Note, Page or Patch with a subject line, its
    content must contain all of the usual patch markers and at least
    one html line break. If check_project_name is set then the subject
    must also name a project which the account accepts patches for
    """
    if message_type not in ('Note', 'Page', 'Patch'):
        return False
    # must have a subject line
    if not subject:
        return False
    # every one of these must appear somewhere within the content
    required_markers = (
        '[PATCH]', '---', 'diff ', 'From ',
        'From:', 'Date:', 'Subject:'
    )
    if not all(marker in content for marker in required_markers):
        return False
    # either style of html line break will do
    if '<br>' not in content and '<br />' not in content:
        return False
    if not check_project_name:
        return True
    accepted_project = \
        _get_git_project_name(base_dir, nickname, domain, subject)
    return bool(accepted_project)
2022-01-02 14:59:05 +00:00
def _get_git_hash(patch_str: str) -> str:
2020-05-03 11:38:09 +00:00
"""Returns the commit hash from a given patch
"""
2022-01-02 14:59:05 +00:00
patch_lines = patch_str.split('\n')
for line in patch_lines:
2020-05-03 11:38:09 +00:00
if line.startswith('From '):
words = line.split(' ')
if len(words) > 1:
if len(words[1]) > 20:
return words[1]
break
return None
2022-01-02 14:59:05 +00:00
def _get_patch_description(patch_str: str) -> str:
2020-05-03 13:50:01 +00:00
"""Returns the description from a given patch
"""
2022-01-02 14:59:05 +00:00
patch_lines = patch_str.split('\n')
2020-05-03 13:50:01 +00:00
description = ''
started = False
2022-01-02 14:59:05 +00:00
for line in patch_lines:
2020-05-03 13:50:01 +00:00
if started:
if line.strip() == '---':
break
description += line + '\n'
if line.startswith('Subject:'):
started = True
return description
2021-12-29 21:55:09 +00:00
def convert_post_to_patch(base_dir: str, nickname: str, domain: str,
                          post_json_object: {}) -> bool:
    """Checks whether the given post contains a git patch and if so
    converts it, in place, into a Patch ActivityPub type.
    Returns True if the post already was, or has now become, a Patch
    """
    if not has_object_string_type(post_json_object, False):
        return False
    post_obj = post_json_object['object']
    if post_obj['type'] == 'Patch':
        return True
    # all of these fields are needed to build the Patch
    for required_field in ('summary', 'content', 'attributedTo'):
        if not post_obj.get(required_field):
            return False
    if get_attributed_to(post_obj['attributedTo']) is None:
        return False
    # the project name is not checked at this stage
    looks_like_patch = \
        is_git_patch(base_dir, nickname, domain,
                     post_obj['type'],
                     post_obj['summary'],
                     post_obj['content'],
                     False)
    if not looks_like_patch:
        return False
    patch_text = _git_format_content(post_obj['content'])
    patch_hash = _get_git_hash(patch_text)
    if not patch_hash:
        return False
    post_obj['type'] = 'Patch'
    # add a committedBy parameter
    if not post_obj.get('committedBy'):
        post_obj['committedBy'] = \
            get_attributed_to(post_obj['attributedTo'])
    post_obj['hash'] = patch_hash
    post_obj['description'] = {
        "mediaType": "text/plain",
        "content": _get_patch_description(patch_text)
    }
    # remove content map
    if post_obj.get('contentMap'):
        del post_obj['contentMap']
    print('Converted post to Patch ActivityPub type')
    return True
2022-01-02 14:59:05 +00:00
def _git_add_from_handle(patch_str: str, handle: str) -> str:
2020-05-03 09:48:12 +00:00
"""Adds the activitypub handle of the sender to the patch
"""
2022-01-02 14:59:05 +00:00
from_str = 'AP-signed-off-by: '
if from_str in patch_str:
return patch_str
patch_lines = patch_str.split('\n')
patch_str = ''
for line in patch_lines:
patch_str += line + '\n'
2020-05-03 09:48:12 +00:00
if line.startswith('From:'):
2022-01-02 14:59:05 +00:00
if from_str not in patch_str:
patch_str += from_str + handle + '\n'
return patch_str
2020-05-03 09:48:12 +00:00
2021-12-29 21:55:09 +00:00
def receive_git_patch(base_dir: str, nickname: str, domain: str,
                      message_type: str, subject: str, content: str,
                      from_nickname: str, from_domain: str) -> bool:
    """Receive a git patch
    If the post is a patch for a project which the account accepts
    then it is saved as patches/<project>/<subject>.patch within the
    account directory, and also as .newPatchContent within the account
    directory. The handle of the sender is added to the saved patch.
    Returns True if both files were written, otherwise False
    """
    if not is_git_patch(base_dir, nickname, domain,
                        message_type, subject, content):
        return False

    patch_str = _git_format_content(content)

    # get the subject line and turn it into a filename
    patch_subject = None
    for line in patch_str.split('\n'):
        if line.startswith('Subject:'):
            # path separators within the subject must not create
            # subdirectories
            patch_subject = \
                line.replace('Subject:', '').replace('/', '|')
            patch_subject = patch_subject.replace('[PATCH]', '').strip()
            patch_subject = patch_subject.replace(' ', '_')
            break
    if patch_subject is None:
        return False

    project_name = \
        _get_git_project_name(base_dir, nickname, domain, subject)
    # the projects file could have changed since is_git_patch ran,
    # so the second lookup may find nothing
    if not project_name:
        return False
    # the project name comes from the subject of an incoming post and
    # becomes a directory name, so it must be a single path component
    if '/' in project_name or project_name in ('.', '..'):
        return False

    account_dir = acct_dir(base_dir, nickname, domain)
    project_dir = account_dir + '/patches/' + project_name
    patch_filename = project_dir + '/' + patch_subject + '.patch'
    patch_notify_filename = account_dir + '/.newPatchContent'

    patch_str = \
        _git_add_from_handle(patch_str,
                             '@' + from_nickname + '@' + from_domain)
    try:
        # creates the patches directory too if needed. exist_ok avoids
        # failing when another thread creates the directory first, and
        # being inside the try means that a failure here is reported
        # in the same way as a failed write
        os.makedirs(project_dir, exist_ok=True)
        with open(patch_filename, 'w+', encoding='utf-8') as fp_patch:
            fp_patch.write(patch_str)
        with open(patch_notify_filename, 'w+',
                  encoding='utf-8') as fp_patch_notify:
            fp_patch_notify.write(patch_str)
        return True
    except OSError as ex:
        print('EX: receive_git_patch ' + patch_filename + ' ' + str(ex))
    return False
2023-08-27 13:02:19 +00:00
def get_repo_url(actor_json: {}) -> str:
    """Returns a link used for code repo
    The actor attachments are searched twice. Firstly for a property
    whose name is that of a known code hosting service, then for any
    property whose value mentions a known code hosting domain.
    Attachments arrive from other instances, so entries which are not
    dicts, or whose name, type or value are not strings, are skipped.
    Returns an empty string if no repo link is found
    """
    if not actor_json.get('attachment'):
        return ''
    if not isinstance(actor_json['attachment'], list):
        return ''
    repo_type = ('github', 'ghub', 'gitlab', 'glab', 'codeberg', 'launchpad',
                 'sourceforge', 'bitbucket', 'gitea')
    # first pass: match on the name of the property
    for property_value in actor_json['attachment']:
        if not isinstance(property_value, dict):
            continue
        name_value = None
        if property_value.get('name'):
            name_value = property_value['name']
        elif property_value.get('schema:name'):
            name_value = property_value['schema:name']
        if not name_value or not isinstance(name_value, str):
            continue
        if name_value.lower() not in repo_type:
            continue
        if not isinstance(property_value.get('type'), str):
            continue
        if not property_value['type'].endswith('PropertyValue'):
            continue
        prop_value_name, prop_value = \
            get_attachment_property_value(property_value)
        if not prop_value:
            continue
        value_text = property_value[prop_value_name]
        if not isinstance(value_text, str):
            continue
        if '<a href="' in value_text:
            # take the link target rather than the anchor markup
            repo_url = value_text.split('<a href="')[1]
            if '"' in repo_url:
                repo_url = repo_url.split('"')[0]
        else:
            repo_url = value_text
        if '.' not in repo_url:
            continue
        repo_url = remove_html(repo_url)
        return remove_link_tracking(repo_url)

    # second pass: match on the domain mentioned within the value
    repo_sites = ('github.com', 'gitlab.com', 'codeberg.org')
    for property_value in actor_json['attachment']:
        if not isinstance(property_value, dict):
            continue
        if not isinstance(property_value.get('type'), str):
            continue
        if not property_value['type'].endswith('PropertyValue'):
            continue
        prop_value_name, prop_value = \
            get_attachment_property_value(property_value)
        if not prop_value:
            continue
        repo_url = property_value[prop_value_name]
        if not isinstance(repo_url, str):
            continue
        if not string_contains(repo_url, repo_sites):
            continue
        repo_url = remove_html(repo_url)
        return remove_link_tracking(repo_url)
    return ''