From 9d97665c9570729809feec60683e563f5557f05c Mon Sep 17 00:00:00 2001
From: Bob Mottram <bob@libreserver.org>
Date: Sat, 18 Feb 2023 22:53:33 +0000
Subject: [PATCH] Get video transcript

---
 daemon.py | 26 ++++++++++++++++++++++++++
 media.py  | 10 +++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/daemon.py b/daemon.py
index 49c08084e..93e25a567 100644
--- a/daemon.py
+++ b/daemon.py
@@ -146,6 +146,7 @@ from media import replace_you_tube
 from media import replace_twitter
 from media import attach_media
 from media import path_is_video
+from media import path_is_transcript
 from media import path_is_audio
 from blocking import import_blocking_file
 from blocking import export_blocking_file
@@ -8931,6 +8932,7 @@ class PubServer(BaseHTTPRequestHandler):
         """
         if is_image_file(path) or \
            path_is_video(path) or \
+           path_is_transcript(path) or \
            path_is_audio(path):
             media_str = path.split('/media/')[1]
             media_filename = base_dir + '/media/' + media_str
@@ -8947,6 +8949,30 @@ class PubServer(BaseHTTPRequestHandler):
                 last_modified_time_str = \
                     last_modified_time.strftime('%a, %d %b %Y %H:%M:%S GMT')
 
+                if media_filename.endswith('.vtt'):
+                    media_transcript = None
+                    try:
+                        with open(media_filename, 'r',
+                                  encoding='utf-8') as fp_vtt:
+                            media_transcript = fp_vtt.read()
+                            media_file_type = 'text/vtt'
+                    except OSError:
+                        print('EX: unable to read media binary ' +
+                              media_filename)
+                    if media_transcript:
+                        self._set_headers_etag(media_filename, media_file_type,
+                                               media_transcript, None,
+                                               None, True,
+                                               last_modified_time_str)
+                        self._write(media_transcript)
+                        fitness_performance(getreq_start_time,
+                                            self.server.fitness,
+                                            '_GET', '_show_media',
+                                            self.server.debug)
+                        return
+                    self._404()
+                    return
+
                 media_binary = None
                 try:
                     with open(media_filename, 'rb') as av_file:
diff --git a/media.py b/media.py
index 0d05fb222..5ab900802 100644
--- a/media.py
+++ b/media.py
@@ -640,7 +640,7 @@ def attach_media(base_dir: str, http_prefix: str,
                 'mediaType': 'text/vtt',
                 'name': system_language,
                 'type': 'Document',
-                'url': http_prefix + '://' + domain + '/' + media_path
+                'url': http_prefix + '://' + domain + '/' + media_path + '.vtt'
              }
             post_json['attachment'].append(video_transcript_json)
 
@@ -697,6 +697,14 @@ def path_is_video(path: str) -> bool:
     return False
 
 
+def path_is_transcript(path: str) -> bool:
+    """Returns true if the given path ends with the .vtt extension
+    used by WebVTT video transcripts
+    """
+    has_vtt_extension = path.endswith('.vtt')
+    return has_vtt_extension
+
+
 def path_is_audio(path: str) -> bool:
     """Is the given path an audio file?
     """