In [1]:
from moviepy.editor import VideoFileClip
import pysrt

from omegaconf import OmegaConf
from pathlib import Path
import math

In [2]:
class Frames:
    """
    Sample a video at fixed timestamps and pair every sample with its subtitle.

    The video is read from ``<base_video_dir>/<video_id>.mp4`` and the subtitles
    from ``<base_subtitle_dir>/<video_id>.srt``; the directories come from the
    ``datasets`` section of the path config file.

    The instance keeps the video clip open. Call :meth:`close` when finished,
    or use the instance as a context manager::

        with Frames(video_id) as frames:
            images = frames.get_frame_image_by_time()
    """
    def __init__(self, video_id, path_config_path_str='../configs/path.yaml'):
        """
        Load the config, open the video and subtitle files, and build the
        default list of sampling timestamps.

        Args:
            video_id: Identifier used as the file stem of the video/subtitle.
            path_config_path_str: Path of the config file loaded with OmegaConf.
        """
        # Load the path configuration.
        self.path_config = OmegaConf.load(path_config_path_str)

        # Dataset directories for videos and subtitles.
        datasets_config = self.path_config['datasets']
        self.base_dir = Path(datasets_config['base_dir'])
        self.base_video_dir = Path(datasets_config['base_video_dir'])
        self.base_subtitle_dir = Path(datasets_config['base_subtitle_dir'])

        # Open the video and the subtitle file.
        self.video_path, self.subtitle_path = self.get_video_and_subtitle_path(video_id)
        self.video_clip = self.load_video(self.video_path)
        try:
            self.subtitle = self.load_subtitle(self.subtitle_path)

            # Sampling grid: the integers 0 .. floor(duration), inclusive.
            # NOTE(review): when the clip length is a whole number the last
            # timestamp equals the duration itself - confirm the video backend
            # handles that sample as intended.
            self.duration = math.floor(self.get_video_info()['duration'])
            self.default_timestamps_list = list(range(self.duration + 1))
        except Exception:
            # Do not leave the already-opened clip behind if the rest of the
            # initialisation fails (e.g. the subtitle file is missing).
            self.close()
            raise

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the video clip on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def close(self):
        """Release the underlying video clip, if one has been opened."""
        clip = getattr(self, 'video_clip', None)
        if clip is not None:
            clip.close()

    def get_video_and_subtitle_path(self, video_id):
        """Return ``(video_path, subtitle_path)`` for ``video_id`` following the dataset layout."""
        video_path = self.base_video_dir / f"{video_id}.mp4"
        subtitle_path = self.base_subtitle_dir / f"{video_id}.srt"
        return video_path, subtitle_path

    def load_video(self, video_path):
        """Open the video with moviepy and return the clip."""
        return VideoFileClip(str(video_path))

    def load_subtitle(self, subtitle_path):
        """Open the subtitle file with pysrt and return the parsed subtitles."""
        # Pass a plain string, mirroring how the video path is handed to moviepy.
        return pysrt.open(str(subtitle_path))

    def get_video_info(self):
        """Return the clip's basic info: ``duration``, ``fps`` and ``resolution``."""
        return {
            'duration': self.video_clip.duration,
            'fps': self.video_clip.fps,
            'resolution': self.video_clip.size
        }

    def get_frame_image_by_time(self):
        """Return the list of frames the clip yields at each default timestamp, in order."""
        return [
            self.video_clip.get_frame(timestamp)
            for timestamp in self.default_timestamps_list
        ]

    def get_frame_subtitle_by_time(self):
        """
        Return, for each default timestamp, the text of the first subtitle (in
        file order) whose ``[start, end]`` interval contains it, or ``None``.

        Every timestamp scans the subtitle list from the beginning, so the cost
        grows with (timestamps x subtitles); this could be optimised.
        """
        frames_subtitle = []
        for seconds in self.default_timestamps_list:
            moment = pysrt.SubRipTime(seconds=seconds)
            # First matching subtitle wins; None when nothing covers this moment.
            matched_text = next(
                (
                    subtitle.text
                    for subtitle in self.subtitle
                    if subtitle.start <= moment <= subtitle.end
                ),
                None,
            )
            frames_subtitle.append(matched_text)

        return frames_subtitle

In [3]:
# Build the sampler for one example video from the dataset.
frames = Frames(video_id='7183343454993485115')

In [4]:
# Show the clip's basic info (duration, fps, resolution).
frames.get_video_info()

{'duration': 7.89, 'fps': 25.0, 'resolution': [576, 1024]}

In [5]:
# Sample one frame image at each default timestamp.
eg1 = frames.get_frame_image_by_time()

In [6]:
# Echo the sampled frames (a list of raw arrays, one per default timestamp).
# NOTE(review): dumping every array is noisy; consider showing only len(eg1) or the array shapes.
eg1

[array([[[192,  63,  29],
         [191,  62,  28],
         [203,  58,  40],
         ...,
         [191,  59,  26],
         [181,  63,  20],
         [186,  68,  25]],
 
        [[193,  64,  30],
         [190,  61,  27],
         [204,  59,  41],
         ...,
         [191,  59,  26],
         [180,  62,  19],
         [174,  56,  13]],
 
        [[196,  59,  27],
         [199,  62,  30],
         [183,  65,  10],
         ...,
         [189,  61,  18],
         [201,  57,  27],
         [208,  64,  34]],
 
        ...,
 
        [[201,  62,  17],
         [202,  63,  18],
         [181,  68,  24],
         ...,
         [183,  64,  23],
         [188,  60,  35],
         [190,  62,  37]],
 
        [[201,  61,  39],
         [197,  57,  35],
         [206,  60,  32],
         ...,
         [200,  59,  35],
         [197,  57,  31],
         [198,  58,  32]],
 
        [[184,  44,  22],
         [201,  61,  39],
         [205,  59,  31],
         ...,
         [207,  66,  42],
  

In [7]:
# Look up the subtitle text at each default timestamp (None where no subtitle covers it).
eg2 = frames.get_frame_subtitle_by_time()
eg2

[None,
 '人客户端 你好 中国航天\nS',
 '你好\nS',
 '人客户端 你好 人客户端 你好',
 '你好 中国航天 你好 中国航天',
 '你好 中国航天',
 '你好 中国航天',
 '你好 中国航天']