In [13]:
from moviepy.editor import VideoFileClip
import pysrt

from omegaconf import OmegaConf
from pathlib import Path
import math

In [11]:

class Frames:
    """Access the frames (and, eventually, the subtitles) of one dataset video.

    The dataset base directory is read from a YAML config under the key
    ``datasets.base_dir``. The video and subtitle files are looked up at
    ``<base_dir>/<video_id>.mp4`` and ``<base_dir>/<video_id>.srt``.

    The instance keeps the video clip open; call :meth:`close` (or use the
    instance as a context manager) when done to release it.
    """

    def __init__(self, video_id, config_path='../configs/path.yaml'):
        """Open the video identified by ``video_id``.

        Args:
            video_id: File stem of the ``.mp4`` / ``.srt`` pair in the base dir.
            config_path: YAML file providing ``datasets.base_dir``. A relative
                path is resolved against the current working directory.
        """
        self.path_config = OmegaConf.load(config_path)
        self.base_dir_path = Path(self.path_config['datasets']['base_dir'])

        self.video_path, self.subtitle_path = self.get_video_and_subtitle_path(video_id)
        self.video_clip = self.load_video(self.video_path)
        # self.subtitle = self.load_subtitle(self.subtitle_path)

        # Whole-number part of the clip duration, as reported by the clip.
        self.duration = math.floor(self.get_video_info()['duration'])
        # One timestamp per integer from 0 up to and including floor(duration).
        # NOTE(review): if the duration is a whole number, the last timestamp
        # equals the clip duration -- confirm get_frame accepts t == duration.
        self.default_timestamps_list = list(range(self.duration + 1))

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the video clip when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Release the resources held by the underlying video clip."""
        self.video_clip.close()

    def get_video_and_subtitle_path(self, video_id):
        """Return ``(video_path, subtitle_path)`` for ``video_id``.

        Both paths live directly under ``self.base_dir_path`` and differ only
        in their extension (``.mp4`` and ``.srt``).
        """
        video_path = self.base_dir_path / f"{video_id}.mp4"
        subtitle_path = self.base_dir_path / f"{video_id}.srt"
        return video_path, subtitle_path

    def load_video(self, video_path):
        """Open ``video_path`` as a ``VideoFileClip``."""
        return VideoFileClip(str(video_path))

    def load_subtitle(self, subtitle_path):
        """Open ``subtitle_path`` with ``pysrt`` and return the parsed file."""
        # Pass a plain string, mirroring how load_video hands its path over.
        return pysrt.open(str(subtitle_path))

    def get_video_info(self):
        """Return the clip's ``duration``, ``fps`` and ``resolution`` as a dict."""
        return {
            'duration': self.video_clip.duration,
            'fps': self.video_clip.fps,
            'resolution': self.video_clip.size
        }

    def get_frame_image_by_time(self, timestamps_list=None):
        """Return the frame images at the requested timestamps.

        Args:
            timestamps_list: Iterable of timestamps to sample. When ``None``,
                ``self.default_timestamps_list`` is used. An empty iterable
                yields an empty list.

        Returns:
            A list with one ``video_clip.get_frame(t)`` result per timestamp,
            in the order the timestamps were given.
        """
        # Previously the argument was ignored and the defaults were always used.
        if timestamps_list is None:
            timestamps_list = self.default_timestamps_list

        return [self.video_clip.get_frame(timestamp) for timestamp in timestamps_list]

    def get_frame_subtitle_by_time(self, timestamps_list=None):
        """Placeholder for subtitle lookup by timestamp.

        Not implemented yet; currently always returns ``None``.
        """
        return None

In [14]:
# Open one sample video; the ID is the file stem of its .mp4/.srt pair in the dataset base dir.
frames = Frames('7183343454993485115')

In [15]:
# Show the duration, fps and resolution reported by the loaded clip.
frames.get_video_info()

{'duration': 7.89, 'fps': 25.0, 'resolution': [576, 1024]}

In [17]:
# With None, frames are sampled at frames.default_timestamps_list
# (integer timestamps 0..floor(duration)).
eg1 = frames.get_frame_image_by_time(None)

In [22]:
# Sanity check: print the shape of every sampled frame.
# The recorded output shows (1024, 576, 3) for a [576, 1024] resolution,
# i.e. apparently (height, width, channels).
for frame in eg1:
    print(frame.shape)

(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)
(1024, 576, 3)


In [23]:
import pandas as pd

In [37]:
# Load the dataset metadata table. video_id is forced to str so the IDs stay
# text (Frames uses the ID as a file stem).
# NOTE(review): this is an absolute, machine-specific path -- consider reading
# it from the path config instead.
all_data = pd.read_json(r'D:\dcmt\dataset\news_emotion\all_data.json', dtype={'video_id': str})
all_data

Unnamed: 0,video_id,title,duration,emotion,account_name
0,7183343454993485115,新年快乐，皆得所愿！#你好2023,7.89,盼望,人民日报
1,7185030947190967555,种菜、K歌、打太极，看航天员在轨生活的快乐瞬间。,18.37,喜悦,人民日报
2,7185400788041108796,美导弹驱逐舰过航台湾海峡，东部战区：一切动向尽在掌握。,17.13,生气,人民日报
3,7186896342118124855,2022年，全国308名民警、179名辅警因公牺牲。中国人民警察节，致敬英雄！,20.65,难过,人民日报
4,7187017932747148581,中国驻日本大使馆：即日起，暂停审发日本公民赴华普通签证。,7.57,喜悦,人民日报
...,...,...,...,...,...
1359,7318346122936864009,31日晚7点，国家主席习近平将发表二〇二四年新年贺词！央视新闻，全程直播！ #2024新年贺词,17.96,喜悦,央视新闻
1360,7318603668754844968,2024马上要来啦！愿您在新的一年，一针一线织就锦绣前程！当非遗刺绣遇见宋画千里江山，新年新...,26.05,喜悦,央视新闻
1361,7318712480367250739,“我向大家致以新年的祝福！”跨年之夜，这个温暖而坚定的声音，给予我们前行的力量。#2024新年贺词,21.80,喜悦,央视新闻
1362,7318714640379284755,习近平：悠久历史，博大文明，是我们的自信之基、力量之源。#2024新年贺词,13.89,信任,央视新闻
