In [5]:
import pandas as pd
from pathlib import Path
from omegaconf import OmegaConf, DictConfig

In [2]:
from moviepy.editor import VideoFileClip
import pysrt
from datetime import timedelta

In [3]:
def get_video_info(video_file):
    """Return the duration, frame rate and resolution of a video file.

    Args:
        video_file: Path of the video; passed unchanged to ``VideoFileClip``.

    Returns:
        A ``(duration, fps, resolution)`` tuple taken from the clip's
        ``duration`` (seconds), ``fps`` and ``size`` attributes.
    """
    clip = VideoFileClip(video_file)
    try:
        return clip.duration, clip.fps, clip.size
    finally:
        # Always release the clip's underlying reader; without this every
        # call (e.g. once per file in a directory scan) leaks a handle.
        clip.close()

In [6]:
# Load the demo data locations from the project config. The path is relative,
# so this cell only works when the kernel's working directory is the
# notebook's own folder.
demo_data_path = OmegaConf.load('../configs/demo_data_path.yaml')

# Dataset-level locations. `video_path_str` is used as a directory that is
# globbed for *.mp4 files; the audio/subtitle entries are presumably the
# matching directories — TODO confirm against the YAML.
video_path_str = demo_data_path['video_path_str']
audio_path_str = demo_data_path['audio_path_str']
subtitle_path_str = demo_data_path['subtitle_path_str']

# Single-example locations: the video and subtitle entries are opened directly
# as files later in this notebook; the audio entry is not used here.
video_eg_path_str = demo_data_path['video_eg_path_str']
subtitle_eg_path_str = demo_data_path['subtitle_eg_path_str']
audio_eg_path_str = demo_data_path['audio_eg_path_str']

In [5]:
# Show the example subtitle path that was read from the config.
subtitle_eg_path_str

In [16]:
def print_fps(video_path_str):
    """Print the name and video info of every ``*.mp4`` file in a directory.

    Despite the name, the whole tuple returned by ``get_video_info`` is
    printed for each file, not only the frame rate.

    Args:
        video_path_str: Directory to scan. Only files directly inside it that
            match ``*.mp4`` are visited (the glob is not recursive).
    """
    video_dir = Path(video_path_str)
    # glob() yields entries in filesystem-dependent order; sorting keeps the
    # printed listing identical across re-runs and machines.
    for file in sorted(video_dir.glob("*.mp4")):
        print(file.name)
        print(get_video_info(str(file)))

# Print the name and video info of every .mp4 directly under the configured
# video directory.
print_fps(video_path_str)

In [21]:
# Parse the example subtitle file; `subs` is reused by the inspection cells
# that follow.
subs = pysrt.open(subtitle_eg_path_str)

In [None]:
# Print every parsed subtitle entry.
# NOTE(review): this dumps the whole subtitle file into the cell output; the
# next cell already prints just the first entry, so consider limiting or
# removing this one.
for sub in subs:
    print(sub)

In [25]:
# Inspect only the first subtitle entry: the entry itself, then its start
# time, end time and text, one per line.
for sub in subs:
    print(sub, sub.start, sub.end, sub.text, sep="\n")
    break

In [31]:
def get_subtitle_at_time(srt_file, target_time):
    """Return the text of the first subtitle that covers ``target_time``.

    Args:
        srt_file: Subtitle file, passed unchanged to ``pysrt.open``.
        target_time: Time point compared against each entry's ``start`` and
            ``end`` (both bounds inclusive).

    Returns:
        The ``text`` of the first matching entry in file order, or ``None``
        when no entry covers the time point.
    """
    entries = pysrt.open(srt_file)
    covering_texts = (
        entry.text
        for entry in entries
        if entry.start <= target_time <= entry.end
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(covering_texts, None)

In [44]:
# Look up the subtitle text shown at the 5-second mark of the example file.
get_subtitle_at_time(subtitle_eg_path_str, pysrt.SubRipTime(seconds=5))

In [5]:
def extract_frames(video_path, timestamps):
    """Grab one frame from a video for each requested timestamp.

    Args:
        video_path: Path of the video; passed unchanged to ``VideoFileClip``.
        timestamps: Iterable of time positions; each one is passed unchanged
            to ``clip.get_frame``.

    Returns:
        A list with one frame per timestamp, in input order (numpy arrays,
        per the original comment). Empty when ``timestamps`` is empty.
    """
    clip = VideoFileClip(video_path)
    try:
        return [clip.get_frame(timestamp) for timestamp in timestamps]
    finally:
        # Close the clip even if a frame lookup fails; the original left the
        # underlying reader open after every call.
        clip.close()

In [8]:
# Sample three frames of the example video, at timestamps 1, 2 and 3.
eg_frames = extract_frames(video_eg_path_str, [1, 2, 3])

In [10]:
# Shape of the first sampled frame.
eg_frames[0].shape

In [11]:
def get_subtitles_at_times(srt_file, target_times):
    """Return the subtitle text covering each of several time points.

    The subtitle file is opened once and reused for every lookup.

    Args:
        srt_file: Subtitle file, passed unchanged to ``pysrt.open``.
        target_times: Iterable of time points; each is compared against every
            entry's ``start`` and ``end`` (both bounds inclusive).

    Returns:
        A list aligned with ``target_times``: the ``text`` of the first
        covering entry in file order, or ``None`` where nothing matches.
    """
    entries = pysrt.open(srt_file)

    def _text_at(moment):
        # First entry whose [start, end] interval contains the moment.
        return next(
            (entry.text for entry in entries if entry.start <= moment <= entry.end),
            None,
        )

    return [_text_at(moment) for moment in target_times]


In [12]:
# Batch lookup: subtitle text at the 5 s and 10 s marks of the example file.
get_subtitles_at_times(subtitle_eg_path_str, [pysrt.SubRipTime(seconds=5), pysrt.SubRipTime(seconds=10)])

In [1]:
import torchaudio

In [2]:
# Load one WAV file as (waveform, sample_rate).
# NOTE(review): hard-coded absolute Windows path — this cell only runs on the
# author's machine. Consider building the path from a configurable data
# directory instead (the demo config has an audio path entry — TODO confirm
# it points at this folder).
waveform, sample_rate = torchaudio.load(r"D:\dcmt\dataset\news_emotion\audio\7183343024657747258.wav", format="wav")

In [3]:
# Sample rate reported by torchaudio for the loaded file.
sample_rate

In [3]:
# Inspect the loaded waveform returned by torchaudio.load.
waveform

In [4]:
from embedding import AudioEncoder

In [5]:
# Build the project's audio encoder with its default arguments.
audio_encoder = AudioEncoder()

In [6]:
from audio import Audio

In [7]:
# Wrap one sample in the project's Audio class. The argument looks like a
# sample ID rather than a file path — TODO confirm against audio.Audio.
audio = Audio('7184773290962013477')

In [8]:
# Load the sample's audio and display whatever load_audio() returns.
audio.load_audio()

In [9]:
# Encode the sample's audio.
# NOTE(review): load_audio() is called again here rather than reusing a stored
# result from the previous cell; if loading is expensive and uncached, assign
# it to a variable once and pass that in — verify against audio.Audio.
outputs = audio_encoder.encode(audio.load_audio())

In [10]:
# Inspect the raw encoder output object.
outputs

In [11]:
# NOTE(review): the following cells read `outputs.last_hidden_state` and
# `outputs.extract_features`, which suggests `outputs` is a model-output
# container rather than a tensor. If so, `.shape` raises AttributeError and
# this cell should be removed — TODO confirm.
outputs.shape

In [12]:
# Shape of the audio encoder's last hidden state.
outputs.last_hidden_state.shape

In [13]:
# Inspect the values of the audio encoder's last hidden state.
outputs.last_hidden_state

In [14]:
# Shape of the `extract_features` field of the audio encoder output.
outputs.extract_features.shape

In [1]:
from embedding import TextEncoder

In [2]:
# Build the project's text encoder with its default arguments.
text_encoder = TextEncoder()

In [12]:
# Encode a mixed Chinese/English sentence as a smoke test for the text encoder.
text_outputs = text_encoder.encode("喂喂喂，你是谁？ How are you?今天是星期五")

In [13]:
# Inspect the raw text encoder output object.
text_outputs

In [14]:
# Shape of the text encoder's last hidden state.
text_outputs.last_hidden_state.shape

In [11]:
# Shape of the `pooler_output` field of the text encoder output.
text_outputs.pooler_output.shape