In [1]:
# Mount Google Drive at /content/drive; the file paths used by later cells
# live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import csv
import re

# 辅助函数：将时间戳转换为总秒数并舍去小数部分
def time_to_seconds(time_str):
    """Convert an ``HH:MM:SS,mmm`` timestamp string to whole seconds.

    The millisecond field is split off and discarded, so the result is
    truncated (never rounded) to an integer.

    Args:
        time_str: Timestamp such as ``"01:02:03,456"``.

    Returns:
        int: ``hours * 3600 + minutes * 60 + seconds``.

    Raises:
        ValueError: If the string does not have exactly three
            colon-separated fields, if the last field does not contain
            exactly one comma, or if a field is not an integer.
    """
    hours, minutes, sec_field = time_str.split(':')
    # Two-name unpacking enforces that the field contains exactly one comma.
    whole_sec, _millis = sec_field.split(',')
    return 3600 * int(hours) + 60 * int(minutes) + int(whole_sec)

def srt_to_csv(srt_file, csv_file):
    """Convert an SRT subtitle file into a CSV of timed annotations.

    Every cue found in ``srt_file`` becomes one CSV row with the columns
    ``start_time``, ``stop_time`` (both converted by ``time_to_seconds``)
    and ``annotation`` (the cue text with its lines joined by single
    spaces and surrounding whitespace removed).

    Args:
        srt_file: Path of the UTF-8 encoded ``.srt`` file to read.
        csv_file: Path of the ``.csv`` file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(srt_file, 'r', encoding='utf-8') as file:
        srt_data = file.read()

    # The text group starts AT the newline that ends the timestamp line
    # instead of after it.  That lets the "blank line" lookahead succeed
    # immediately for a cue with no text, so an empty cue no longer swallows
    # the cue that follows it.  The lookahead also accepts separator lines
    # that contain only spaces/tabs, and `\s*\Z` keeps a trailing newline at
    # end of file out of the last cue's text.
    pattern = re.compile(
        r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})[ \t]*(?=\n)'
        r'(.*?)(?=\n[ \t]*\n|\s*\Z)',
        re.DOTALL,
    )
    matches = pattern.findall(srt_data)

    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['start_time', 'stop_time', 'annotation']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for start_stamp, stop_stamp, text in matches:
            writer.writerow({
                'start_time': time_to_seconds(start_stamp),
                'stop_time': time_to_seconds(stop_stamp),
                # strip() drops the leading newline captured by the group and
                # any stray edge whitespace; inner line breaks become spaces.
                'annotation': text.strip().replace('\n', ' '),
            })

    print(f"成功将 {srt_file} 转换为 {csv_file}")

# Paths
# The commented-out assignments are alternative local (/Users/...) locations
# kept for reference; only the Google Drive paths are active.
#srt_file = '/Users/zhaolingfeng/Documents/DataSet/Hiroshima/ESTE-SIM/front  9/25（已上传云盘）/.srt'  # input .srt file path
srt_file = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1F02.srt'  # input .srt file path
#srt_file = '/Users/zhaolingfeng/Documents/DataSet/Hiroshima/ESTE-SIM/front  9/27（已上传云盘）/.srt'  # input .srt file path

#csv_file = '/Users/zhaolingfeng/Documents/DataSet/Hiroshima/EESTE-SIM_Label/SIM-F26001.csv'  # output .csv file path
csv_file = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1F02.csv'  # output .csv file path
#csv_file = '/Users/zhaolingfeng/Documents/DataSet/Hiroshima/EESTE-SIM_Label/SIM-F26001.csv'  # output .csv file path

# Run the conversion; srt_to_csv prints a confirmation line when it finishes.
srt_to_csv(srt_file, csv_file)

成功将 /content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1F02.srt 转换为 /content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1F02.csv


In [20]:
import csv
import re

# Helper: convert an SRT timestamp to total seconds; the fractional part is dropped unless keep_milliseconds=True
def time_to_seconds(time_str, keep_milliseconds=False):
    """Convert an ``HH:MM:SS,mmm`` timestamp string to seconds.

    Args:
        time_str: Timestamp such as ``"00:01:02,345"``.
        keep_milliseconds: When truthy, append the millisecond digits
            instead of discarding them.

    Returns:
        int: Whole seconds (milliseconds truncated) when
        ``keep_milliseconds`` is falsy.
        str: ``"<whole seconds>.<millisecond digits>"`` (for example
        ``"62.345"``) when ``keep_milliseconds`` is truthy.  The digits are
        copied verbatim from the input, not re-formatted.

    Raises:
        ValueError: If the string does not have exactly three
            colon-separated fields, if the last field does not contain
            exactly one comma, or if an hour/minute/second field is not an
            integer.
    """
    hours, minutes, sec_field = time_str.split(':')
    whole_sec, millis = sec_field.split(',')
    whole_total = 3600 * int(hours) + 60 * int(minutes) + int(whole_sec)

    if not keep_milliseconds:
        return whole_total
    # Returned as text so the millisecond digits reach the caller unchanged.
    return f"{whole_total}.{millis}"

def srt_to_csv(srt_file, csv_file):
    """Convert an SRT subtitle file into a CSV of timed annotations.

    Every cue found in ``srt_file`` becomes one CSV row with the columns
    ``start_time``, ``stop_time`` and ``annotation``.  Times are converted
    by ``time_to_seconds``; they are whole seconds except for the stop time
    of the final cue, which keeps its millisecond digits.  The annotation is
    the cue text with its lines joined by single spaces and surrounding
    whitespace removed.

    Args:
        srt_file: Path of the UTF-8 encoded ``.srt`` file to read.
        csv_file: Path of the ``.csv`` file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(srt_file, 'r', encoding='utf-8') as file:
        srt_data = file.read()

    # The text group starts AT the newline that ends the timestamp line
    # instead of after it.  That lets the "blank line" lookahead succeed
    # immediately for a cue with no text, so an empty cue no longer swallows
    # the cue that follows it.  The lookahead also accepts separator lines
    # that contain only spaces/tabs, and `\s*\Z` keeps a trailing newline at
    # end of file out of the last cue's text.
    pattern = re.compile(
        r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})[ \t]*(?=\n)'
        r'(.*?)(?=\n[ \t]*\n|\s*\Z)',
        re.DOTALL,
    )
    matches = pattern.findall(srt_data)

    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['start_time', 'stop_time', 'annotation']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        last_index = len(matches) - 1
        for index, (start_stamp, stop_stamp, text) in enumerate(matches):
            # Only the final cue keeps the milliseconds of its stop time
            # (presumably to record the exact end of the annotated span -
            # TODO confirm with the data consumer).
            is_last = index == last_index
            writer.writerow({
                'start_time': time_to_seconds(start_stamp),
                'stop_time': time_to_seconds(stop_stamp, keep_milliseconds=is_last),
                # strip() drops the leading newline captured by the group and
                # any stray edge whitespace; inner line breaks become spaces.
                'annotation': text.strip().replace('\n', ' '),
            })

    print(f"成功将 {srt_file} 转换为 {csv_file}")

# Paths
srt_file = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1N01.srt'  # input .srt file path
csv_file = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1N01.csv'  # output .csv file path

# Run the conversion; srt_to_csv prints a confirmation line when it finishes.
srt_to_csv(srt_file, csv_file)

成功将 /content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1N01.srt 转换为 /content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Annotation_CSV/Front_T1N01.csv
