In [None]:
import os
import whisper

def seconds_to_srt_time(seconds):
    """Convert a duration in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The offset formatted as ``HH:MM:SS,mmm``.
    """
    # Round to whole milliseconds once, then split with integer arithmetic.
    # Truncating the float fraction instead turns 1.001 s into ",000"
    # because 1.001 % 1 is 0.000999... in binary floating point.
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

def generate_srt_file(input_mp4_file, output_srt_file):
    """Generates an SRT file from the specified MP4 file.

    Loads the Whisper "base" model, transcribes the input file, and writes
    one numbered SRT cue per transcript segment.

    Args:
        input_mp4_file: The path to the MP4 file to transcribe.
        output_srt_file: The path to the SRT file to create (overwritten if
            it already exists).
    """

    # Load the Whisper model.
    model = whisper.load_model("base")

    # Transcribe the audio file.
    transcript = model.transcribe(input_mp4_file)

    # Create the SRT file. UTF-8 is set explicitly so non-ASCII transcripts
    # do not depend on (or fail under) the platform's default encoding.
    with open(output_srt_file, "w", encoding="utf-8") as srt_file:
        for index, segment in enumerate(transcript["segments"], start=1):
            start_time = seconds_to_srt_time(segment["start"])
            end_time = seconds_to_srt_time(segment["end"])
            # Segment text may carry surrounding whitespace (Whisper's own
            # SRT writer strips it too); strip so cues start at column 0.
            text = segment["text"].strip()
            srt_file.write(f"{index}\n{start_time} --> {end_time}\n{text}\n\n")


In [None]:
# Get the input and output directories.
input_dir = os.path.join(os.getcwd(), "input")
output_dir = os.path.join(os.getcwd(), "output")

# Make sure the output directory exists; open() in write mode does not
# create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

# List only the MP4 files in the input directory (case-insensitive), in a
# stable order. Other entries (subdirectories, stray files) are skipped.
mp4_files = sorted(
    name for name in os.listdir(input_dir) if name.lower().endswith(".mp4")
)

# For each MP4 file, generate an SRT file and save it to the output directory.
# The output keeps the full input name and appends ".srt" (video.mp4.srt).
for mp4_file in mp4_files:
    input_mp4_file = os.path.join(input_dir, mp4_file)
    output_srt_file = os.path.join(output_dir, mp4_file + ".srt")
    generate_srt_file(input_mp4_file, output_srt_file)