In [1]:
import datetime
import functools
import json
import os
import time

import pandas as pd
import pysrt
import requests
import tqdm
from moviepy.editor import VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_audio
from openai import OpenAI
from pydub import AudioSegment

In [2]:
# Shared API client used by the transcription helper. The key is read from the
# OPENAI_API_KEY environment variable; passing it explicitly mirrors the
# library default, so the argument could be omitted.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
def make_openai_audio(file, model_name="whisper-1"):
    """Transcribe an audio file through the OpenAI transcription endpoint.

    Args:
        file: Path of the audio file to upload.
        model_name: Name of the transcription model to request. Defaults to
            "whisper-1", the model that was previously hard-coded here.

    Returns:
        The API response for a request made with ``response_format="srt"``,
        returned unchanged.
    """
    # The context manager closes the handle even when the request raises;
    # previously the file object was opened and never closed.
    with open(file, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model=model_name,  # honour the argument instead of ignoring it
            file=audio_file,
            response_format="srt",
        )

In [4]:
def extract_audio_from_video(input_path, output_path):
    """Write the audio track of a video file to ``output_path``.

    Args:
        input_path: Path of the video file to read.
        output_path: Path of the audio file to create.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = VideoFileClip(input_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in {input_path!r}")
        audio_clip.write_audiofile(output_path)
    finally:
        # Release the underlying reader resources whether or not writing succeeded.
        video_clip.close()

In [5]:
def export_srt_from_string(subtitle_string, filename):
    """Write SRT-formatted text to ``filename`` with sequential cue numbers.

    The input is split into cues on blank lines. Each cue is expected to be an
    index line, a ``start --> end`` timing line, and zero or more text lines.
    Cues are renumbered from 1 in the order they appear.

    Args:
        subtitle_string: SRT content as a single string.
        filename: Path of the UTF-8 file to create (overwritten if present).
    """
    # Normalise line endings so CRLF input splits the same way as LF input.
    normalized = subtitle_string.replace('\r\n', '\n').replace('\r', '\n').strip()

    cue_number = 0
    with open(filename, 'w', encoding='utf-8') as file:
        for entry in normalized.split('\n\n'):
            lines = entry.strip().split('\n')
            # Skip empty or malformed chunks (e.g. an empty transcript or a
            # stray extra blank line) instead of raising IndexError.
            if len(lines) < 2 or ' --> ' not in lines[1]:
                continue
            start_time, end_time = lines[1].split(' --> ', 1)
            # A cue may span several text lines; keep all of them rather than
            # only the first one.
            text = '\n'.join(lines[2:])
            cue_number += 1
            file.write(f"{cue_number}\n{start_time} --> {end_time}\n{text}\n\n")


In [6]:
def split_audio(input_file, chunk_size):
    """Split a video's audio track into equal-duration MP3 chunks.

    The full audio track is first written to a temporary MP3 only to measure
    its size on disk; that size divided by ``chunk_size`` decides how many
    chunks to produce. Chunks are written as ``cache/audio_chunk_<n>.mp3``.

    Args:
        input_file: Path of the video file to read.
        chunk_size: Target upper bound for one chunk, in bytes.

    Returns:
        List of chunk file paths, in playback order.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the video has no
            audio track.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    temp_audio = "temp.mp3"
    cache_dir = "cache"
    # Writing chunks fails on a fresh checkout unless the directory exists.
    os.makedirs(cache_dir, exist_ok=True)

    chunk_files = []
    video_clip = VideoFileClip(input_file)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise ValueError(f"No audio track found in {input_file!r}")

        # The temporary file is needed only for its size, so remove it right
        # away, including when writing or measuring fails.
        try:
            audio_clip.write_audiofile(temp_audio)
            file_size = os.path.getsize(temp_audio)
        finally:
            if os.path.exists(temp_audio):
                os.remove(temp_audio)

        num_chunks = int(file_size / chunk_size) + 1
        print("num_chunks " + str(num_chunks))

        total_duration = video_clip.duration
        # Computed once: the loop no longer overwrites this value while also
        # using it to derive the next chunk's boundaries.
        chunk_duration = total_duration / num_chunks
        for chunk_index in range(num_chunks):
            chunk_start = chunk_index * chunk_duration
            chunk_end = min((chunk_index + 1) * chunk_duration, total_duration)

            chunk_filename = os.path.join(cache_dir, f"audio_chunk_{chunk_index + 1}.mp3")
            audio_clip.subclip(chunk_start, chunk_end).write_audiofile(chunk_filename, codec="mp3")
            chunk_files.append(chunk_filename)
    finally:
        # Release the reader resources held by the clip.
        video_clip.close()
    return chunk_files



In [7]:
def combine_srt_files(output_srt, audio_clips, srt_files):
    """Merge per-chunk SRT files into one file with continuous timestamps.

    Each chunk's cues are shifted by the total length of the audio chunks that
    precede it, then all cues are renumbered and saved as UTF-8.

    Args:
        output_srt: Path of the combined SRT file to write.
        audio_clips: Audio chunk paths, in playback order.
        srt_files: SRT paths, one per entry of ``audio_clips``, same order.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    audio_clips = list(audio_clips)
    srt_files = list(srt_files)
    if len(audio_clips) != len(srt_files):
        # zip() would silently drop the unmatched tail and lose subtitles.
        raise ValueError(
            f"Expected one SRT file per audio clip, got {len(audio_clips)} "
            f"clips and {len(srt_files)} SRT files"
        )

    combined_srt = pysrt.SubRipFile()
    offset_ms = 0  # integer milliseconds; avoids a float ms -> s -> ms round trip

    for audio_clip, srt_file in zip(audio_clips, srt_files):
        single_srt = pysrt.open(srt_file)

        for item in single_srt:
            # Moves both the start and end time of the cue.
            item.shift(milliseconds=offset_ms)
            combined_srt.append(item)

        # len() of a pydub AudioSegment is its duration in milliseconds.
        offset_ms += len(AudioSegment.from_file(audio_clip))

    # Every chunk file numbers its cues from 1; renumber so the merged file
    # has unique, sequential indexes.
    combined_srt.clean_indexes()
    combined_srt.save(output_srt, encoding='utf-8')

In [8]:
def time_decorator(func):
    """Decorator that prints how long each successful call to ``func`` took.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints the elapsed
        seconds after it returns, and passes its return value through.
    """
    # functools.wraps keeps the wrapped function's __name__ and __doc__, so
    # decorated functions stay identifiable in tracebacks and help().
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments during the call.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - start_time
        print(f"The function took {elapsed_time} seconds to complete.")
        return result
    return wrapper

In [9]:
@time_decorator
def generate_transcript(input_video, output_script):
    """Transcribe a video into a single SRT file.

    The audio is split into chunks, each chunk is transcribed through
    ``make_openai_audio``, the per-chunk SRT text is written under ``cache/``,
    and the chunk files are merged into ``output_script``.

    Args:
        input_video: Path of the video file to transcribe.
        output_script: Path of the combined SRT file to write.

    Returns:
        0 once the combined file has been written.
    """
    chunk_size = 25 * 1000 * 1000  # 25MB in bytes
    cache_dir = "cache"

    # Create both output locations up front, so a missing directory is not
    # discovered only after the transcription requests have been made.
    os.makedirs(cache_dir, exist_ok=True)
    output_dir = os.path.dirname(output_script)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    audio_clips = split_audio(input_video, chunk_size)
    print("Split Complete")

    srt_files = []
    for index, output_file in enumerate(audio_clips):
        print(output_file)
        transcript = make_openai_audio(output_file, "whisper-1")
        srt_filename = os.path.join(cache_dir, f"audio_chunk_{index + 1}.srt")
        export_srt_from_string(transcript, srt_filename)
        srt_files.append(srt_filename)
    print("API call complete")

    combine_srt_files(output_script, audio_clips, srt_files)

    print("Combined SRT file created.")
    return 0

In [10]:
# Transcribe the sample video and write the merged subtitles as an SRT file.
input_video = "videos/andrew_ng_1.mp4"
output_script = "transcripts/andrew_ng_1.srt"
# The call is the cell's last expression, so its return value is displayed as the cell output.
generate_transcript(input_video, output_script)

MoviePy - Writing audio in temp.mp3


                                                                                

MoviePy - Done.
num_chunks 1
MoviePy - Writing audio in cache/audio_chunk_1.mp3


                                                                                

MoviePy - Done.
Split Complete
cache/audio_chunk_1.mp3
API call complete
Combined SRT file created.
The function took 13.901123285293579 seconds to complete.


0