In [2]:
import moviepy.editor as mp
import os
import cv2
import speech_recognition as sr

In [3]:
def extract_audio(video_path, audio_path):
    """Write the audio track of a video to ``audio_path`` as mono 16-bit PCM.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the audio file to create (e.g. a ``.wav`` file).

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_path)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the call below fails with an opaque
            # AttributeError on NoneType.
            raise ValueError(f"No audio track found in video: {video_path}")

        # "-ac 1" asks ffmpeg to downmix to a single (mono) channel.
        audio.write_audiofile(audio_path, codec='pcm_s16le', ffmpeg_params=["-ac", "1"])
    finally:
        # Release the clip's underlying reader resources even if writing fails.
        video.close()


def extract_frames(video_path, frames_path, max_frames=200):
    """Save up to ``max_frames`` evenly spaced frames of a video as JPEG files.

    Frames are written into ``frames_path`` as ``frame_1.jpg``,
    ``frame_2.jpg``, ... in playback order. The directory is not created
    here; it must already exist.

    Args:
        video_path: Path of the video file to read.
        frames_path: Existing directory that receives the JPEG files.
        max_frames: Upper bound on the number of frames to save. If the video
            reports fewer frames, every frame is sampled. Values <= 0 save
            nothing.

    Raises:
        IOError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        sample_count = min(max_frames, frame_count)
        if sample_count <= 0:
            # Nothing to sample; also avoids dividing by zero below.
            return

        saved = 0
        for step in range(sample_count):
            # Spread the samples evenly over the whole video. This yields
            # exactly ``sample_count`` distinct indices, so ``max_frames`` is
            # never exceeded even when the frame count is not a multiple of it.
            frame_index = step * frame_count // sample_count
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            ok, frame = cap.read()
            if not ok or frame is None:
                # Skip frames that could not be decoded instead of passing
                # None to imwrite.
                continue

            # Write each frame immediately rather than buffering all of them
            # in memory first.
            saved += 1
            cv2.imwrite(os.path.join(frames_path, f"frame_{saved}.jpg"), frame)
    finally:
        cap.release()

def extract_transcript(audio_path, transcript_path):
    """Transcribe an audio file and write the text to ``transcript_path``.

    The whole audio file is read into memory and passed to
    ``Recognizer.recognize_google``; the returned text is written as UTF-8.

    Args:
        audio_path: Path of the audio file to transcribe.
        transcript_path: Path of the text file to create or overwrite.

    Raises:
        Any exception raised by the ``speech_recognition`` calls is propagated
        unchanged; per that library's documentation ``recognize_google`` raises
        ``UnknownValueError`` for unintelligible speech and ``RequestError``
        when the recognition request fails.
    """
    recognizer = sr.Recognizer()

    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)

    transcript = recognizer.recognize_google(audio)

    # Explicit encoding: the platform default may be unable to represent
    # non-ASCII characters in the transcript.
    with open(transcript_path, "w", encoding="utf-8") as file:
        file.write(transcript)

if __name__ == "__main__":
    # Input video and the artifacts derived from it.
    video_path = "Sample.mp4"
    audio_path = "output_audio.wav"
    frames_path = "frames_folder"
    transcript_path = "transcript.txt"

    # Task 1: Extract audio
    extract_audio(video_path, audio_path)

    # Task 2: Extract frames
    # exist_ok=True makes re-running the cell safe without a separate
    # check-then-create step.
    os.makedirs(frames_path, exist_ok=True)
    extract_frames(video_path, frames_path)

    # Task 3: Extract transcript (reads the audio file written by Task 1)
    extract_transcript(audio_path, transcript_path)


MoviePy - Writing audio in output_audio.wav


                                                                  

MoviePy - Done.
