#### 1. YT Video Downloading

In [14]:
from pathlib import Path
import os

# SET CONFIG
video_url = "https://youtu.be/1aA1WGON49E?feature=shared"
output_video_path = "./video_data/"
output_folder = "./mixed_data/"
output_frames_path = os.makedirs("./mixed_data/frames/", exist_ok=True) or "./mixed_data/frames/"
output_audio_path = "./mixed_data/output_audio.wav"

filepath = output_video_path + "trimmed_vid.mp4"
Path(output_folder).mkdir(parents=True, exist_ok=True)

In [15]:
import yt_dlp
import subprocess

def download_video(url, output_path, start_time=11):
    """
    Download a video from a given URL using yt-dlp, trim it from a specific start time, and save it.

    Parameters:
    url (str): The URL of the video to download.
    output_path (str): The path to save the video.
    start_time (int): The start time in seconds (default: 34s).

    Returns:
    dict: Metadata of the downloaded video.
    """
    video_path = f"{output_path}/input_vid.mp4"
    trimmed_path = f"{output_path}/trimmed_vid.mp4"

    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': video_path,
        'merge_output_format': 'mp4',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
    
    # Trim the video from start_time onwards
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-i", video_path,  # Input video
        "-ss", str(start_time),  # Start at 34 seconds
        "-c", "copy", trimmed_path  # Copy streams without re-encoding
    ]
    subprocess.run(ffmpeg_cmd, check=True)

    metadata = {
        "Author": info_dict.get("uploader", "Unknown"),
        "Title": info_dict.get("title", "Unknown"),
        "Views": info_dict.get("view_count", 0),
        "Trimmed Video": trimmed_path
    }

    return metadata


In [16]:
metadata_vid = download_video(video_url, output_video_path)

[youtube] Extracting URL: https://youtu.be/1aA1WGON49E?feature=shared
[youtube] 1aA1WGON49E: Downloading webpage
[youtube] 1aA1WGON49E: Downloading tv client config
[youtube] 1aA1WGON49E: Downloading player 69f581a5
[youtube] 1aA1WGON49E: Downloading tv player API JSON
[youtube] 1aA1WGON49E: Downloading ios player API JSON
[youtube] 1aA1WGON49E: Downloading m3u8 information
[info] 1aA1WGON49E: Downloading 1 format(s): 399+251
[download] Destination: video_data\input_vid.f399.mp4
[download] 100% of    5.29MiB in 00:00:00 at 9.38MiB/s   
[download] Destination: video_data\input_vid.f251.webm
[download] 100% of    1.02MiB in 00:00:00 at 3.42MiB/s   
[Merger] Merging formats into "video_data\input_vid.mp4"
Deleting original file video_data\input_vid.f399.mp4 (pass -k to keep)
Deleting original file video_data\input_vid.f251.webm (pass -k to keep)


In [17]:
print(metadata_vid)

{'Author': 'TEDx Talks', 'Title': 'A one minute TEDx Talk for the digital age | Woody Roseland | TEDxMileHigh', 'Views': 298236, 'Trimmed Video': './video_data//trimmed_vid.mp4'}


#### 2. Video to Audio

In [18]:
from moviepy import VideoFileClip
def video_to_audio(video_path, output_audio_path):
    """
    Convert a video to audio and save it to the output path.

    Parameters:
    video_path (str): The path to the video file.
    output_audio_path (str): The path to save the audio to.

    """
    clip = VideoFileClip(video_path)
    audio = clip.audio
    audio.write_audiofile(output_audio_path)

In [19]:
video_to_audio(filepath, output_audio_path)

{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'isom', 'minor_version': '512', 'compatible_brands': 'isomav01iso2mp41', 'encoder': 'Lavf61.9.104'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': 582, 'fps': 23.976023976023978, 'codec_name': 'av1', 'profile': '(libaom-av1)', 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 48000, 'bitrate': 99, 'metadata': {'Metadata': '', 'handler_name': 'SoundHandler', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 69.78, 'bitrate': 685, 'start': 0.001, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'av1', 'video_profile': '(libaom-av1)', 'video_size': [1920, 1080], 'video_bitrate': 582, 'vi

                                                                      

MoviePy - Done.




#### 3. Transcribing with OpenAI Whisper

In [20]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
import torch
print(torch. __version__ )
print(torch.cuda.is_available())

2.6.0+cu118
True


In [23]:
whisper = pipeline("automatic-speech-recognition", model='openai/whisper-large', device=0)

Device set to use cuda:0


In [25]:
text = whisper('mixed_data/output_audio.wav',return_timestamps=True,generate_kwargs={'language':'en','task':'transcribe'})

Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


In [27]:
with open(output_folder + "output_text.txt", "w", encoding="utf-8") as file:
    file.write(text['text'])

In [28]:
# Save the entire dictionary as a JSON file
import json
with open(output_folder + "output_data.json", "w", encoding="utf-8") as json_file:
    json.dump(text, json_file, ensure_ascii=False, indent=4)

In [46]:
text['text']

" Life will sometimes feel like a fight. The punches, jabs, and hooks will come in the form of challenges, obstacles, and failures. But if you stay in the ring and learn from those past fights, at the end of each round, you'll be still standing. Mr. Contest Chair, fellow fighters, can you think of a time when life tried to knock you down? Who was your toughest opponent? Most often, our most challenging opponent is ourselves. Round one, college. I dropped out of college not one, not two, not three, but four times. I told myself college isn't for me and I would never go back. Round two, marriage. I married my soul mate, the love of my life, my best friend. He was fine too. We were married for eight long, beautiful, amazing months. You heard right, months, not years. It was like immediately after we said I do, the heavyweight champion came in and delivered an electrifying knockout blow to our vows. Boom! Round three, speaking. In 2015, I competed for the first time in the international sp

#### 4. Video to Image Frames

In [29]:
import os
import subprocess

def video_to_images_ffmpeg(video_path, output_folder, fps=30):
    os.makedirs(output_folder, exist_ok=True)
    
    # FFmpeg command for extracting frames with GPU acceleration
    ffmpeg_cmd = [
        'ffmpeg',
        '-hwaccel', 'auto',  # Use GPU for decoding
        '-i', video_path,    # Input video path
        '-vf', f'fps={fps}', # Set frames per second
        os.path.join(output_frames_path, 'frame%04d.png')  # Output file pattern
    ]
    
    # Execute FFmpeg command
    subprocess.run(ffmpeg_cmd, check=True)
    
    print(f"Frames saved to {output_folder}")

# Example usage
video_to_images_ffmpeg("video_data/input_vid.mp4", "frames", fps=30)


Frames saved to frames
