In [None]:
pip install youtube-transcript-api

In [None]:
pip install --upgrade google-api-python-client

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from googleapiclient.discovery import build
import os
import re

In [None]:
# Read the YouTube Data API key from the environment (None when unset)
api_key = os.environ.get("YOUTUBE_API_KEY")

# Build the YouTube Data API v3 client, authenticated with that key
youtube = build(serviceName='youtube', version='v3', developerKey=api_key)

def get_video_ids_with_captions(query, max_results=50):
    """Return IDs of captioned videos matching a search query.

    The search endpoint returns a limited number of results per request, so
    results are fetched page by page until ``max_results`` IDs have been
    collected or the API reports no further pages.

    Args:
        query: Search term passed to the YouTube search endpoint.
        max_results: Upper bound on the number of video IDs to return.

    Returns:
        A list of at most ``max_results`` video ID strings, in the order the
        API returned them.
    """
    page_size_limit = 50  # largest maxResults value accepted per request
    video_ids = []
    page_token = None

    while len(video_ids) < max_results:
        request_args = {
            'q': query,
            'part': 'id',
            'maxResults': min(max_results - len(video_ids), page_size_limit),
            'type': 'video',
            'videoCaption': 'closedCaption',  # Only retrieve videos with captions
        }
        # Only send pageToken once the API has actually handed one out
        if page_token:
            request_args['pageToken'] = page_token

        search_response = youtube.search().list(**request_args).execute()

        items = search_response.get('items', [])
        for search_result in items:
            video_id = search_result.get('id', {}).get('videoId')
            if video_id:
                video_ids.append(video_id)

        page_token = search_response.get('nextPageToken')
        # Stop when the result set is exhausted; the empty-page check also
        # guards against looping forever on a page that yields nothing
        if not page_token or not items:
            break

    return video_ids[:max_results]

def sanitize_filename(filename):
    """Turn arbitrary text into a lowercase, hyphen-separated file name stem.

    Characters other than word characters, whitespace and hyphens are dropped,
    surrounding whitespace is trimmed, and each run of whitespace and/or
    hyphens is collapsed into a single hyphen.
    """
    # Drop everything that is not a word character, whitespace or a hyphen
    kept = re.sub(r'[^\w\s-]', '', filename)
    normalized = kept.strip().lower()
    # Collapse separator runs so words are joined by exactly one hyphen
    slug = re.sub(r'[-\s]+', '-', normalized)
    return slug

def main(query="hindi old comedy movies", output_dir="video_ids"):
    """Search for captioned videos and save their IDs to a text file.

    The IDs are written one per line to ``<output_dir>/<sanitized query>.txt``.

    Args:
        query: Search term to look up. Defaults to the original hard-coded
            query, so calling ``main()`` with no arguments behaves as before.
        output_dir: Directory that receives the ID file; created if missing.
    """
    video_ids = get_video_ids_with_captions(query)

    # exist_ok avoids the race between a separate existence check and creation
    os.makedirs(output_dir, exist_ok=True)

    # Sanitize the query to create a valid filename
    filename = os.path.join(output_dir, f"{sanitize_filename(query)}.txt")

    # Explicit encoding keeps the output independent of the platform default
    with open(filename, "w", encoding="utf-8") as file:
        file.writelines(f"{video_id}\n" for video_id in video_ids)

    print(f"Saved {len(video_ids)} video IDs to {filename}.")

# Run the search-and-save step only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

In [None]:
def download_transcript(video_id, output_folder):
    """Fetch the Hindi transcript of a video and save it as an SRT file.

    The transcript is written to ``<output_folder>/<video_id>.srt``. Any
    failure (no Hindi transcript, network error, write error, ...) is reported
    on stdout and not re-raised, so a batch caller can continue with the next
    video.

    Args:
        video_id: YouTube video ID whose transcript should be downloaded.
        output_folder: Directory that receives the SRT file; created if missing.
    """
    try:
        # Fetching the transcript in Hindi.
        # NOTE(review): newer youtube-transcript-api releases expose an
        # instance-level fetch() and phase out the static get_transcript();
        # prefer fetch() when present and fall back for older installs.
        if hasattr(YouTubeTranscriptApi, "fetch"):
            transcript = YouTubeTranscriptApi().fetch(video_id, languages=['hi'])
        else:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['hi'])

        # Formatting the transcript as SRT
        srt_transcript = SRTFormatter().format_transcript(transcript)

        # exist_ok avoids the race between an existence check and creation
        os.makedirs(output_folder, exist_ok=True)

        # Hindi text is non-ASCII: write UTF-8 explicitly, because the platform
        # default encoding may be unable to represent it
        filename = os.path.join(output_folder, f"{video_id}.srt")
        with open(filename, "w", encoding="utf-8") as text_file:
            text_file.write(srt_transcript)

        print(f"Transcript for video {video_id} has been saved in '{output_folder}' folder.")
    except Exception as e:
        # Deliberately broad: one failing video must not abort a whole batch
        print(f"An error occurred with video {video_id}: {e}")

def process_video_ids(directory, output_folder):
    """Download transcripts for every video ID listed in a directory's .txt files.

    Each ``.txt`` file in ``directory`` is read as one video ID per line.
    Blank lines are ignored, surrounding whitespace is stripped, and an ID
    that appears more than once (in the same or different files) is only
    downloaded once.

    Args:
        directory: Directory containing the ``.txt`` files of video IDs.
        output_folder: Directory passed through to ``download_transcript``.
    """
    seen = set()
    # Sorted so the processing order does not depend on the filesystem
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue

        filepath = os.path.join(directory, filename)
        # Read everything first so the file is closed before any network work
        with open(filepath, 'r', encoding='utf-8') as file:
            video_ids = [line.strip() for line in file]

        for video_id in video_ids:
            # Skip blank lines and IDs already handled in this run
            if not video_id or video_id in seen:
                continue
            seen.add(video_id)
            download_transcript(video_id, output_folder)

# Download transcripts for every saved video ID when executed directly.
if __name__ == "__main__":
    process_video_ids(directory="video_ids", output_folder="subtitles")