In [2]:
# Location of the JSON file with the video details that the loading cell below opens.
video_list_f = "videos_data/5760-L01-video_details.json"

In [3]:
# Collect every video ID listed in the saved video-details JSON file.
import json

# Read the file as UTF-8 explicitly: without an encoding argument, open() falls back to
# the platform default, which can mis-decode or reject non-ASCII text in the JSON.
with open(video_list_f, encoding='utf-8') as f:
    video_list = json.load(f)

# The file handle is no longer needed once the data is parsed, so the IDs are gathered
# outside the `with` block. Each top-level element must hold an 'items' list whose
# entries carry an 'id' key; a missing key raises KeyError.
video_ids = [item['id'] for video in video_list for item in video['items']]


In [4]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound

def check_transcript_availability(video_id: str) -> bool:
    """Report whether transcripts can be listed for a YouTube video.

    Args:
        video_id: ID of the video to check.

    Returns:
        True when ``YouTubeTranscriptApi.list_transcripts`` completes for the
        video; False when it raises ``TranscriptsDisabled`` or
        ``NoTranscriptFound``.

    Raises:
        Any other exception raised by ``list_transcripts`` is not handled here
        and propagates to the caller.
    """
    try:
        # Only success or failure matters, so the returned listing is discarded.
        YouTubeTranscriptApi.list_transcripts(video_id)
    except (TranscriptsDisabled, NoTranscriptFound):
        return False
    return True


In [5]:
# Print one status line per video ID, depending on whether its transcripts can be listed.
for candidate_id in video_ids:
    if check_transcript_availability(candidate_id):
        print(f"Transcripts are available for video ID: {candidate_id}")
    else:
        print(f"No transcripts found for video ID: {candidate_id}")

Transcripts are available for video ID: zbFHYjaqjzw
Transcripts are available for video ID: sjV7NNwm1GU
Transcripts are available for video ID: -Xj7zDwwU_I
Transcripts are available for video ID: eKN22NFl58U
Transcripts are available for video ID: sM-VI3alvAI
Transcripts are available for video ID: QLT1vrnJXWI
Transcripts are available for video ID: dhRIHF1DENI
Transcripts are available for video ID: rphiCdR68TE
Transcripts are available for video ID: z7aqJpKfPC0
Transcripts are available for video ID: SXmVnHgwOZs


In [17]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter

from pathlib import Path

# Fetch the transcript of one video and save it as a JSON file named after the video ID.

# The second entry of video_ids is the video saved by this cell.
video_id = video_ids[1]

# Must be a single transcript.
transcript = YouTubeTranscriptApi.get_transcript(video_id)

formatter = JSONFormatter()

# .format_transcript(transcript) turns the transcript into a JSON string.
json_formatted = formatter.format_transcript(transcript)

# Make sure the output directory exists; opening a file for writing inside a missing
# directory raises FileNotFoundError.
output_dir = Path('outputs/transcripts')
output_dir.mkdir(parents=True, exist_ok=True)

# Use the video ID as the filename.
filename = f'{video_id}.json'

# Write the JSON string out; it can later be read back into Python with the json module.
with open(output_dir / filename, 'w', encoding='utf-8') as json_file:
    json_file.write(json_formatted)

In [2]:
import os
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from googleapiclient.discovery import build

# Set up the YouTube Data API client. The key is read from the 'googleKey'
# environment variable after load_dotenv() has run.
load_dotenv()
api_key = os.getenv('googleKey')
youtube = build("youtube", "v3", developerKey=api_key)

# Video to inspect. The ID is hard-coded here; nothing is parsed from a URL.
video_id = "sjV7NNwm1GU"

# Fetch the video's snippet metadata.
video_response = youtube.videos().list(
    part="snippet",
    id=video_id
).execute()

# Fail with a clear message when the response carries no items (for example because
# the ID matched no video) instead of an opaque IndexError on the [0] lookup.
video_items = video_response.get("items") or []
if not video_items:
    raise ValueError(f"No video details returned for video ID {video_id!r}")

# Get video title
video_title = video_items[0]["snippet"]["title"]

# Fetch transcript
transcript = YouTubeTranscriptApi.get_transcript(video_id)

# Build the chapter list from transcript entries whose text contains a "{ts:<number>}"
# marker; the text after the last "}" becomes the chapter title.
# NOTE(review): the output recorded for this cell lists only the placeholder
# "Introduction" chapter, so no entry appears to have matched the marker for this
# video. Confirm that the transcript text ever carries such markers.
chapters = []
current_chapter = {"start": 0, "title": "Introduction"}

for entry in transcript:
    text = entry["text"]
    if "{ts:" not in text:
        continue

    timestamp = int(text.split("{ts:")[1].split("}")[0])
    # Only a marker strictly later than the open chapter's start begins a new chapter.
    if timestamp <= current_chapter["start"]:
        continue

    chapters.append(current_chapter)
    current_chapter = {"start": timestamp, "title": text.split("}")[-1].strip()}

# The chapter still open after the loop is the final one.
chapters.append(current_chapter)

# List the chapters, numbered from 1, under the video title.
print(f"Chapters for video: {video_title}")
for position, chapter in enumerate(chapters, start=1):
    print(f"{position}. {chapter['title']} (starts at {chapter['start']} seconds)")


Chapters for video: Bio Nano Technology-New Frontiers in Molecular Engineering: Andreas Mershin at TEDxAthens
1. Introduction (starts at 0 seconds)
