In [1]:
"""
%pip install python-dotenv
%pip google-api-python-client
"""

'\n%pip install python-dotenv\n%pip google-api-python-client\n'

In [2]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Load environment variables from a .env file
load_dotenv()

# Get the YouTube API developer key from the environment variables
DEVELOPER_KEY = os.getenv('DEVELOPER_KEY')

# Build the YouTube service object
youtube = build('youtube', 'v3', developerKey=DEVELOPER_KEY)

def get_video_title(video_id):
    # Extract the video ID from the URL
    #video_id = video_url.split('v=')[-1]
    
    # Create a request to get the video details
    request = youtube.videos().list(part='snippet', id=video_id)
    
    # Execute the request and get the response
    response = request.execute()
    
    # Extract the video title from the response
    title = response['items'][0]['snippet']['title']
    # Extract the video channel title
    channel_title = response['items'][0]['snippet']['channelTitle']
    # Extract the video published date
    published_date = response['items'][0]['snippet']['publishedAt']
    
    return title, channel_title, published_date

# Example video URL
video_url = "https://www.youtube.com/watch?v=1Dx7LDwINLU"
video_id = video_url.split('v=')[-1]

# Print the video context
video_context = get_video_title(video_id)
video_title = video_context[0]
video_channel = video_context[1]
video_published = video_context[2]
video_datePublished = video_published.split('T')[0]
video_yearPublished = video_datePublished.split('-')[0]

print(f"{video_title} by {video_channel} was published in {video_yearPublished}")

Biomolecules (Updated 2023) by Amoeba Sisters was published in 2023


In [3]:
def create_filename(video_title, video_channel, video_yearPublished):
    # Replace spaces with underscores
    f_title = ''.join(e for e in video_title if e.isalnum() or e.isspace()).replace(' ', '_')
    
    # Concatenate the video title, channel, and published date
    filename = f'{video_channel}_{video_yearPublished}_{f_title}.md'
    
    return filename

# Create a filename for the video
t_filename = create_filename(video_title, video_channel, video_yearPublished)
print(t_filename)

Amoeba Sisters_2023_Biomolecules_Updated_2023.md


In [4]:
from pytube import YouTube
from innertube import InnerTube
from datetime import timedelta

In [5]:
# Constant to identify the transcript panel in the YouTube engagement panels
PANEL_IDENTIFIER_TRANSCRIPT = "engagement-panel-searchable-transcript"

# Function to extract transcript parameters from the engagement panels in the provided `next_data`
def extract_transcript_params(next_data):
    engagement_panels = next_data["engagementPanels"]

    for engagement_panel in engagement_panels:
        engagement_panel_section = engagement_panel[
            "engagementPanelSectionListRenderer"
        ]

        # Check if the panel identifier matches the transcript panel identifier
        if (
            engagement_panel_section.get("panelIdentifier")
            != PANEL_IDENTIFIER_TRANSCRIPT
        ):
            continue

        # Return the parameters required to fetch the transcript
        return engagement_panel_section["content"]["continuationItemRenderer"][
            "continuationEndpoint"
        ]["getTranscriptEndpoint"]["params"]


#video_url = "https://www.youtube.com/watch?v=1Dx7LDwINLU"
#yt = YouTube(video_url)
#video_id = yt.video_id

# Retrieve the video title with error handling
try:
    #video_title = yt.title
    print(video_title)
except Exception as e:
    print(f"Error retrieving video title: {e}")
    video_title = "Unknown Title"


# Create an instance of the InnerTube client
client = InnerTube(client_name="WEB")

# Fetch the initial data for the video
data = client.next(video_id)

# Extract the transcript parameters from the fetched data
transcript_params = extract_transcript_params(data)

# Retrieve the transcript using the extracted parameters
transcript = client.get_transcript(transcript_params)

# Access the list of transcript segments
transcript_segments = transcript["actions"][0]["updateEngagementPanelAction"][
    "content"
]["transcriptRenderer"]["content"]["transcriptSearchPanelRenderer"]["body"][
    "transcriptSegmentListRenderer"
][
    "initialSegments"
]

# Open a new Markdown file for writing
#with open(t_filename, w) as file:
with open(f"samples/{t_filename}", "w") as file:
    file.write(f"# {video_title}\n")
    file.write(f"{video_title} by {video_channel} was published in {video_yearPublished}\n")
    file.write(f"{video_url}\n")
    # Iterate over each segment in the list of transcript segments
    for transcript_segment in transcript_segments:
        # Check if the segment contains a transcript snippet
        if "transcriptSegmentRenderer" in transcript_segment:
            transcript_segment_renderer = transcript_segment["transcriptSegmentRenderer"]
            start_time = transcript_segment_renderer["startTimeText"]["simpleText"]
            snippet = transcript_segment_renderer["snippet"]["runs"][0]["text"]
            # Clean up the text snippet
            snippet = snippet.replace("\xa0\xa0", " ")
            snippet = snippet.replace("\xa0\n", " ")
            # Write the cleaned snippet to the file
            file.write(f"{snippet}\n")
        # Check if the segment contains chapter information
        elif "transcriptSectionHeaderRenderer" in transcript_segment:
            section = transcript_segment["transcriptSectionHeaderRenderer"]
            # Check if the required keys are present in the section
            if "startMs" in section and "endMs" in section and "accessibility" in section:
                start = float(section["startMs"]) / 1000.
                end = float(section["endMs"]) / 1000.
                label = section["accessibility"]["accessibilityData"]["label"]
                # Write the label along with the formatted start and end times to the file
                file.write("\n")
                file.write(f"## {label}\n")
                file.write(f"{timedelta(seconds=start)} - {timedelta(seconds=end)}\n")
            else:
                # Write a message if the required keys are not found in the section
                file.write("Required keys not found in section\n")
        else:
            # Write a message if the segment type is unknown
            file.write("Unknown segment type\n")

Biomolecules (Updated 2023)
