In [28]:
import json
import os
import re

import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


# Read the YouTube Data API key from the environment
API_KEY = os.environ.get("YTB_API_KEY")

# Build the YouTube Data API v3 client used for the playlist requests
youtube = build(serviceName='youtube', version='v3', developerKey=API_KEY)

# Channel handle, i.e. the part that follows the '@' in the channel link
channel_name = "nasaastrobiology"
# Module-level list; fetch_video_ids builds and returns its own local list
video_urls = list()


In [29]:
def get_channel_id(channel_name, timeout=10):
    """
    Retrieves the id of a youtube channel from its channel name.

    The channel page (https://www.youtube.com/@<channel_name>) is downloaded and
    the id is read from the first "youtube.com/channel/<id>" link in its source.

    Args:
      channel_name: Handle of the youtube channel, i.e. the name that follows
                    the '@' in the channel link (a leading '@' is ignored).
      timeout: Seconds to wait for the channel page before giving up.

    Returns:
      The id of the given channel.

    Raises:
      requests.HTTPError: If the channel page answers with an error status
                          (e.g. an unknown handle).
      ValueError: If no channel id can be found in the page source.
    """
    url = "https://www.youtube.com/@" + channel_name.lstrip("@")
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of trying to parse an error page
    response.raise_for_status()
    # Capture only the characters that can belong to an id, so the result does
    # not depend on which markup happens to follow the link in the page source
    match = re.search(r"youtube\.com/channel/([A-Za-z0-9_-]+)", response.text)
    if match is None:
        raise ValueError(f"Could not find a channel id for {channel_name}")
    return match.group(1)

In [30]:
def fetch_video_ids(channel_name):
    """
    Fetches the videos in the uploads playlist of a channel.

    Args:
      channel_name: The name of the channel.

    Returns:
      A list with one dict per video, holding the keys "ID", "URL", "Title"
      and "Published At". An empty list is returned when the channel lookup
      request fails.

    Raises:
      ValueError: If the API response contains no entry for the channel.
    """
    # Ask the channels endpoint for the channel's content details
    base_url = "https://www.googleapis.com/youtube/v3/channels"
    channel_id = get_channel_id(channel_name)
    params = {"part": "contentDetails", "id": channel_id, "key": API_KEY}
    try:
        channel_response = requests.get(base_url, params=params, timeout=10)
        channel_response.raise_for_status()
        response = channel_response.json()
    except (requests.RequestException, ValueError) as e:
        # requests reports its failures as RequestException (not the
        # googleapiclient HttpError); ValueError covers a body that is not JSON.
        # The request URL embeds the API key, so keep it out of the printed message.
        message = str(e)
        if API_KEY:
            message = message.replace(API_KEY, "<redacted>")
        print(f"An HTTP error occurred: {message}")
        return []

    if not response.get("items"):
        raise ValueError(f"No playlist found for {channel_name}")

    # Retrieve the uploads playlist ID for the given channel
    playlist_id = response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

    # Walk through every page of the uploads playlist
    video_urls = []
    next_page_token = None

    while True:
        playlist_items_response = (
            youtube.playlistItems()
            .list(
                part="snippet",
                playlistId=playlist_id,
                maxResults=50,
                pageToken=next_page_token,
            )
            .execute()
        )

        for video in playlist_items_response.get("items", []):
            snippet = video["snippet"]
            video_id = snippet["resourceId"]["videoId"]
            video_urls.append(
                {
                    "ID": video_id,
                    "URL": f"https://www.youtube.com/watch?v={video_id}",
                    "Title": snippet["title"],
                    # NOTE(review): snippet.publishedAt is documented as the time the
                    # item was added to the playlist; confirm it is the wanted date
                    "Published At": snippet["publishedAt"],
                }
            )

        # No token in the response means the last page has been read
        next_page_token = playlist_items_response.get("nextPageToken")
        if not next_page_token:
            break

    return video_urls

In [31]:
# Collect one metadata dict per video in the channel's uploads playlist
urls = fetch_video_ids(channel_name=channel_name)

In [36]:
# Keep only the URLs of the videos whose "Published At" value starts with 2023 or 2024.
# NOTE: this rebinds `urls` from a list of dicts to a list of strings, so this cell
# cannot be run a second time without running the fetch cell again first.
urls = [
    entry['URL']
    for entry in urls
    if entry['Published At'].startswith(('2023', '2024'))
]


In [39]:
# Tag every URL with the channel query parameter
urls_with_channel = [f"{video_url}&ab_channel=NASAAstrobiology" for video_url in urls]

In [40]:
# Write the channel-tagged URLs to a text file, one URL per line
file_path = 'nasaastrobiology.txt'
with open(file_path, mode='w') as file:
    for url in urls_with_channel:
        print(url, file=file)

# Show the path of the written file as the cell output
file_path

'nasaastrobiology.txt'