In [1]:
# Import Dependencies
import os, re
from pytubefix.contrib.search import Search, Filter
from pytubefix.innertube import _default_clients
from IPython.display import IFrame, display, clear_output

In [2]:
def read_unique_items_from_file(file: str) -> list:
    """Return the distinct, non-blank lines of a text file.

    Each line is stripped of surrounding whitespace before it is compared,
    and lines that are empty after stripping are skipped.

    Args:
        file: Path of the text file to read.

    Returns:
        A list with one entry per distinct stripped line. The order is
        unspecified because duplicates are removed through a set. An empty
        list is returned when the path does not exist.
    """
    # Guard clause: nothing to read when the path is missing.
    if not os.path.exists(file):
        return []
    unique_items = set()
    with open(file, "r") as handle:
        for raw_line in handle:
            item = raw_line.strip()
            if item:
                unique_items.add(item)
    return list(unique_items)
def ask_user(prompt: str) -> bool:
    """Ask a yes/no question on stdin until a recognisable answer is given.

    The answer is compared case-insensitively and with surrounding whitespace
    removed. Any non-empty prefix of "yes" ("y", "ye", "yes") counts as yes and
    any non-empty prefix of "no" ("n", "no") counts as no. Everything else,
    including an empty answer, causes the prompt to be shown again.

    Args:
        prompt: Text shown to the user by `input`.

    Returns:
        True for a "yes" answer, False for a "no" answer.
    """
    while True:
        answer = input(prompt).strip().lower()
        # Every string starts with "", so an empty answer would otherwise be
        # silently treated as "yes"; ask again instead.
        if not answer:
            continue
        if "yes".startswith(answer):
            return True
        if "no".startswith(answer):
            return False
def display_video(url: str):
    """Show `url` in a 560x315 IFrame in the notebook output.

    Args:
        url: Address loaded by the IFrame (the caller passes an embed URL).
    """
    # Extra attributes forwarded to the IFrame, including the autoplay hints.
    iframe_extras = ["controls", "autoplay", 'allow="autoplay"']
    player = IFrame(url, width=560, height=315, extras=iframe_extras)
    display(player)
def get_yt_id(url: str):
    """Extract the value of the `v` query parameter from a URL.

    Args:
        url: URL string, e.g. "https://youtube.com/watch?v=9AunRg_V078".

    Returns:
        The value of the first `v` parameter found in the query string, or
        None when the URL has no '?' or no `v` parameter.
    """
    if '?' not in url:
        return None
    # Query parameters are whatever follows the last '?'; a trailing
    # '#fragment' is not part of the query and is dropped.
    query_string = url.split('?')[-1].split('#')[0]
    for param in query_string.split('&'):
        # partition never raises: parameters without '=' (e.g. "feature") or
        # with several '=' (e.g. "a=b=c") no longer abort the lookup.
        key, _, value = param.partition('=')
        if key == "v":
            return value
    return None

In [3]:
# Folder that holds the URL list files
output_path = "YouTube URL Collection"
# Names of the two URL list files inside that folder
used_urls_filename = "Used URLs.txt"
unused_urls_filename = "Unused URLs.txt"
# Flag forwarded to the YouTube search as its OAuth settings
authorize_yt = False
# Additional dependency configuration: make the ANDROID_MUSIC entry of
# pytubefix's default client table point at the ANDROID_CREATOR entry.
_default_clients["ANDROID_MUSIC"] = _default_clients["ANDROID_CREATOR"]
# YouTube ids already present in the "used" file (URLs without an id are skipped)
used_yt_ids = {
    yt_id
    for yt_id in map(
        get_yt_id,
        read_unique_items_from_file(os.path.join(output_path, used_urls_filename)),
    )
    if yt_id is not None
}
# YouTube ids already present in the "unused" file (URLs without an id are skipped)
unused_yt_ids = {
    yt_id
    for yt_id in map(
        get_yt_id,
        read_unique_items_from_file(os.path.join(output_path, unused_urls_filename)),
    )
    if yt_id is not None
}
# Print Colors
class pcolors:
    """ANSI escape sequences used to colour printed text."""

    # 24-bit foreground colour, RGB (67, 227, 235)
    BRIGHTCYAN = "\033[38;2;67;227;235m"
    # Resets all text attributes back to the terminal default
    ENDC = "\033[0m"

In [None]:
# YouTube Search Query
search_query = """
    
    ("Kamala" OR "Harris") AND ("Pennsylvania")
    
"""
# Strings are immutable: the cleaned query has to be assigned back, otherwise
# the newlines and padding of the literal above are sent to the search as-is.
search_query = search_query.replace('\n', '').strip()
# YouTube Filters
yt_filters = {
    "type": Filter.get_type("Video"),
    # Under 4 minutes | 4 - 20 minutes | Over 20 minutes
    "duration": Filter.get_duration("4 - 20 minutes")
}


def _save_yt_ids(filename: str, yt_ids: set) -> None:
    """Rewrite `filename` inside `output_path` with one watch URL per id in `yt_ids`."""
    # Create the output directory on first use
    os.makedirs(output_path, exist_ok=True)
    with open(os.path.join(output_path, filename), "w") as file:
        file.write("\n".join(
            f'https://www.youtube.com/watch?v={yt_id}'
            for yt_id in yt_ids
            if yt_id is not None
        ))


# Search YouTube Videos
print('...Searching...')
yt_list = Search(
    query=search_query,
    filters=yt_filters,
    use_oauth=authorize_yt,
    allow_oauth_cache=authorize_yt
)
last_yt_videos_len = len(yt_list.videos)
# Loop To Search Per Batch
while True:
    for yt in yt_list.videos:
        url = yt.watch_url.strip()
        video_id = get_yt_id(url)
        # Skip videos without an id and videos that were already reviewed
        if video_id is None or video_id in used_yt_ids or video_id in unused_yt_ids:
            continue
        # Skip videos published in 2020 or earlier. The publish date may be
        # missing, so check it before reading `.year`.
        publish_date = yt.publish_date
        publish_year = publish_date.year if publish_date is not None else None
        if publish_year is not None and publish_year <= 2020:
            continue
        # Reset Output Console
        clear_output(wait=True)
        # Display the Vid
        display_video(f'{yt.embed_url}?autoplay=1')
        print("") # Just New Line for Better Output
        print(f'Title: {pcolors.BRIGHTCYAN}{yt.title}{pcolors.ENDC}')
        print("") # Just New Line for Better Output
        # A missing year is printed as "None"
        print(f'Publish Year: {pcolors.BRIGHTCYAN}{publish_year}{pcolors.ENDC}')
        print("") # Just New Line for Better Output
        print(f'URL If Video Failed To Load: {url}')
        print("") # Just New Line for Better Output
        # Record the decision, then persist the whole id set to its file
        if ask_user("Do you want to add this Video URL (y/n)?"):
            used_yt_ids.add(video_id)
            _save_yt_ids(used_urls_filename, used_yt_ids)
        else:
            unused_yt_ids.add(video_id)
            _save_yt_ids(unused_urls_filename, unused_yt_ids)
    # Search Next Batch
    print(f'...Searching [{len(yt_list.videos)} Total Video]...')
    yt_list.get_next_results()
    yt_videos_len = len(yt_list.videos)
    # Stop If No More Videos Found
    if yt_videos_len <= last_yt_videos_len:
        break
    last_yt_videos_len = yt_videos_len


Title: [38;2;67;227;235mAction News anchor Brian Taff's full interview with VP Kamala Harris[0m

Publish Year: [38;2;67;227;235m2024[0m

URL If Video Failed To Load: https://youtube.com/watch?v=9AunRg_V078

