# Set Up the YouTube API Client
Import required libraries and initialize the YouTube API client with authentication credentials.

In [1]:
# Import required libraries
from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
import pandas as pd
import os

# Initialize the YouTube API client with authentication credentials.
# The key is read from the YOUTUBE_API_KEY environment variable so the secret
# never has to be written into (or committed with) the notebook. When the
# variable is unset, the previous default of an empty string is kept.
api_key = os.environ.get('YOUTUBE_API_KEY', '')
youtube = build('youtube', 'v3', developerKey=api_key, static_discovery=False)

# Create Comment Collection Functions
Implement the YouTubeCommentCollector class with methods to fetch comments from videos.

In [2]:
class YouTubeCommentCollector:
    """Fetch the top-level comments of YouTube videos through the Data API v3.

    The collector owns one API client (``self.youtube``) that is reused for
    every request it issues.
    """

    # Column names (and order) of every DataFrame returned by
    # get_video_comments, including the empty one.
    COMMENT_COLUMNS = ['text', 'author', 'date', 'likes']

    def __init__(self, api_key):
        """Create the API client.

        Args:
            api_key: Developer key forwarded to the client builder.
        """
        # Initialize the YouTube API client
        self.youtube = build('youtube', 'v3', developerKey=api_key, static_discovery=False)

    def get_video_comments(self, video_id, max_results=100):
        """Collect every top-level comment of one video.

        Pages through ``commentThreads().list`` until the response no longer
        carries a ``nextPageToken``.

        Args:
            video_id: ID of the video whose comment threads are requested.
            max_results: Value sent as ``maxResults`` with each page request.
                It sets the page size, not a cap on the total number of
                comments (the API reference lists 1-100 as acceptable values).

        Returns:
            pandas.DataFrame with the columns ``text``, ``author``, ``date``
            and ``likes``; one row per top-level comment. The columns are
            present even when no comment was collected.

        Errors raised while fetching are printed, not re-raised; the rows
        gathered before the failure are still returned (best effort).
        """
        comments = []
        page_token = None
        try:
            while True:
                # One set of request parameters for first and follow-up pages;
                # only follow-up pages carry a pageToken.
                params = {
                    'part': "snippet",
                    'videoId': video_id,
                    'maxResults': max_results,
                }
                if page_token:
                    params['pageToken'] = page_token

                response = self.youtube.commentThreads().list(**params).execute()

                # Tolerate a response without an 'items' list.
                for item in response.get('items', []):
                    comment = item['snippet']['topLevelComment']['snippet']
                    comments.append({
                        'text': comment['textDisplay'],
                        'author': comment['authorDisplayName'],
                        'date': comment['publishedAt'],
                        # Key must be 'likes': the statistics cell reads
                        # combined_df['likes'].
                        'likes': comment['likeCount'],
                    })

                # No token means the last page has been read.
                page_token = response.get('nextPageToken')
                if not page_token:
                    break
        except Exception as e:
            print(f"An error occurred: {e}")

        # Fixing the columns keeps the schema stable for empty results, so
        # downstream column access does not fail with KeyError.
        return pd.DataFrame(comments, columns=self.COMMENT_COLUMNS)

# Extract Video IDs from URLs
Create utility functions to extract video IDs from different YouTube URL formats.

In [7]:
# Extract Video IDs from URLs

def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognized forms:
      * short links:  ``https://youtu.be/<id>`` (query string, fragment or a
        trailing slash are ignored)
      * watch links:  ``https://www.youtube.com/watch?v=<id>`` with ``v`` at
        any position in the query string
      * path links:   ``https://www.youtube.com/shorts/<id>``, ``/embed/<id>``,
        ``/live/<id>`` and ``/v/<id>``

    URLs written without a scheme (``youtu.be/<id>``) are accepted as well.

    Args:
        url: A YouTube URL, or a value that is already a bare video ID.

    Returns:
        The video ID as a string. When no ID can be located (for example a
        bare ID or a channel URL) the input is returned unchanged.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import parse_qs, urlparse

    if 'youtu.be' not in url and 'youtube.com' not in url:
        return url

    candidate = url.strip()
    if '://' not in candidate:
        # urlparse only fills in the host when a scheme is present.
        candidate = 'https://' + candidate.lstrip('/')

    parsed = urlparse(candidate)
    host = (parsed.hostname or '').lower()
    segments = [segment for segment in parsed.path.split('/') if segment]

    if host == 'youtu.be' or host.endswith('.youtu.be'):
        # Short link: the ID is the first path segment.
        if segments:
            return segments[0]
        return url

    if host == 'youtube.com' or host.endswith('.youtube.com'):
        # Watch link: the ID is the 'v' query parameter, wherever it appears.
        ids_in_query = parse_qs(parsed.query).get('v')
        if ids_in_query:
            return ids_in_query[0]
        # Path link: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>.
        if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
            return segments[1]

    return url

# Example usage: share links that should be reduced to bare video IDs
youtube_urls = ["https://youtu.be/0e3GPea1Tyg?si=JrfvZL6tlLBfcrVu"]

# Resolve every URL in order; the bare expression on the last line makes the
# notebook display the resulting list.
video_ids = list(map(extract_video_id, youtube_urls))
video_ids

['0e3GPea1Tyg']

# Collect and Process Comments
Demonstrate how to collect comments from multiple videos and combine them into a pandas DataFrame.

In [8]:
# Collect and Process Comments

# One collector (and therefore one API client) is shared by all videos
collector = YouTubeCommentCollector(api_key)

# Fetch one DataFrame of comments per video ID, keeping the order of video_ids
all_comments = [collector.get_video_comments(video_id) for video_id in video_ids]

# Stack the per-video frames into one frame with a fresh 0..n-1 index
combined_df = pd.concat(all_comments, ignore_index=True)

# Preview the first rows of the combined frame
combined_df.head()

Unnamed: 0,text,author,date,likes
0,"Like I said in the video, subscribe if you hav...",@MrBeast,2021-11-24T21:02:45Z,1058903
1,imagine finding 061,@Players_1500,2024-12-22T12:12:02Z,0
2,晚安 希望 韓國女明星(秀智) 和 (潤娥) 可以嫁個好男人 一輩子幸福+458（每天情人...,@許閔翔-i5r,2024-12-22T10:48:47Z,0
3,Beneran😮,@AhmadYusri-v3u,2024-12-22T10:26:03Z,0
4,Who is here after watching Ronald&#39;s video ...,@Hoyadeeduroon,2024-12-22T10:08:46Z,0


# Store Comments Data
Save the collected comments to a CSV file and show basic statistics about the collected data.

In [9]:
# Store Comments Data

# Save the collected comments to a CSV file
output_file = '../data/youtube_comments.csv'

# to_csv does not create missing directories, so make sure the target exists.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
combined_df.to_csv(output_file, index=False)

# Show basic statistics about the collected data
print("Number of comments collected:", len(combined_df))
if combined_df.empty:
    # An empty frame has no maximum (idxmax raises) and may lack the columns.
    print("No comments collected; skipping author and like statistics.")
else:
    print("Number of unique authors:", combined_df['author'].nunique())
    print("Most liked comment:")
    print(combined_df.loc[combined_df['likes'].idxmax()])

Number of comments collected: 151297
Number of unique authors: 130816
Most liked comment:
text      Like I said in the video, subscribe if you hav...
author                                             @MrBeast
date                                   2021-11-24T21:02:45Z
likes                                               1058903
Name: 0, dtype: object
