In [None]:
!pip install google-api-python-client

In [9]:
import os
import googleapiclient.discovery
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import json
import csv

# Set up API key and YouTube API client.
# The key is taken from the YOUTUBE_API_KEY environment variable when it is
# set, so a real credential does not have to live in the source; the literal
# below is only the fallback used when the variable is absent.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'Your-Youtube-data-api')
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

# Channel IDs for the specified publications (display name -> channel ID)
channels = {
    "Tamil Murasu": "UCs0xZ60FSNxFxHPVFFsXNTA",
}

# Hardcoded last processed date: only videos published after this moment are
# fetched. It is a naive datetime and is sent to the API with a "Z" suffix.
LAST_PROCESSED_DATE = datetime(2024, 6, 1)  # Replace with the desired date

def get_channel_statistics(channel_id):
    """Look up the ``statistics`` part of a single channel.

    The raw API response is printed for inspection.

    Args:
        channel_id: ID of the channel to query.

    Returns:
        The ``statistics`` dict of the first returned item, or ``None`` when
        the response contains no items or when any exception is raised while
        querying (the exception is printed, not propagated).
    """
    try:
        response = youtube.channels().list(part="statistics", id=channel_id).execute()
        print(response)

        channel_items = response['items'] if 'items' in response else ()
        if len(channel_items) > 0:
            return channel_items[0]['statistics']

        print(f"No data found for channel ID: {channel_id}")
        return None
    except Exception as err:
        # Best-effort lookup: report the failure and signal it with None.
        print(f"An error occurred: {err}")
        return None

def get_videos(channel_id, last_processed_date):
    """Collect the video search results of a channel published after a cutoff.

    Args:
        channel_id: ID of the channel to search within.
        last_processed_date: ``datetime`` lower bound for the publish time,
            or a falsy value to request no lower bound. A naive value is sent
            unchanged with a ``Z`` suffix; a timezone-aware value is converted
            to UTC first.

    Returns:
        A list of the raw ``search().list`` result items whose ``id.kind`` is
        ``youtube#video``, gathered across all result pages in the order the
        API returned them (the request asks for ``order="date"``).
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    published_after = None
    if last_processed_date:
        cutoff = last_processed_date
        if cutoff.utcoffset() is not None:
            # isoformat() on an aware datetime already ends in "+HH:MM";
            # appending "Z" to that would produce an invalid timestamp, so
            # normalise to naive UTC before formatting.
            cutoff = cutoff.astimezone(timezone.utc).replace(tzinfo=None)
        published_after = cutoff.isoformat() + "Z"

    videos = []
    request = youtube.search().list(
        part="snippet",
        channelId=channel_id,
        maxResults=50,
        order="date",
        publishedAfter=published_after,
    )
    # list_next() yields the request for the following page, or None once
    # the last page has been consumed, which ends the loop.
    while request:
        response = request.execute()
        # The search can return non-video resources; keep videos only.
        videos.extend(
            item
            for item in response.get('items', [])
            if item['id']['kind'] == 'youtube#video'
        )
        request = youtube.search().list_next(request, response)
    return videos

def get_video_statistics(video_id):
    """Fetch the ``statistics`` and ``snippet`` parts of a single video.

    Args:
        video_id: ID of the video to query.

    Returns:
        The first item of the ``videos().list`` response (a dict holding the
        ``statistics`` and ``snippet`` parts), or ``None`` when the response
        contains no items, so that callers can skip the video instead of
        failing on an index error.
    """
    response = youtube.videos().list(
        part="statistics,snippet",
        id=video_id,
    ).execute()
    items = response.get('items') or []
    return items[0] if items else None

def main():
    """Gather per-video analytics for every configured channel into a CSV.

    For each entry in ``channels`` this prints the subscriber count, fetches
    the videos published after ``LAST_PROCESSED_DATE``, dumps the raw search
    results to ``videos.json`` and looks up the statistics of every video.
    All collected rows are appended to
    ``youtube_channel_analytics_incremental.csv``; the header row is written
    only when that file does not exist yet.
    """
    csv_path = 'youtube_channel_analytics_incremental.csv'
    all_data = []

    for name, channel_id in channels.items():
        print(f"Processing channel: {name}")

        # Get channel statistics. The lookup returns None on failure, and the
        # subscriber count may be missing from the statistics (presumably when
        # the channel hides it -- TODO confirm); record None in either case
        # rather than crashing before any video is processed.
        stats = get_channel_statistics(channel_id)
        subscribers = stats.get('subscriberCount') if stats else None
        print(f"Subscribers: {subscribers}")

        # Get video details (incremental data)
        videos = get_videos(channel_id, LAST_PROCESSED_DATE)

        # Save the raw search results as JSON.
        # NOTE: the same file name is used for every channel, so with several
        # channels configured only the last channel's results remain on disk.
        with open('videos.json', 'w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(videos, indent=4, ensure_ascii=False))

        print("Videos have been saved to videos.json")

        video_count = len(videos)
        print(f"Number of videos: {video_count}")

        for video in videos:
            video_id = video['id']['videoId']
            video_stats = get_video_statistics(video_id)
            if video_stats is None:
                continue

            # Extract necessary information
            snippet = video_stats['snippet']
            statistics = video_stats['statistics']

            # A video without a "high" thumbnail gets an empty URL instead of
            # aborting the whole run with a KeyError.
            high_thumbnail_url = (
                snippet.get('thumbnails', {}).get('high', {}).get('url', '')
            )

            all_data.append({
                'channel': name,
                'video_id': video_id,
                # Titles are already str; no re-encoding is needed.
                'title': snippet['title'],
                'publish_date': datetime.strptime(
                    snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"
                ),
                # Counters missing from the response are stored as 0.
                'view_count': int(statistics.get('viewCount', 0)),
                'like_count': int(statistics.get('likeCount', 0)),
                'comment_count': int(statistics.get('commentCount', 0)),
                'subscribers': subscribers,
                'thumbnail_high_url': high_thumbnail_url,
                'description': snippet.get('description', ''),
            })

    if all_data:
        # Convert data to DataFrame and append to the CSV with every field
        # quoted; emit the header only when the file is being created.
        df = pd.DataFrame(all_data)
        df.to_csv(
            csv_path,
            mode='a',
            header=not os.path.exists(csv_path),
            index=False,
            quoting=csv.QUOTE_ALL,
            encoding='utf-8',
        )
        print("Data saved to youtube_channel_analytics_incremental.csv")
    else:
        print("No new videos to process.")

# Run the collection when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()

Processing channel: Tamil Murasu
{'kind': 'youtube#channelListResponse', 'etag': 'doBdLqN4a_naMpLg15oKFJpLHX8', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': 'eG-IhmyEDYLaYGyUzpyG0GmsoAc', 'id': 'UCs0xZ60FSNxFxHPVFFsXNTA', 'statistics': {'viewCount': '1342240', 'subscriberCount': '4860', 'hiddenSubscriberCount': False, 'videoCount': '461'}}]}
Subscribers: 4860
Videos have been saved to videos.json
Number of videos: 137
Data saved to youtube_channel_analytics_incremental.csv
