In [1]:
from googleapiclient.discovery import build
from IPython.display import JSON
import pandas as pd
from dateutil import parser
import api_key as ak

In [2]:
api_key = ak.api_key

In [3]:
channel_ids = ["UCjLEmnpCNeisMxy134KPwWw",]

In [4]:
api_service_name = "youtube"
api_version = "v3"

youtube = build(
    api_service_name, api_version, developerKey=api_key)


In [5]:
def get_channel_stats(youtube,channel_ids):
    all_data=[]
    request = youtube.channels().list(
        part="contentDetails,statistics,snippet",
        id=",".join(channel_ids)
    )
    response = request.execute()

    for item in response['items']:
        data = {
            'channelName': item['snippet']['title'],
            'subscribers': item['statistics']['subscriberCount'],
            'views': item['statistics']['viewCount'],
            'totalVideo': item['statistics']['videoCount'],
            'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }
        all_data.append(data)
    return (pd.DataFrame(all_data))

In [6]:
channel_stats = get_channel_stats(youtube,channel_ids)

In [7]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideo,playlistId
0,Kobo Kanaeru Ch. hololive-ID,2410000,152341392,458,UUjLEmnpCNeisMxy134KPwWw


In [8]:
def get_video_ids(youtube,playlist_id):
    video_ids = []
    request = youtube.playlistItems().list(
        part="contentDetails",
        playlistId=playlist_id,
        maxResults = 50)
    response = request.execute()

    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')
    
    while next_page_token is not None:
        request = youtube.playlistItems().list(
            part="contentDetails",
            playlistId=playlist_id,
            maxResults = 50,
            pageToken =next_page_token)
        response = request.execute()
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])
        next_page_token = response.get('nextPageToken')
    return video_ids

In [9]:
video_ids = get_video_ids(youtube,channel_stats['playlistId'][0])

In [10]:
len(video_ids)

428

In [11]:
def get_video_details(youtube,video_ids):
    all_video_info = []
    stats_to_keep = {
        'snippet':['channelTitle','title','description','tags','publishedAt'],
        'statistics':['viewCount','likeCount','commentCount'],
        'contentDetails':['duration','definition','caption'],
        'liveStreamingDetails':['actualStartTime','actualEndTime','scheduledStartTime','scheduledEndTime','concurrentViewers','activeLiveChatId']
    }
    for i in range (0,len(video_ids),50):
        request = youtube.videos().list(
            part="snippet,statistics,contentDetails,liveStreamingDetails",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response['items']:
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None
            
            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

In [12]:
video_df = get_video_details(youtube,video_ids)

In [13]:
video_df['tagsCount'] = video_df['tags'].str.len()

In [14]:
numeric_cols = ['viewCount','likeCount','commentCount','tagsCount']
video_df[numeric_cols] = video_df[numeric_cols].apply(pd.to_numeric,errors='coerce',axis=1)

In [15]:
video_df.head(1)

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,commentCount,duration,definition,caption,actualStartTime,actualEndTime,scheduledStartTime,scheduledEndTime,concurrentViewers,activeLiveChatId,tagsCount
0,xSJYWm_ufmw,Kobo Kanaeru Ch. hololive-ID,【Tekken 8】KOBO COBA GAME BERANTEM UNTUK PERTAM...,==================================\nDONASI LOK...,"[indo, indonesia, indonesian, person, story, c...",2024-05-06T11:00:11Z,0.0,87.0,0.0,P0D,sd,False,,,2024-05-06T14:00:00Z,,,Cg0KC3hTSllXbV91Zm13KicKGFVDakxFbW5wQ05laXNNeH...,53.0


In [16]:
video_df.isnull().sum()

video_id                0
channelTitle            0
title                   0
description             0
tags                  241
publishedAt             0
viewCount               0
likeCount               0
commentCount            1
duration                0
definition              0
caption                 0
actualStartTime        19
actualEndTime          19
scheduledStartTime     18
scheduledEndTime      428
concurrentViewers     428
activeLiveChatId      426
tagsCount             241
dtype: int64

In [17]:
def convert_to_date_time(date):
    try:
        return parser.parse(date)
    except:
        return None

In [18]:
date_time_cols = ['publishedAt','actualStartTime','actualEndTime','scheduledStartTime','scheduledEndTime']
video_df[date_time_cols] = video_df[date_time_cols].apply(lambda x: [convert_to_date_time(item) for item in x ])
video_df['publishDayName'] = video_df['publishedAt'].apply(lambda x: x.strftime("%A"))

In [19]:
import isodate
video_df['durationSecs'] = video_df['duration'].apply(lambda x:isodate.parse_duration(x).total_seconds())
video_df[video_df["video_id"]=="4ko8ZH4boWU"]


Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,commentCount,duration,...,caption,actualStartTime,actualEndTime,scheduledStartTime,scheduledEndTime,concurrentViewers,activeLiveChatId,tagsCount,publishDayName,durationSecs
427,4ko8ZH4boWU,Kobo Kanaeru Ch. hololive-ID,3007kk のライブ配信,,,2022-02-28 05:29:28+00:00,0.0,14.0,0.0,P0D,...,False,NaT,NaT,NaT,,,Cg0KCzRrbzhaSDRib1dVKicKGFVDakxFbW5wQ05laXNNeH...,,Monday,0.0


In [20]:
import numpy as np
video_df['tags'] = video_df['tags'].to_json()
video_df = video_df.drop(video_df[video_df.video_id == "4ko8ZH4boWU"].index)
len(video_df)

427

In [21]:
video_df.to_csv('out.csv', index=False)

#UPLOAD TO GBQ

In [22]:
from google.oauth2 import service_account
pk_json_input = "youtube-api-422410-d3df76a1cd66.json"
auth = service_account.Credentials.from_service_account_file(pk_json_input)

In [23]:
video_df.to_gbq(
'youtube_stats_data.youtube_stats_table',credentials=auth, if_exists='replace',
)

  video_df.to_gbq(
