In [1]:
import os
import time

import numpy as np
import pandas as pd
from googleapiclient.discovery import build
from IPython.display import JSON

### Create API Key

To create a YouTube API key, go to the Google Developer Console (https://console.developers.google.com).

In [2]:
# Read the API key from the environment so the secret is never stored in the
# notebook. Set it before starting Jupyter, e.g. `export YOUTUBE_API_KEY=...`.
# A key that has been committed to a shared notebook should be revoked and replaced.
api_key = os.environ.get("YOUTUBE_API_KEY", "")
if not api_key:
    print("YOUTUBE_API_KEY is not set; requests to the YouTube Data API are expected to fail.")

# Channel ID(s) to analyse.
channel_ids = ['UC16niRr50-MSBwiO3YDb3RA']

### Get credentials and create an API client

In [3]:
# Service name and version passed to the discovery client factory.
api_service_name = "youtube"
api_version = "v3"

# Client object used by every request in this notebook; authenticated with api_key.
youtube = build(api_service_name, api_version, developerKey=api_key)



### Create a function to get the channel statistics and the uploads playlist ID of the channel

In [4]:
def get_channel_stats(youtube, channel_ids):
    """Fetch summary statistics and the uploads playlist ID for each channel.

    Parameters
    ----------
    youtube
        API client; only ``youtube.channels().list(...).execute()`` is used.
    channel_ids
        Channel ID(s), forwarded unchanged as the ``id`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per channel in the response, with the columns ``channelName``,
        ``subscribers``, ``views``, ``totalVideos`` and ``playlistId``. Values
        are stored exactly as they appear in the response; a statistic that is
        missing from the response is stored as ``None``. The columns are present
        even when the response contains no channels.
    """
    columns = ['channelName', 'subscribers', 'views', 'totalVideos', 'playlistId']

    request = youtube.channels().list(part="snippet,contentDetails,statistics", id=channel_ids)
    response = request.execute()

    all_data = []
    # The response may carry no 'items' key at all (e.g. when no channel matches),
    # so fall back to an empty list instead of raising KeyError.
    for item in response.get('items', []):
        statistics = item['statistics']
        all_data.append({
            'channelName': item['snippet']['title'],
            # A channel can hide its subscriber count, in which case the key is absent.
            'subscribers': statistics.get('subscriberCount'),
            'views': statistics.get('viewCount'),
            'totalVideos': statistics.get('videoCount'),
            'playlistId': item['contentDetails']['relatedPlaylists']['uploads'],
        })

    return pd.DataFrame(all_data, columns=columns)

In [5]:
# Fetch one summary row per ID in channel_ids.
channel_stats = get_channel_stats(youtube,channel_ids)

In [6]:
# Display the channel summary table (last expression of the cell).
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,BBC News,13500000,4119803292,17046,UU16niRr50-MSBwiO3YDb3RA


### Using the channel's unique playlist ID, get all video IDs

In [7]:
# Uploads playlist to walk; this is the same value as the playlistId column
# shown in the channel_stats output above.
playlistId = "UU16niRr50-MSBwiO3YDb3RA"

def get_video_ids(youtube,playlistId):
    """Return the video ID of every item in a playlist.

    Requests pages of up to 50 items from ``youtube.playlistItems().list`` and
    keeps following ``nextPageToken`` until a response no longer contains one.
    The IDs are returned as a list, in the order the responses delivered them.
    """
    # Parameters shared by every page request; the page token is added only
    # once a previous response has supplied one.
    query = {
        "part": "snippet,contentDetails",
        "playlistId": playlistId,
        "maxResults": 50,
    }

    collected = []
    while True:
        page = youtube.playlistItems().list(**query).execute()
        collected.extend(entry['contentDetails']['videoId'] for entry in page['items'])

        token = page.get('nextPageToken')
        if token is None:
            return collected
        query["pageToken"] = token
    
    

In [8]:
# Collect the ID of every item in the playlist (one request per page of up to 50 items).
video_ids = get_video_ids(youtube,playlistId)

### This function extracts the useful fields from the JSON response. Each lookup is guarded so that a field the API does not return for a video (for example, the comment count when comments have been disabled) is stored as None instead of raising an error.

In [12]:
def _video_field(video, section, key):
    """Return ``video[section][key]``, or ``None`` when that entry is not available."""
    try:
        return video[section][key]
    except (KeyError, TypeError):
        # KeyError: the section or key is absent; TypeError: the section is not
        # subscriptable (e.g. None). Anything else is a real error and propagates.
        return None


def filter_response_data(video):
    """Flatten one video resource into a single-row DataFrame.

    Parameters
    ----------
    video : dict
        One video entry of an API response. ``video["id"]`` is required; every
        other field is optional.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``video_id``, ``channel_title``,
        ``video_title``, ``tags``, ``publishedAt``, ``view_count``,
        ``like_count``, ``commentCount``, ``duration``, ``definition`` and
        ``description``. A field that is missing from ``video`` is stored as
        ``None``.

    Raises
    ------
    KeyError
        If ``video`` has no ``"id"`` entry.
    """
    video_id = video["id"]

    video_title = _video_field(video, 'snippet', 'title')
    if video_title is not None:
        # Decode the HTML-escaped ampersand so titles read "A & B" rather than "A &amp; B".
        video_title = str(video_title).replace("&amp;", "&")

    publishedAt = _video_field(video, 'snippet', 'publishedAt')
    if publishedAt is not None:
        # Keep only the part before the "T" separator of the timestamp (the date).
        publishedAt = str(publishedAt).split("T")[0]

    row = {
        'video_id': video_id,
        'channel_title': _video_field(video, 'snippet', 'channelTitle'),
        'video_title': video_title,
        'tags': _video_field(video, 'snippet', 'tags'),
        'publishedAt': publishedAt,

        "view_count": _video_field(video, 'statistics', 'viewCount'),
        "like_count": _video_field(video, 'statistics', 'likeCount'),
        "commentCount": _video_field(video, 'statistics', 'commentCount'),

        "duration": _video_field(video, 'contentDetails', 'duration'),
        "definition": _video_field(video, 'contentDetails', 'definition'),

        'description': _video_field(video, 'snippet', 'description'),
    }

    return pd.DataFrame([row])

### Fetch the details of every video, saving the accumulated data to a CSV file at regular intervals so that nothing is lost if an error occurs partway through. The last CSV file contains all the data.

In [13]:
# Empty starting frame for get_video_details; it only fixes the column names and their order.
data_frame = pd.DataFrame(
    columns=[
        "video_id",
        "channel_title",
        "video_title",
        "tags",
        "publishedAt",
        "view_count",
        "like_count",
        "commentCount",
        "duration",
        "definition",
        "description",
    ]
)

def _write_checkpoint(frame, output_prefix, page_counter, pause_seconds):
    """Save ``frame`` as ``<output_prefix><page_counter>_pages_v5.csv`` and report it."""
    document_name = output_prefix + str(page_counter) + "_pages_v5.csv"

    # Create the target folder on first use so a fresh checkout does not fail.
    folder = os.path.dirname(document_name)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Handing pandas the path (instead of a handle from open(..., "w")) lets it
    # pick the encoding and newline handling itself, independent of the OS locale.
    frame.to_csv(document_name)

    # Optional pause after each save; it now happens once the file is closed.
    if pause_seconds:
        time.sleep(pause_seconds)
    print("Pages saved: " + str(page_counter))


def get_video_details(youtube, video_ids, df, save_every=20,
                      output_prefix="Data/bbc_news_", pause_seconds=5):
    """Fetch details for every video ID and append them to ``df``.

    The IDs are requested in batches of 50 through ``youtube.videos().list``.
    Each returned video is converted with ``filter_response_data`` and the
    accumulated result is written to a CSV checkpoint every ``save_every``
    batches and once more at the end, so the last file holds all rows.

    Parameters
    ----------
    youtube
        API client; only ``youtube.videos().list(...).execute()`` is used.
    video_ids : list
        Video IDs to look up.
    df : pandas.DataFrame
        Frame the new rows are appended to (not modified in place).
    save_every : int, optional
        Number of batches between checkpoints. A falsy value disables the
        intermediate checkpoints; the final file is still written.
    output_prefix : str, optional
        Path prefix of the checkpoint files; the batch count and
        ``"_pages_v5.csv"`` are appended to it.
    pause_seconds : float, optional
        Seconds to sleep after each checkpoint.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by one row per video returned by the API, with a fresh
        0..n-1 index.
    """
    pending = []          # per-video frames collected since the last merge
    page_counter = 0
    last_saved_page = None

    for start in range(0, len(video_ids), 50):
        page_counter += 1

        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_ids[start:start + 50]
        )
        response = request.execute()

        # Tolerate a response without 'items' rather than raising KeyError.
        pending.extend(filter_response_data(video) for video in response.get('items', []))

        if save_every and page_counter % save_every == 0:
            # Merge once per checkpoint instead of once per video: repeated
            # concatenation copies the whole frame each time.
            df = pd.concat([df, *pending], axis=0, ignore_index=True)
            pending = []
            _write_checkpoint(df, output_prefix, page_counter, pause_seconds)
            last_saved_page = page_counter

    df = pd.concat([df, *pending], axis=0, ignore_index=True)
    # Skip the final write when the last batch already produced this exact file.
    if last_saved_page != page_counter:
        _write_checkpoint(df, output_prefix, page_counter, pause_seconds)

    return df


In [15]:
# Fetch the details of every collected video ID, starting from the empty
# data_frame; the call also writes the CSV checkpoints and prints its progress.
video_df = get_video_details(youtube,video_ids,data_frame)
# Display the resulting frame.
video_df

Pages saved: 20
Pages saved: 40
Pages saved: 60
Pages saved: 80
Pages saved: 100
Pages saved: 120
Pages saved: 140
Pages saved: 160
Pages saved: 180
Pages saved: 200
Pages saved: 220
Pages saved: 240
Pages saved: 260
Pages saved: 280
Pages saved: 300
Pages saved: 320
Pages saved: 340
Pages saved: 341


Unnamed: 0,video_id,channel_title,video_title,tags,publishedAt,view_count,like_count,commentCount,duration,definition,description
0,Y8l3zzlyM4M,BBC News,US President Biden shakes hands with Chinese P...,"[bbc, bbc news, news]",2022-11-14,151642,2879,1318,PT7M33S,hd,The leaders of the US and China have met face-...
1,ma1Jny_cOjU,BBC News,Suspect arrested after six killed in Istanbul ...,"[bbc, bbc news, news]",2022-11-14,72879,1085,499,PT6M56S,hd,Turkish authorities say a suspect has been arr...
2,VD_BegNj3rs,BBC News,Can AI-generated improvised comedy be funny? -...,"[bbc, bbc news, news]",2022-11-13,22076,366,253,PT4M25S,hd,Artificial intelligence (AI) is being used in ...
3,WeGIK9qi-KU,BBC News,World War Two: 'I thought I was going on a day...,"[bbc, bbc news, news]",2022-11-13,28744,770,169,PT3M25S,hd,“I thought I was going on a day trip.”\n\nSept...
4,qu4RdyQVGZs,BBC News,How could real and virtual people connect in t...,"[bbc, bbc news, news]",2022-11-13,22392,478,317,PT5M7S,hd,Digital people have become part of the latest ...
...,...,...,...,...,...,...,...,...,...,...,...
17042,VOilMejK2tk,BBC News,GIANT SINKHOLE SWALLOWS TREES - BBC NEWS,"[US, bbc news, British Broadcasting Corporatio...",2013-08-22,489,7,4,PT31S,sd,Subscribe to BBC News www.youtube.com/bbcnews\...
17043,SnPKetrLNLk,BBC News,NORTHERN LIGHTS ON DISPLAY IN SCOTLAND - BBC NEWS,"[scotland, Bbc News, northern lights, Caithnes...",2013-08-22,4744,26,2,PT2M22S,sd,Subscribe to BBC News www.youtube.com/bbcnews\...
17044,46v2jtu45-0,BBC News,Russia: Military Hovercraft Shocks Sunbathers ...,"[russia, russia hovercraft, beach, British Bro...",2013-08-22,6132,85,12,PT51S,sd,Sunseekers on Russia's Baltic Coast were in fo...
17045,BO_7gT3XpTE,BBC News,Tariq Ramadan: 'Egypt's Return to A Mubarak St...,"[Tariq Ramadan (Author), news, bbc, bbc news, ...",2013-08-21,812,12,11,PT6M44S,sd,"Tariq Ramadan, a Muslim theologian whose grand..."
