In [1]:
import os

import numpy as np
import pandas as pd
from googleapiclient.discovery import build
from IPython.display import JSON

### Create API Key

To create a YouTube API key, go to the Google Developer Console (https://console.developers.google.com).

In [2]:
# Read the API key from the environment so the secret never lives in the notebook.
# NOTE(review): a literal key used to be committed in this cell; revoke it in the
# Google Developer Console and export a fresh one as YOUTUBE_API_KEY.
api_key = os.environ.get("YOUTUBE_API_KEY", "")
if not api_key:
    print("WARNING: YOUTUBE_API_KEY is not set; YouTube API requests will fail.")

# Channel(s) to analyse (this ID is the BBC News channel shown in the outputs below).
channel_ids = ['UC16niRr50-MSBwiO3YDb3RA']

### Get credentials and create an API client

In [3]:
# Construct the API client for service "youtube", version "v3",
# handing the key to the client as its developerKey.
api_service_name = "youtube"
api_version = "v3"
youtube = build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=api_key,
)



### Create a function to get the channel statistics and the playlist ID of the channel

In [4]:
def get_channel_stats(youtube,channel_ids):
    """Fetch summary statistics and the uploads playlist ID for each channel.

    Parameters
    ----------
    youtube
        API client exposing ``channels().list(...).execute()``.
    channel_ids
        Channel ID(s), passed through unchanged as the ``id`` argument.

    Returns
    -------
    pandas.DataFrame
        One row per item in the response, with columns ``channelName``,
        ``subscribers``, ``views``, ``totalVideos`` and ``playlistId``.
        Values are stored exactly as the API returned them (no numeric
        conversion). The frame is empty, but still has these columns, when
        the response contains no items.

    Raises
    ------
    KeyError
        If a returned item lacks its title or its uploads playlist ID.
    """
    columns = ['channelName', 'subscribers', 'views', 'totalVideos', 'playlistId']

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_ids,
    )
    response = request.execute()

    all_data = []
    # Treat a response without an 'items' key as "no channels" instead of crashing.
    for item in response.get('items', []):
        # Individual counters may be absent (presumably e.g. when a channel hides
        # its subscriber count -- TODO confirm); record None rather than raising.
        statistics = item.get('statistics', {})
        all_data.append({
            'channelName': item['snippet']['title'],
            'subscribers': statistics.get('subscriberCount'),
            'views': statistics.get('viewCount'),
            'totalVideos': statistics.get('videoCount'),
            'playlistId': item['contentDetails']['relatedPlaylists']['uploads'],
        })

    # Passing columns keeps the schema stable even for an empty result.
    return pd.DataFrame(all_data, columns=columns)

In [5]:
# One row of statistics per requested channel.
channel_stats = get_channel_stats(youtube, channel_ids)

In [6]:
# Show the per-channel summary frame built by get_channel_stats.
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,BBC News,12900000,3902031696,16575,UU16niRr50-MSBwiO3YDb3RA


### Using the unique playlist ID, get all video IDs

In [7]:
# Take the uploads playlist ID from the fetched channel statistics instead of
# hard-coding it, so it stays in sync with whatever is listed in channel_ids.
# Only the first channel's playlist is used.
playlistId = channel_stats['playlistId'].iloc[0]

def get_video_ids(youtube,playlistId):
    """Collect the video ID of every item in a playlist, following pagination.

    Parameters
    ----------
    youtube
        API client exposing ``playlistItems().list(...).execute()``.
    playlistId
        ID of the playlist to walk.

    Returns
    -------
    list
        Video IDs in the order the API returned them; empty when the
        playlist has no items.
    """
    video_ids = []
    next_page_token = None

    while True:
        request_kwargs = {
            # Only contentDetails is read below, so snippet is not requested.
            'part': "contentDetails",
            'playlistId': playlistId,
            # Ask for 50 items per page rather than relying on the API's default page size.
            'maxResults': 50,
        }
        if next_page_token is not None:
            request_kwargs['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**request_kwargs).execute()

        video_ids.extend(
            item['contentDetails']['videoId'] for item in response.get('items', [])
        )

        # Read the token once per page, outside the item loop: a page with no
        # items must still advance (or end) the pagination.
        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            return video_ids
    
    

In [8]:
# IDs of every video in the selected playlist.
video_ids = get_video_ids(youtube, playlistId)

### This function extracts the useful fields from the JSON response. Missing fields are filled with None, because the API does not return every field for every video (for example, when the comment section has been closed by the channel admin).

In [10]:
def filter_response_data(video):
    """Flatten one video item of an API response into a single-row DataFrame.

    Parameters
    ----------
    video : dict
        One video item. ``video["id"]`` is required; the ``snippet``,
        ``statistics`` and ``contentDetails`` sections and every field inside
        them are optional.

    Returns
    -------
    pandas.DataFrame
        Exactly one row with columns ``video_id``, ``channel_title``,
        ``video_title``, ``tags``, ``publishedAt``, ``view_count``,
        ``like_count``, ``favoriteCount``, ``commentCount``, ``duration``,
        ``definition``, ``caption`` and ``description``. A field that is
        missing from ``video`` is stored as ``None``.

    Raises
    ------
    KeyError
        If ``video`` has no ``"id"`` key.
    """
    video_id = video["id"]

    # Explicit lookups with a None default replace the former bare ``except:``
    # clauses, which also hid unrelated errors.
    snippet = video.get('snippet') or {}
    statistics = video.get('statistics') or {}
    content_details = video.get('contentDetails') or {}

    video_title = snippet.get('title')
    if video_title is not None:
        # Decode the HTML-escaped ampersand. The previous pattern "&amp:" (with a
        # colon) could never match the "&amp;" entity.
        video_title = str(video_title).replace("&amp;", "&")

    publishedAt = snippet.get('publishedAt')
    if publishedAt is not None:
        # Keep only the part before the "T" separator (the date in the sample output).
        publishedAt = str(publishedAt).split("T")[0]

    row = {
        'video_id': video_id,
        'channel_title': snippet.get('channelTitle'),
        'video_title': video_title,
        'tags': snippet.get('tags'),
        'publishedAt': publishedAt,

        "view_count": statistics.get('viewCount'),
        "like_count": statistics.get('likeCount'),
        "favoriteCount": statistics.get('favoriteCount'),
        "commentCount": statistics.get('commentCount'),

        "duration": content_details.get('duration'),
        "definition": content_details.get('definition'),
        "caption": content_details.get('caption'),

        'description': snippet.get('description'),
    }

    return pd.DataFrame([row])

### We fetch the details of every video and periodically save the accumulated data to CSV files, so that no data is lost if an error occurs. The last CSV file contains all the data.

In [11]:
# Empty frame that only fixes the column layout of the collected video details.
data_frame = pd.DataFrame(
    columns=[
        "video_id",
        "channel_title",
        "video_title",
        "tags",
        "publishedAt",
        "view_count",
        "like_count",
        "favoriteCount",
        "commentCount",
        "duration",
        "definition",
        "caption",
        "description",
    ]
)

def _append_frames(df, frames):
    """Return ``df`` with every frame in ``frames`` appended in a single concat."""
    if not frames:
        return df
    return pd.concat([df, *frames], axis=0, ignore_index=True)


def _save_checkpoint(frame, page_counter, output_dir):
    """Write ``frame`` to ``<output_dir>/bbc_news_<page_counter>_pages_v5.csv``."""
    # Create the target directory on first use instead of requiring it to exist.
    os.makedirs(output_dir, exist_ok=True)
    document_name = os.path.join(
        output_dir, "bbc_news_" + str(page_counter) + "_pages_v5.csv"
    )
    # Hand pandas the path (not a text-mode handle) so it controls encoding and
    # newlines itself; UTF-8 keeps non-ASCII titles and descriptions intact.
    frame.to_csv(document_name, encoding="utf-8")
    print("Pages saved: " + str(page_counter))


def get_video_details(youtube, video_ids, df, pages_per_checkpoint=20, output_dir="Data"):
    """Fetch details for every video ID and checkpoint the results to CSV.

    The IDs are requested in batches of 50 (one "page" per request). Each
    returned item is flattened with ``filter_response_data`` and appended to
    ``df``. The accumulated frame is written to a CSV file every
    ``pages_per_checkpoint`` pages and once more at the end, so a failure
    part-way through loses at most the pages fetched since the last checkpoint.

    Parameters
    ----------
    youtube
        API client exposing ``videos().list(...).execute()``.
    video_ids : list
        Video IDs to look up.
    df : pandas.DataFrame
        Frame the new rows are appended to (not modified in place).
    pages_per_checkpoint : int, optional
        Save a checkpoint after this many pages. ``0`` or ``None`` disables
        intermediate checkpoints; the final file is still written.
    output_dir : str, optional
        Directory for the checkpoint files; created if it does not exist.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by one row per video item returned by the API.
    """
    batch_size = 50

    # Frames collected since the last merge. Merging once per checkpoint instead
    # of once per video avoids re-copying the whole frame for every row.
    pending_frames = []
    page_counter = 0
    last_saved_page = None

    for start in range(0, len(video_ids), batch_size):
        page_counter += 1

        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_ids[start:start + batch_size],
        )
        response = request.execute()

        # A response without 'items' simply contributes no rows.
        pending_frames.extend(
            filter_response_data(video) for video in response.get('items', [])
        )

        if pages_per_checkpoint and page_counter % pages_per_checkpoint == 0:
            df = _append_frames(df, pending_frames)
            pending_frames = []
            _save_checkpoint(df, page_counter, output_dir)
            last_saved_page = page_counter

    df = _append_frames(df, pending_frames)
    # Skip the final write when the last page was already checkpointed.
    if last_saved_page != page_counter:
        _save_checkpoint(df, page_counter, output_dir)

    return df


In [12]:
# Fetch the details of every collected video ID, starting from the empty schema frame.
video_df = get_video_details(youtube, video_ids, data_frame)
video_df

Pages saved: 20
Pages saved: 40
Pages saved: 60
Pages saved: 80
Pages saved: 100
Pages saved: 120
Pages saved: 140
Pages saved: 160
Pages saved: 180
Pages saved: 200
Pages saved: 220
Pages saved: 240
Pages saved: 260
Pages saved: 280
Pages saved: 300
Pages saved: 320
Pages saved: 332


Unnamed: 0,video_id,channel_title,video_title,tags,publishedAt,view_count,like_count,favoriteCount,commentCount,duration,definition,caption,description
0,jH_JCf8yNxc,BBC News,Chinese motorcyclists join wildfire battle – B...,"[bbc, bbc news, news]",2022-09-05,6615,157,0,43,PT2M6S,hd,false,A record-long heatwave and drought have trigge...
1,wE4sLU2651E,BBC News,Canada police hunt suspects over stabbing ramp...,"[bbc, bbc news, news]",2022-09-05,5566,136,0,,PT3M13S,hd,false,Canadian police have launched a huge manhunt f...
2,G_DVY9Pd6Tg,BBC News,Pakistan army leads flood rescue missions - BB...,"[bbc, bbc news, news, Pakistan, floods, floodi...",2022-09-04,65559,1508,0,644,PT2M23S,hd,false,Pakistan is calling on rich nations to pay rep...
3,5Qf57h1wCjo,BBC News,New ticketless transport tracks where you go v...,"[bbc, bbc news, news]",2022-09-04,34945,565,0,248,PT4M55S,hd,false,"Contactless payments have become commonplace, ..."
4,jDc2lYJ6rJ0,BBC News,Vanishing glaciers threaten Europe's water sup...,"[bbc, bbc news, news, glaciers, Europe, Climat...",2022-09-04,54270,877,0,376,PT2M55S,hd,false,Switzerland's glaciers have lost more than hal...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16571,VOilMejK2tk,BBC News,GIANT SINKHOLE SWALLOWS TREES - BBC NEWS,"[US, bbc news, British Broadcasting Corporatio...",2013-08-22,480,7,0,3,PT31S,sd,false,Subscribe to BBC News www.youtube.com/bbcnews\...
16572,SnPKetrLNLk,BBC News,NORTHERN LIGHTS ON DISPLAY IN SCOTLAND - BBC NEWS,"[scotland, Bbc News, northern lights, Caithnes...",2013-08-22,4670,24,0,2,PT2M22S,sd,false,Subscribe to BBC News www.youtube.com/bbcnews\...
16573,46v2jtu45-0,BBC News,Russia: Military Hovercraft Shocks Sunbathers ...,"[russia, russia hovercraft, beach, British Bro...",2013-08-22,5993,82,0,12,PT51S,sd,false,Sunseekers on Russia's Baltic Coast were in fo...
16574,BO_7gT3XpTE,BBC News,Tariq Ramadan: 'Egypt's Return to A Mubarak St...,"[Tariq Ramadan (Author), news, bbc, bbc news, ...",2013-08-21,789,12,0,9,PT6M44S,sd,false,"Tariq Ramadan, a Muslim theologian whose grand..."
