In [None]:
import html
import os
from datetime import datetime

import googleapiclient.discovery
import pandas as pd
import seaborn as sns

In [None]:
# Read the API key from the YT_API_KEY environment variable so it never has to
# be saved inside the notebook; the empty-string fallback keeps the original
# placeholder behaviour when the variable is not set.
YT_API_KEY = os.environ.get("YT_API_KEY", "")
channel_ids = [
    "UCwu4cYxN24_2O0X9ykap8hw", # wyeth
    "UC4adGM7Q1oiH0eWYdtM_EHw", # pediasure
    "UCx4BmO2RjaFfUv4HB685vDA", # nutrilon
    "UC3zUFRtowBsPAEwdO6TV2bw", # morinaga
    "UC1ucIleAU9-NWaxh2k9ltPw", # bebelac
    "UCwU1sjdXRELEDf058SJQjhQ", # sgm
    "UCfL8ndP9r55-K1fUnz2bAtQ", # enfagrow
    "UC658JebjTsQJGp3GcJXZCMQ", # lactogrow
]
# Client object for the YouTube Data API v3; every request below goes through it.
yt = googleapiclient.discovery.build('youtube', 'v3', developerKey=YT_API_KEY)
print(yt)

In [3]:
def datetime_formatter(date_obj):
    """Convert an ISO-8601 UTC timestamp string into a "Month, Year" label.

    Args:
        date_obj: Timestamp string such as ``"2011-07-15T03:22:10Z"``. A
            fractional-seconds part (``"2011-07-15T03:22:10.123456Z"``) is
            accepted as well.

    Returns:
        The full month name and year, e.g. ``"July, 2011"``.

    Raises:
        ValueError: If ``date_obj`` matches neither supported layout.
    """
    # Try the whole-second layout first, then the variant with fractional seconds.
    accepted_formats = ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ')
    for fmt in accepted_formats:
        try:
            dt_object = datetime.strptime(date_obj, fmt)
        except ValueError:
            continue
        # Convert to the desired format (Month, Year)
        return dt_object.strftime('%B, %Y')

    raise ValueError(
        "time data {!r} does not match any of the formats {}".format(
            date_obj, accepted_formats
        )
    )

In [4]:
def get_channel_details(yt, channel_ids: list):
    """Fetch summary details for a set of YouTube channels in one request.

    Args:
        yt: API client exposing ``channels().list(...).execute()``.
        channel_ids: Channel IDs; they are joined with commas into the ``id``
            filter of a single request.

    Returns:
        A list with one dict per channel found in the response, holding the
        keys ``name``, ``published_at`` (formatted by ``datetime_formatter``),
        ``views``, ``subscribers``, ``total_videos`` and ``upload_id``.
        Entries follow the order of the response, not of ``channel_ids``.
        Returns an empty list when the response carries no ``items``.
    """
    request = yt.channels().list(
        part="snippet,contentDetails,statistics",
        id=",".join(channel_ids)
    )
    response = request.execute()

    channel_stats = []
    # A response without an "items" key is treated as "no channels found"
    # instead of raising KeyError.
    for channel in response.get("items", []):
        snippet = channel["snippet"]
        statistics = channel["statistics"]
        channel_stats.append(
            {
                "name": snippet["title"],
                "published_at": datetime_formatter(snippet["publishedAt"]),
                # Counts default to 0 when a field is missing, matching how
                # get_video_details treats absent statistics.
                # NOTE(review): subscriberCount is presumably absent for
                # channels that hide their subscriber count -- confirm.
                "views": statistics.get("viewCount", 0),
                "subscribers": statistics.get("subscriberCount", 0),
                "total_videos": statistics.get("videoCount", 0),
                "upload_id": channel["contentDetails"]["relatedPlaylists"]["uploads"]
            }
        )
    return channel_stats

In [5]:
# Query the API once for all configured channels; the result is a list of dicts.
channel_stats_data = get_channel_details(yt, channel_ids)

In [6]:
# One row per channel, one column per key of the dicts returned above.
channel_data = pd.DataFrame(channel_stats_data)

In [7]:
# Display the per-channel summary table.
channel_data

Unnamed: 0,name,published_at,views,subscribers,total_videos,upload_id
0,Aku Anak SGM,"July, 2011",681448698,675000,362,UUwU1sjdXRELEDf058SJQjhQ
1,Nestlé LACTOGROW,"December, 2017",59621342,34300,67,UU658JebjTsQJGp3GcJXZCMQ
2,Wyeth Nutrition Indonesia,"August, 2017",187240881,24100,146,UUwu4cYxN24_2O0X9ykap8hw
3,Enfa Smart Center,"February, 2013",294690977,49900,272,UUfL8ndP9r55-K1fUnz2bAtQ
4,Nutriclub Indonesia,"December, 2013",119742281,22700,175,UUx4BmO2RjaFfUv4HB685vDA
5,Morinaga Platinum,"May, 2012",89883816,25500,361,UU3zUFRtowBsPAEwdO6TV2bw
6,Bebeclub Indonesia,"July, 2013",299146109,54600,359,UU1ucIleAU9-NWaxh2k9ltPw
7,Abbott Family Indonesia,"September, 2017",449455998,14200,162,UU4adGM7Q1oiH0eWYdtM_EHw


In [8]:
# Run the three count columns through pd.to_numeric so they carry numeric
# dtypes before plotting, then show the resulting dtypes.
for count_column in ("views", "subscribers", "total_videos"):
    channel_data[count_column] = pd.to_numeric(channel_data[count_column])
channel_data.dtypes

name            object
published_at    object
views            int64
subscribers      int64
total_videos     int64
upload_id       object
dtype: object

## Channel Comparison: Total Views

In [None]:
# Set a 20x10 figure size, then draw one bar per channel showing its view count.
sns.set(rc={"figure.figsize": (20, 10)})
vc = sns.barplot(data=channel_data, x="name", y="views")

## Channel Comparison: Subscribers

In [None]:
# One bar per channel showing its subscriber count.
sc = sns.barplot(data=channel_data, x="name", y="subscribers")

## Channel Comparison: Total Videos

In [None]:
# One bar per channel showing how many videos it has uploaded.
sc = sns.barplot(data=channel_data, x="name", y="total_videos")

## Get list of video ids

In [12]:
def get_video_ids(yt, upload_id: str):
    """Collect the video IDs of every item in a playlist.

    Requests the playlist 50 items at a time and keeps following the
    ``nextPageToken`` of each response until a response has none.

    Args:
        yt: API client exposing ``playlistItems().list(...).execute()``.
        upload_id: ID of the playlist to walk.

    Returns:
        A list of video ID strings in the order the pages returned them.
    """
    collected_ids = []
    page_token = None

    while True:
        params = {
            "part": "contentDetails",
            "playlistId": upload_id,
            "maxResults": 50,
        }
        # The first request carries no pageToken at all; later ones pass the
        # token handed back by the previous page.
        if page_token is not None:
            params["pageToken"] = page_token

        page = yt.playlistItems().list(**params).execute()
        collected_ids.extend(
            entry["contentDetails"]["videoId"] for entry in page["items"]
        )

        page_token = page.get("nextPageToken")
        if page_token is None:
            break

    return collected_ids

In [13]:
# Walk each channel's uploads playlist and pool the video IDs into one flat list.
all_video_ids = []
for ch in channel_stats_data:
    all_video_ids.extend(get_video_ids(yt, ch["upload_id"]))

len(all_video_ids)

1907

## Get video details

In [14]:
def get_video_details(yt, video_ids: list):
    """Fetch title, publish date, description and statistics for many videos.

    The IDs are sent in consecutive batches of 50, one request per batch.

    Args:
        yt: API client exposing ``videos().list(...).execute()``.
        video_ids: Video ID strings to look up.

    Returns:
        A list with one dict per video found in the responses, holding the
        keys ``brand_name``, ``title``, ``published_date``, ``description``,
        ``views``, ``likes``, ``dislikes``, ``favorites``, ``comments`` and
        ``link``. Missing text fields default to ``""`` and missing counts
        to ``0``. An empty ``video_ids`` yields an empty list without any
        request being made.
    """
    # Number of IDs per request; also passed as maxResults, as before.
    batch_size = 50

    all_video_details = []
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        request = yt.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(batch),
            maxResults=batch_size
        )
        response = request.execute()

        # A response without "items" contributes no rows instead of raising
        # KeyError and discarding the batches already collected.
        for vid in response.get("items", []):
            snippet = vid["snippet"]
            # Treat an absent statistics object like one with no counts.
            statistics = vid.get("statistics", {})
            all_video_details.append(
                {
                    "brand_name": snippet["channelTitle"],
                    "title": snippet["title"],
                    "published_date": snippet.get("publishedAt", ""),
                    "description": snippet.get("description", ""),
                    "views": statistics.get("viewCount", 0),
                    "likes": statistics.get("likeCount", 0),
                    "dislikes": statistics.get("dislikeCount", 0),
                    "favorites": statistics.get("favoriteCount", 0),
                    "comments": statistics.get("commentCount", 0),
                    "link": "https://www.youtube.com/watch?v={}".format(vid["id"])
                }
            )
    return all_video_details

In [15]:
# Fetch details for every collected video ID, then show how many records came back.
video_data = get_video_details(yt, all_video_ids)
len(video_data)

1907

In [16]:
video_df = pd.DataFrame(video_data)
# Parse the publish timestamps and keep only the calendar date.
video_df["published_date"] = pd.to_datetime(video_df["published_date"]).dt.date
# Run every count column through pd.to_numeric so it carries a numeric dtype.
for metric in ("views", "likes", "dislikes", "favorites", "comments"):
    video_df[metric] = pd.to_numeric(video_df[metric])
video_df.dtypes

brand_name        object
title             object
published_date    object
description       object
views              int64
likes              int64
dislikes           int64
favorites          int64
comments           int64
link              object
dtype: object

In [17]:
# Order all videos by view count, highest first, and keep the first ten rows.
videos_ranked_by_views = video_df.sort_values(by="views", ascending=False)
top_10_by_views = videos_ranked_by_views.head(10)

## TOP 10 VIDEOS BY VIEWS

In [18]:
# Display the ten most-viewed videos across all channels.
top_10_by_views

Unnamed: 0,brand_name,title,published_date,description,views,likes,dislikes,favorites,comments,link
287,Aku Anak SGM,Upin & Ipin Musim 12: Untuk Prestasi - Bagian 2,2018-10-19,Upin & Ipin bersama SGM EKSPLOR 3+ dan SGM AKT...,111281879,304924,0,0,0,https://www.youtube.com/watch?v=rkxk6ChkypI
588,Enfa Smart Center,Enfagrow A+ MFGM Pro - Dukung #HigherIQ si Kec...,2023-01-14,"Mom, yuk optimalkan perkembangan IQ A+ Kids de...",51900826,0,0,0,0,https://www.youtube.com/watch?v=jjDfhRQGMeg
1484,Bebeclub Indonesia,Bebelac Gold Soya 3,2020-06-04,"Ibu, sudahkah ibu mencoba Bebelac Gold Soya 3 ...",35897012,79,0,0,0,https://www.youtube.com/watch?v=pvmTThS5THU
1774,Abbott Family Indonesia,Jaga Daya Tahan Tubuh si Kecil,2022-09-26,Pastikan proses eksplorasi si Kecil tidak terh...,31241285,11,0,0,0,https://www.youtube.com/watch?v=3v3hw24MIzI
286,Aku Anak SGM,Upin & Ipin Musim 12: Untuk Prestasi - Bagian 3,2018-10-19,Upin & Ipin bersama SGM EKSPLOR 3+ dan SGM AKT...,30069094,88758,0,0,0,https://www.youtube.com/watch?v=CHe5rmEynZE
1756,Abbott Family Indonesia,Ensure Gold dengan HMB dan Triple Protein Duku...,2023-05-19,"Untuk memulihkan kekuatan, tubuh butuh waktu 6...",29097178,10,0,0,4,https://www.youtube.com/watch?v=wvymLw1kf6c
605,Enfa Smart Center,Enfagrow Essential - Smart with Heart,2021-09-15,"Dengan semakin banyak si Kecil bertanya, semak...",28075373,0,0,0,0,https://www.youtube.com/watch?v=29LCboGEEEY
1778,Abbott Family Indonesia,"Ensure Gold: Bantu jaga kekuatan, massa otot, ...",2022-08-29,Baru! Ensure Gold Rasa Gandum yang lezat denga...,26853401,54,0,0,28,https://www.youtube.com/watch?v=EoIShdUxYZg
1398,Bebeclub Indonesia,Dukung si Kecil Lakukan Aksi Hebatnya dengan B...,2023-06-16,"Bu, aksi hebat si Kecil tak perlu menunggu mom...",25688349,15,0,0,0,https://www.youtube.com/watch?v=aNUlKUrNN8M
650,Enfa Smart Center,Enfagrow A+ Neurapro - Imunitas Kuat Dukung Be...,2020-09-17,"Tahukah ibu, daya tahan tubuh si Kecil menjadi...",25595972,0,0,0,0,https://www.youtube.com/watch?v=Nu0LsJBj55E


In [None]:
# Switch to an 8x8 figure and draw horizontal bars: one per top-10 video,
# coloured by the channel that published it.
sns.set(rc={"figure.figsize": (8, 8)})
top_videos_chart_from_all_channels = sns.barplot(
    data=top_10_by_views,
    x="views",
    y="title",
    hue="brand_name",
)

In [20]:
import base64
from IPython.display import HTML

def create_download_link(df, title="Download CSV file", filename="data.csv"):
    """Build a clickable link that downloads ``df`` as a CSV file.

    The CSV text is base64-encoded and embedded in a ``data:`` URI, so the
    link works without writing a file to disk.

    Args:
        df: Object with a ``to_csv()`` method returning the CSV text.
        title: Link text shown to the user.
        filename: Name placed in the anchor's ``download`` attribute.

    Returns:
        An ``IPython.display.HTML`` object wrapping the anchor tag.
    """
    csv_text = df.to_csv()
    payload = base64.b64encode(csv_text.encode()).decode()
    # Escape the caller-supplied strings so quotes or angle brackets in them
    # cannot break out of the attribute or the tag. The base64 payload only
    # contains characters that are safe inside an attribute value.
    markup = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'.format(
        payload=payload,
        title=html.escape(title),
        filename=html.escape(filename, quote=True),
    )
    return HTML(markup)

# Render a download link for the full per-video table (default title and filename).
create_download_link(video_df)