In [1]:
import json, requests, urllib
from pathlib import Path
from datetime import datetime
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Access token of Youtube
key = "####################################"
# Prefix for API URLs
api_prefix = "https://www.googleapis.com"
# The MV titles I want to analyse from youtube trending 1~6 at 28-Oct
video_ids = ["b1kbLwvqugk", "b7QlX3yR2xs", "CiwMDFh_Rog", "jDvYDzFOK9A", "j5y6xLpRwx4", "ospQ06jJe-I"]
# The video names of these videos
video_names = {"b1kbLwvqugk":"Anti-Hero", "b7QlX3yR2xs":"Bejeweled", "CiwMDFh_Rog":"Music For a Sushi Restaurant", "jDvYDzFOK9A":"I Drink Wine", "j5y6xLpRwx4":"Monotonía", "ospQ06jJe-I":"Lift Me Up"}
# Singer names of MV
singer_names = {"b1kbLwvqugk":"Taylor Swift", "b7QlX3yR2xs":"Taylor Swift", "CiwMDFh_Rog":"Harry Styles", "jDvYDzFOK9A":"Adale", "j5y6xLpRwx4":"Shakira", "ospQ06jJe-I":"Rihanna"}
# Channel id of MV
channel_ids = ["UCqECaJ8Gagnn7YCbPEzWH6g", "UCZFWPqqPkFlNwIxcpsLOwew", "UCsRM0YB_dabtEPGPTKo-gcw", "UCYLNGLIzMhRTi6ZOLjAPSmw", "UCcgqSM4YEo5vVQpqwN-MaNw"]

In [3]:
# Create directory for raw data storage, if it does not already exist
dir_raw = Path("raw")
dir_raw.mkdir(parents=True, exist_ok=True)

## Data Collection
Define a method to retrive data

In [4]:
def fetch(endpoint, params={}):
    # construct the url
    url = api_prefix
    if not endpoint.startswith("/"):
        url += "/"
    url += endpoint
    (api_prefix, endpoint)
    params["key"] = key
    params["part"] = "snippet,statistics"
    url += "?" + urllib.parse.urlencode(params)
    print("Fetching %s" % url)
    # fetch the page
    response = requests.get(url)
    jdata = response.text
    return json.loads(jdata)

In [5]:
video_metadata = {}
for video_id in video_ids:
    video_data = fetch("/youtube/v3/videos", {"id":video_id})
    video_metadata[video_id] = video_data

Fetching https://www.googleapis.com/youtube/v3/videos?id=b1kbLwvqugk&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Fetching https://www.googleapis.com/youtube/v3/videos?id=b7QlX3yR2xs&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Fetching https://www.googleapis.com/youtube/v3/videos?id=CiwMDFh_Rog&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Fetching https://www.googleapis.com/youtube/v3/videos?id=jDvYDzFOK9A&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Fetching https://www.googleapis.com/youtube/v3/videos?id=j5y6xLpRwx4&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Fetching https://www.googleapis.com/youtube/v3/videos?id=ospQ06jJe-I&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics


In [6]:
video_metadata["b1kbLwvqugk"]["items"][0]["snippet"]["publishedAt"]

'2022-10-21T12:00:06Z'

In [7]:
metadata_rows = []
for video_id in video_ids:
    row = {"title": video_names[video_id], "id": video_id}
    row["original title"] = video_metadata[video_id]["items"][0]["snippet"]["title"]
    row["channel id"] = video_metadata[video_id]["items"][0]["snippet"]["channelId"]
    row["singer"] = singer_names[video_id]
    row["release time"] = video_metadata[video_id]["items"][0]["snippet"]["publishedAt"]
    row["view"] = video_metadata[video_id]["items"][0]["statistics"]["viewCount"]
    row["like"] = video_metadata[video_id]["items"][0]["statistics"]["likeCount"]
    row["comment"] = video_metadata[video_id]["items"][0]["statistics"]["commentCount"]
    metadata_rows.append(row)
pd.DataFrame(metadata_rows).set_index("title")

Unnamed: 0_level_0,id,original title,channel id,singer,release time,view,like,comment
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Anti-Hero,b1kbLwvqugk,Taylor Swift - Anti-Hero (Official Music Video),UCANLZYMidaCbLQFWXBC95Jg,Taylor Swift,2022-10-21T12:00:06Z,38020923,2099186,65380
Bejeweled,b7QlX3yR2xs,Taylor Swift - Bejeweled (Official Music Video),UCANLZYMidaCbLQFWXBC95Jg,Taylor Swift,2022-10-25T04:00:09Z,17854236,1216916,42706
Music For a Sushi Restaurant,CiwMDFh_Rog,Harry Styles - Music For a Sushi Restaurant (O...,UCbOCbp5gXL8jigIBZLqMPrw,Harry Styles,2022-10-27T16:00:07Z,5776071,496505,26535
I Drink Wine,jDvYDzFOK9A,Adele - I Drink Wine (Official Video),UComP_epzeKzvBX156r6pm1Q,Adale,2022-10-26T16:00:06Z,5571196,329700,13099
Monotonía,j5y6xLpRwx4,"Shakira, Ozuna - Monotonía (Official Video)",UCGnjeahCJW1AF34HBmQTJ-Q,Shakira,2022-10-20T00:00:11Z,68008474,2846996,141039
Lift Me Up,ospQ06jJe-I,Rihanna - Lift Me Up (Visualizer),UC2xskkQVFEpLcGFnNSLQY0A,Rihanna,2022-10-28T04:08:02Z,3611151,387880,21263


In [8]:
def fetch_video_conditions(video_id):
    # fetch the current data
    endpoint = "/youtube/v3/videos"
    params = {"id":video_id}
    conditions_data = fetch(endpoint, params)
    # write it out to our raw dataset directory
    date_suffix = datetime.now().strftime("%Y%m%d-%H00")
    fname = "%s-%s.json" % (video_names[video_id], date_suffix)
    out_path = dir_raw / fname
    print("Writing data to %s" % out_path)
    fout = open(out_path, "w")
    json.dump(conditions_data, fout, indent=4, sort_keys=True)
    fout.close()

In [9]:
def fetch_channel_conditions(channel_id):
    # fetch the current data
    endpoint = "/youtube/v3/channels"
    params = {"id":channel_id}
    conditions_data = fetch(endpoint, params)
    # write it out to our raw dataset directory
    date_suffix = datetime.now().strftime("%Y%m%d-%H00")
    fname = "%s-%s.json" % (conditions_data["items"][0]["snippet"]["title"], date_suffix)
    out_path = dir_raw / fname
    print("Writing data to %s" % out_path)
    fout = open(out_path, "w")
    json.dump(conditions_data, fout, indent=4, sort_keys=True)
    fout.close()

In [10]:
for video_id in video_ids:
    fetch_video_conditions(video_id)
for channel_id in channel_ids:
    fetch_channel_conditions(channel_id)

Fetching https://www.googleapis.com/youtube/v3/videos?id=b1kbLwvqugk&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Writing data to raw/Anti-Hero-20221028-2000.json
Fetching https://www.googleapis.com/youtube/v3/videos?id=b7QlX3yR2xs&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Writing data to raw/Bejeweled-20221028-2000.json
Fetching https://www.googleapis.com/youtube/v3/videos?id=CiwMDFh_Rog&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Writing data to raw/Music For a Sushi Restaurant-20221028-2000.json
Fetching https://www.googleapis.com/youtube/v3/videos?id=jDvYDzFOK9A&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Writing data to raw/I Drink Wine-20221028-2000.json
Fetching https://www.googleapis.com/youtube/v3/videos?id=j5y6xLpRwx4&key=AIzaSyC05vieJOuBZqdz0agcxYWfPfasr3WodDk&part=snippet%2Cstatistics
Writing data to raw/Monotonía-20221028-2000.json
Fetching https://www.googleapis.com/yo