### **1. Importing Libraries**

In [1]:
import pandas as pd
import numpy as np

### **2. Importing Data**

In [2]:
df = pd.read_json("datasets/CLEANED_videos_count_data.json")

In [3]:
df = df.T

In [4]:
display(df)

Unnamed: 0,id,title,kind,viewCount,likeCount,commentCount,date,time,artist
0,YudHcBIxlYw,JISOO - ‘꽃(FLOWER)’ M\/V,Music Video,390657394,10117824,1340919,2023-09-19,17:01:02.392672,JISOO
1,POe9SOEKotk,BLACKPINK - ‘Shut Down’ M\/V,Music Video,522252772,10824498,2239017,2023-09-19,17:01:02.392707,BLACKPINK
2,gQlMMD8auMs,BLACKPINK - ‘Pink Venom’ M\/V,Music Video,731994719,16069895,3569773,2023-09-19,17:01:02.392717,BLACKPINK
3,awkkyBH2zEo,LISA - 'LALISA' M\/V,Music Video,653808793,17313679,2815724,2023-09-19,17:01:02.392725,LISA
4,K9_VFxzCuQ0,ROSÉ - 'Gone' M\/V,Music Video,267505076,7376149,1240229,2023-09-19,17:01:02.392734,ROSÉ
...,...,...,...,...,...,...,...,...,...
32429,EKHdMwRaU60,BLACKPINK​ - '붐바야(BOOMBAYAH)' 0828 SBS Inkigayo,Performance,61196115,831425,23097,2024-06-10,01:02:16.196168,BLACKPINK
32430,metZ_f8aqC0,BLACKPINK​ - '휘파람(WHISTLE)' 0821 SBS Inkigayo,Performance,42928615,904399,26175,2024-06-10,01:02:16.196170,BLACKPINK
32431,RGmL76BBGZk,BLACKPINK​ - '붐바야(BOOMBAYAH)' 0821 SBS Inkigay...,Performance,17439572,548481,14709,2024-06-10,01:02:16.306414,BLACKPINK
32432,vAqAp1tJnkc,BLACKPINK - '휘파람’(WHISTLE) 0814 SBS Inkigayo,Performance,48650666,864791,29349,2024-06-10,01:02:16.306423,BLACKPINK


### **3. Data Information**

#### **Data dimensions**: 


In [5]:
df.shape

(32434, 9)

#### **Details for each row:**

The information of one video, including id, title, artist, views, likes,...

In [6]:
df.head(1)

Unnamed: 0,id,title,kind,viewCount,likeCount,commentCount,date,time,artist
0,YudHcBIxlYw,JISOO - ‘꽃(FLOWER)’ M\/V,Music Video,390657394,10117824,1340919,2023-09-19,17:01:02.392672,JISOO


#### **Details for each column:**

| Column       | Description                                                                                     |
|--------------|-------------------------------------------------------------------------------------------------|
| id           | The unique identifier for the video on YouTube.                                                 |
| title        | The title of the video.                                                                         |
| kind         | The type or category of the video content.                                                      |
| viewCount    | The total number of views the video has received on YouTube.                                     |
| likeCount    | The total number of likes the video has received on YouTube.                                     |
| commentCount | The total number of comments posted on the video on YouTube.                                     |
| date         | The date when the data was collected or the statistics were obtained.                            |
| time         | The time when the data was collected or the statistics were obtained (if applicable).            |
| artist       | The artist or creator associated with the video content.                                         |


#### **Data types of each column:**

In [7]:
df.dtypes

id              object
title           object
kind            object
viewCount       object
likeCount       object
commentCount    object
date            object
time            object
artist          object
dtype: object

Convert the `viewCount`, `likeCount`, `commentCount` to int:

In [8]:
df['viewCount'] = df['viewCount'].astype(np.int64)
df['likeCount'] = df['likeCount'].astype(np.int64)
df['commentCount'] = df['commentCount'].astype(np.int64)

Convert the `date` column to datetime type:

In [9]:
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

In [10]:
#df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S.%f').dt.time

#### **Number of missing values in each column:**

In [11]:
df.isnull().sum()

id              0
title           0
kind            0
viewCount       0
likeCount       0
commentCount    0
date            0
time            0
artist          0
dtype: int64

#### **Investigating missing data days**

In [12]:
all_dates = pd.date_range(start=df['date'].min(), end=df['date'].max(), freq='D')
missing_dates = all_dates[~all_dates.isin(df['date'])]
print("Missing dates:")
if len(missing_dates) > 0:
    print(missing_dates)

Missing dates:


#### **Date of First Data Collection? Date of Last Data Collection?**

In [13]:
print("Date of First Data Collection:", df['date'].dt.date.min())
print("Date of Last Data Collection:", df['date'].dt.date.max())

Date of First Data Collection: 2023-09-19
Date of Last Data Collection: 2024-06-10


#### **The number of distinct videos in the dataset:**

In [14]:
df.id.nunique()

122

#### **Number of videos for each video types:**

In [15]:
kind_video_counts = df.drop_duplicates(subset=['kind', 'id']).groupby('kind').size().reset_index(name='count').sort_values(by='count',ascending=False)
kind_video_counts['percentage'] = round(kind_video_counts['count'] / kind_video_counts['count'].sum() * 100,2)
display(kind_video_counts)

Unnamed: 0,kind,count,percentage
2,Performance,87,70.16
0,Dance Practice Video,20,16.13
1,Music Video,17,13.71


#### **Number of videos for each artist:**

In [16]:
artist_video_counts = df.drop_duplicates(subset=['artist', 'id']).groupby('artist').size().reset_index(name='count').sort_values(by='count',ascending=False)
artist_video_counts['percentage'] = round(artist_video_counts['count'] / artist_video_counts['count'].sum() * 100,2)
display(artist_video_counts)

Unnamed: 0,artist,count,percentage
0,BLACKPINK,95,77.87
1,JENNIE,11,9.02
4,ROSÉ,8,6.56
3,LISA,6,4.92
2,JISOO,2,1.64


In [17]:
artist_video_counts_2 = df.drop_duplicates(subset=['artist','id','kind']).groupby(['artist','kind']).size().reset_index(name='count').sort_values(by=['count'],ascending=False).sort_values(by=['artist'])
display(artist_video_counts_2)

Unnamed: 0,artist,kind,count
2,BLACKPINK,Performance,71
0,BLACKPINK,Dance Practice Video,12
1,BLACKPINK,Music Video,12
5,JENNIE,Performance,6
3,JENNIE,Dance Practice Video,4
4,JENNIE,Music Video,1
6,JISOO,Dance Practice Video,1
7,JISOO,Music Video,1
8,JISOO,Performance,1
11,LISA,Performance,3


#### **Most Viewed Video by Each Video types:**

In [18]:
most_viewed_videos = df.loc[df.groupby('kind')['viewCount'].idxmax()].reset_index()
most_viewed_videos['link'] = f'https://www.youtube.com/watch?v=' + most_viewed_videos['id']
print("Most Viewed Video by Each Video types:")
display(most_viewed_videos[['kind', 'title', 'viewCount','link']])

Most Viewed Video by Each Video types:


Unnamed: 0,kind,title,viewCount,link
0,Dance Practice Video,BLACKPINK - 'How You Like That' DANCE PERFORMA...,1641649218,https://www.youtube.com/watch?v=32si5cfrCNc
1,Music Video,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,2204229993,https://www.youtube.com/watch?v=IHNzOHi8sJs
2,Performance,LISA - 'MONEY' EXCLUSIVE PERFORMANCE VIDEO,1005306778,https://www.youtube.com/watch?v=dNCWe_6HAM8


#### **Most Viewed Video by Each Artist**

In [19]:
most_viewed_videos = df.loc[df.groupby('artist')['viewCount'].idxmax()].reset_index()
most_viewed_videos['link'] = f'https://www.youtube.com/watch?v=' + most_viewed_videos['id']
print("Most Viewed Video by Each Artist:")
display(most_viewed_videos[['artist', 'title', 'viewCount','link']])

Most Viewed Video by Each Artist:


Unnamed: 0,artist,title,viewCount,link
0,BLACKPINK,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,2204229993,https://www.youtube.com/watch?v=IHNzOHi8sJs
1,JENNIE,JENNIE - 'SOLO' M\/V,1014192986,https://www.youtube.com/watch?v=b73BI9eUkjM
2,JISOO,JISOO - ‘꽃(FLOWER)’ M\/V,509564275,https://www.youtube.com/watch?v=YudHcBIxlYw
3,LISA,LISA - 'MONEY' EXCLUSIVE PERFORMANCE VIDEO,1005306778,https://www.youtube.com/watch?v=dNCWe_6HAM8
4,ROSÉ,ROSÉ - 'On The Ground' M\/V,362002502,https://www.youtube.com/watch?v=CKZvWhCqx1s


#### **Top 10 Videos with the Highest Number of Views**

In [20]:
latest_view_per_video = df.drop_duplicates(subset='id', keep='last')
top_10_viewed_videos = latest_view_per_video.nlargest(10, 'viewCount').reset_index()
top_10_viewed_videos['link'] = f'https://www.youtube.com/watch?v=' + top_10_viewed_videos['id']
print("Top 10 Videos with the Highest Number of Views:")
display(top_10_viewed_videos[['title', 'kind','artist','viewCount','link']])

Top 10 Videos with the Highest Number of Views:


Unnamed: 0,title,kind,artist,viewCount,link
0,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,Music Video,BLACKPINK,2204229993,https://www.youtube.com/watch?v=IHNzOHi8sJs
1,BLACKPINK - 'Kill This Love' M\/V,Music Video,BLACKPINK,1968035875,https://www.youtube.com/watch?v=2S24-y0Ij3Y
2,BLACKPINK - '붐바야 (BOOMBAYAH)' M\/V,Music Video,BLACKPINK,1702232543,https://www.youtube.com/watch?v=bwmSjveL3Lc
3,BLACKPINK - 'How You Like That' DANCE PERFORMA...,Dance Practice Video,BLACKPINK,1641649218,https://www.youtube.com/watch?v=32si5cfrCNc
4,BLACKPINK - '마지막처럼 (AS IF IT'S YOUR LAST)' M\/V,Music Video,BLACKPINK,1379603571,https://www.youtube.com/watch?v=Amq-qlqbjYA
5,BLACKPINK - 'How You Like That' M\/V,Music Video,BLACKPINK,1285610241,https://www.youtube.com/watch?v=ioNng23DkIM
6,JENNIE - 'SOLO' M\/V,Music Video,JENNIE,1014192986,https://www.youtube.com/watch?v=b73BI9eUkjM
7,LISA - 'MONEY' EXCLUSIVE PERFORMANCE VIDEO,Performance,LISA,1005306778,https://www.youtube.com/watch?v=dNCWe_6HAM8
8,BLACKPINK - 'Ice Cream (with Selena Gomez)' M\/V,Music Video,BLACKPINK,909154949,https://www.youtube.com/watch?v=vRXZj0DzXIA
9,BLACKPINK - '휘파람 (WHISTLE)' M\/V,Music Video,BLACKPINK,891906696,https://www.youtube.com/watch?v=dISNgvVpWlo


#### **Top 10 Videos with the Highest Number of Likes**

In [21]:
latest_like_per_video = df.drop_duplicates(subset='id', keep='last')
top_10_liked_videos = latest_like_per_video.nlargest(10, 'likeCount').reset_index()
top_10_liked_videos['link'] = f'https://www.youtube.com/watch?v=' + top_10_viewed_videos['id']
print("Top 10 Videos with the Highest Number of Likes:")
display(top_10_liked_videos[['title', 'kind','artist','likeCount','link']])

Top 10 Videos with the Highest Number of Likes:


Unnamed: 0,title,kind,artist,likeCount,link
0,BLACKPINK - 'Kill This Love' M\/V,Music Video,BLACKPINK,25508417,https://www.youtube.com/watch?v=IHNzOHi8sJs
1,BLACKPINK - 'How You Like That' M\/V,Music Video,BLACKPINK,24942515,https://www.youtube.com/watch?v=2S24-y0Ij3Y
2,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,Music Video,BLACKPINK,23885252,https://www.youtube.com/watch?v=bwmSjveL3Lc
3,BLACKPINK - 'Ice Cream (with Selena Gomez)' M\/V,Music Video,BLACKPINK,20112040,https://www.youtube.com/watch?v=32si5cfrCNc
4,BLACKPINK - 'How You Like That' DANCE PERFORMA...,Dance Practice Video,BLACKPINK,18515351,https://www.youtube.com/watch?v=Amq-qlqbjYA
5,LISA - 'LALISA' M\/V,Music Video,LISA,17516444,https://www.youtube.com/watch?v=ioNng23DkIM
6,BLACKPINK - ‘Pink Venom’ M\/V,Music Video,BLACKPINK,16601043,https://www.youtube.com/watch?v=b73BI9eUkjM
7,BLACKPINK - '붐바야 (BOOMBAYAH)' M\/V,Music Video,BLACKPINK,16550369,https://www.youtube.com/watch?v=dNCWe_6HAM8
8,BLACKPINK - 'Lovesick Girls' M\/V,Music Video,BLACKPINK,15719474,https://www.youtube.com/watch?v=vRXZj0DzXIA
9,JENNIE - 'SOLO' M\/V,Music Video,JENNIE,14978218,https://www.youtube.com/watch?v=dISNgvVpWlo


#### **Top 10 Videos with the Highest Number of Comments**

In [22]:
latest_cmt_per_video = df.drop_duplicates(subset='id', keep='last')
top_10_cmt_videos = latest_cmt_per_video.nlargest(10, 'commentCount').reset_index()
top_10_cmt_videos['link'] = f'https://www.youtube.com/watch?v=' + top_10_viewed_videos['id']
print("Top 10 Videos with the Highest Number of Comments:")
display(top_10_cmt_videos[['title', 'kind','artist','commentCount','link']])

Top 10 Videos with the Highest Number of Comments:


Unnamed: 0,title,kind,artist,commentCount,link
0,BLACKPINK - 'How You Like That' M\/V,Music Video,BLACKPINK,5070986,https://www.youtube.com/watch?v=IHNzOHi8sJs
1,BLACKPINK - ‘Pink Venom’ M\/V,Music Video,BLACKPINK,3518527,https://www.youtube.com/watch?v=2S24-y0Ij3Y
2,JENNIE - 'SOLO' M\/V,Music Video,JENNIE,3398250,https://www.youtube.com/watch?v=bwmSjveL3Lc
3,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,Music Video,BLACKPINK,3268615,https://www.youtube.com/watch?v=32si5cfrCNc
4,BLACKPINK - 'Ice Cream (with Selena Gomez)' M\/V,Music Video,BLACKPINK,3017486,https://www.youtube.com/watch?v=Amq-qlqbjYA
5,LISA - 'LALISA' M\/V,Music Video,LISA,2808230,https://www.youtube.com/watch?v=ioNng23DkIM
6,ROSÉ - 'On The Ground' M\/V,Music Video,ROSÉ,2598761,https://www.youtube.com/watch?v=b73BI9eUkjM
7,BLACKPINK - 'Kill This Love' M\/V,Music Video,BLACKPINK,2487166,https://www.youtube.com/watch?v=dNCWe_6HAM8
8,BLACKPINK - ‘Shut Down’ M\/V,Music Video,BLACKPINK,2121609,https://www.youtube.com/watch?v=vRXZj0DzXIA
9,BLACKPINK - 'Lovesick Girls' M\/V,Music Video,BLACKPINK,2033394,https://www.youtube.com/watch?v=dISNgvVpWlo


#### **Views Growth in 24 Hours:**

In [23]:
latest_date_data = df[df['date'] == df['date'].max()]
previous_date_data = df[df['date'] == (df['date'].max() - pd.Timedelta(days=1))]
merged_data = pd.merge(latest_date_data, previous_date_data, on=['title'], suffixes=('_latest', '_previous'), how='inner')
merged_data['viewCount_increase'] = merged_data['viewCount_latest'] - merged_data['viewCount_previous']

top_10_increase_videos = merged_data.nlargest(10, 'viewCount_increase').reset_index()
top_10_increase_videos['link'] = f'https://www.youtube.com/watch?v=' + top_10_increase_videos['id_latest']
print("Top 10 Videos with the Highest Increase in Views from the Previous Day:")
display(top_10_increase_videos[['title','date_latest','viewCount_increase','viewCount_latest','link']])

Top 10 Videos with the Highest Increase in Views from the Previous Day:


Unnamed: 0,title,date_latest,viewCount_increase,viewCount_latest,link
0,BLACKPINK - 'How You Like That' DANCE PERFORMA...,2024-06-10,628549,1641649218,https://www.youtube.com/watch?v=32si5cfrCNc
1,BLACKPINK - ‘Pink Venom’ M\/V,2024-06-10,427221,856407261,https://www.youtube.com/watch?v=gQlMMD8auMs
2,BLACKPINK - 'Kill This Love' M\/V,2024-06-10,376647,1968035875,https://www.youtube.com/watch?v=2S24-y0Ij3Y
3,BLACKPINK - ‘Shut Down’ M\/V,2024-06-10,368888,630144980,https://www.youtube.com/watch?v=POe9SOEKotk
4,JISOO - ‘꽃(FLOWER)’ M\/V,2024-06-10,344301,509564275,https://www.youtube.com/watch?v=YudHcBIxlYw
5,LISA - 'MONEY' EXCLUSIVE PERFORMANCE VIDEO,2024-06-10,323419,1005306778,https://www.youtube.com/watch?v=dNCWe_6HAM8
6,BLACKPINK - ‘뚜두뚜두 (DDU-DU DDU-DU)’ M\/V,2024-06-10,320369,2204229993,https://www.youtube.com/watch?v=IHNzOHi8sJs
7,JENNIE - ‘You & Me’ DANCE PERFORMANCE VIDEO,2024-06-10,248598,155919069,https://www.youtube.com/watch?v=eQNHDV7lKgE
8,BLACKPINK - '붐바야 (BOOMBAYAH)' M\/V,2024-06-10,245709,1702232543,https://www.youtube.com/watch?v=bwmSjveL3Lc
9,BLACKPINK - '마지막처럼 (AS IF IT'S YOUR LAST)' M\/V,2024-06-10,214751,1379603571,https://www.youtube.com/watch?v=Amq-qlqbjYA
