In [181]:
import pandas as pd
import json
import glob

In [182]:
def add_country_column(csv_files , df):

    for file in csv_files:
        temp = pd.read_csv(file, encoding="latin1")
        temp['country'] = file[10:12]
        df = pd.concat([df, temp], ignore_index=True)
    
    return df

In [183]:
def add_trending_weeek_day_column(df):
    
    df['trending_weeek_day'] = df['trand_date'].dt.day_name()

    return df

In [184]:
# 1. **Unir todos los datasets de videos (CSVs) de diferentes regiones en un solo DataFrame**:

def merge_all_csv_files(csv_route, df):

    csv_files = glob.glob(csv_route)
    df_final = add_country_column(csv_files, df)
    
    return df_final

In [185]:
# 2. **Asignar el nombre de la categoría al dataset principal**:

def add_category_column(df, json_route):
    
    categories = json.load(open(json_route))
    categories = categories.get('items')

    categories_dict = {int(category['id']): category['snippet']['title'] for category in categories}
    
    df['category_name'] = df['category_id'].map(categories_dict)

    return df


In [186]:
# 3. **Calcular cuántos días pasaron desde la fecha de publicación del video ('publish_time') 
#   hasta la fecha en que el video empezó a ser trending ('trending_date')**:

def add_days_to_trend_column(df):
    df['trand_date'] = pd.to_datetime(df['trending_date'], format='%y.%d.%m')
    df['publish_date'] = pd.to_datetime(df['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')

    df['days_to_trend'] = (df['trand_date'] - df['publish_date']).dt.days
    df['days_to_trend'] = df['days_to_trend'].replace(-1, 0)
    return df

In [187]:
# 3. **Determinar la cantidad de trendings por día de la semana por País.**

def get_trending_counts_by_country(df):
    
    final_df = add_trending_weeek_day_column(df)
    final_df = final_df.pivot_table(index='trending_weeek_day', columns='country', values='title', aggfunc='count')

    return final_df

In [188]:
# 4. **Hacer un calendario (date, no datetime) de videos publicados por país.
# Deben estar TODAS las fechas desde ma mínima del dataset hasta la máxima.**

def get_publications_by_day(df):
    
    calendar = df.pivot_table(index='publish_date', columns='country', values='video_id', aggfunc='count')
    date_index = pd.date_range(start=calendar.index.min(), end=calendar.index.max(), freq='D')
    calendar = calendar.reindex(date_index).fillna(0)
    calendar

    return calendar

In [189]:
# 5. **Contar el número total de tags por video.**:
def add_tags_count_column(df):
    
    df['tags_count'] = df['tags'].str.split('|').apply(len)

    return df

In [190]:
# 6. **Identificar los tags más comunes entre todos los videos.**:

def get_common_tags(df):
    
    tags = df['tags'].str.split('|').explode()
    common_tags = tags.value_counts().head()

    return common_tags

In [191]:
# 7. **Calcular el número promedio de tags en videos que se volvieron trending en menos de 3 días después de su publicación.**:
def get_trending_videos_average_tags(df):
    
    trending_filtered = df[(df['days_to_trend'] <= 3) & (df['tags_count'] > 0)]
    trending_average = trending_filtered['tags_count'].mean()

    return trending_average

In [207]:
# 8. **Calcular el porcentaje de videos por categoría
# en cada país y comparar la popularidad de las categorías entre diferentes regiones**

def get_porcentage_by_category(df):
    
    category_percentage = df.pivot_table(index='category_name', columns='country', values='video_id', aggfunc='count')
    category_percentage = category_percentage.fillna(0)

    total_videos = category_percentage.sum()
    category_percentage = category_percentage.div(total_videos) * 100
    category_percentage = category_percentage.round(2)

    return category_percentage

In [193]:
# 9. **Determinar qué categorías tienen el mayor y menor engagement promedio.**:
#   - Engagement Rate: `(likes - dislikes) / views`.

def get_engagement_rate(df):
    
    df['engagement_rate'] = (df['likes'] - df['dislikes']) / df['views']
    engagement = df.groupby('category_name')['engagement_rate'].mean()
    engagement.sort_values(ascending=False)

    return engagement

In [211]:
# 10. **Analizar si existe una correlación entre el día de la semana de
# publicación y la velocidad con la que un video se vuelve trending.**

def get_trending_by_day(df):
    
    df['trending_weeek_day'] = df['trand_date'].dt.day_name()
    df['trending_weeek_day'] = pd.Categorical(df['trending_weeek_day'], categories=['Monday', 'Tuesday', 'Wednesday',
    'Thursday', 'Friday', 'Saturday', 'Sunday'], ordered=True)
    df['days_to_trend'] = df['days_to_trend'].replace(0, 1)
    df_final = df.groupby('trending_weeek_day')['days_to_trend'].mean()

    return df_final

In [195]:
# 11. **Comparar el engagement de videos con comentarios y ratings habilitados versus deshabilitados.**

def get_engagement_videos_by_rating(df):
    
    df['comments_disabled'] = df['comments_disabled'].astype(int)
    df['ratings_disabled'] = df['ratings_disabled'].astype(int)
    df['engagement_rate'] = (df['likes'] - df['dislikes']) / df['views']
    engagement = df.groupby(['comments_disabled', 'ratings_disabled'])['engagement_rate'].mean()

    return engagement

## Functions call

In [196]:
df_final = pd.DataFrame()
csv_route = "./sources/*.csv"
json_route = "./sources/CA_category_id.json"

In [197]:
df_final = merge_all_csv_files(csv_route, df_final)
df_final.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,125882,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,13030,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...,CA
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,CA
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,85067,https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg,False,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA


In [198]:
add_category = add_category_column(df_final, json_route)
df_final.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country,category_name
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,125882,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA,Music
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,13030,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...,CA,Comedy
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA,Comedy
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,CA,Entertainment
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,85067,https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg,False,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA,Music


In [199]:
df_final = add_days_to_trend_column(df_final)
df_final.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,...,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country,category_name,trand_date,publish_date,days_to_trend
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,...,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA,Music,2017-11-14,2017-11-10 17:00:03,3
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,...,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...,CA,Comedy,2017-11-14,2017-11-13 17:00:00,0
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,...,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA,Comedy,2017-11-14,2017-11-12 19:05:24,1
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,...,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,CA,Entertainment,2017-11-14,2017-11-12 18:01:41,1
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,...,https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg,False,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA,Music,2017-11-14,2017-11-09 11:04:14,4


In [200]:
get_trending_counts_by_country = get_trending_counts_by_country(df_final)
get_trending_counts_by_country

country,CA,DE,FR,GB,IN,JP,KR,MX,RU,US
trending_weeek_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Friday,5787,5791,5773,5528,5289,2970,4955,5735,5785,5793
Monday,5759,5747,5775,5471,5264,2937,4892,5698,5772,5793
Saturday,5990,5993,5980,5739,5617,3022,5055,5925,5947,5993
Sunday,5794,5767,5730,5483,5356,2804,4810,5745,5732,5794
Thursday,5783,5780,5749,5489,5182,2868,4818,5709,5783,5793
Tuesday,5988,5986,5978,5674,5476,2953,5116,5943,5953,5992
Wednesday,5780,5776,5739,5532,5168,2969,4921,5696,5767,5791


In [201]:
get_publications_by_day = get_publications_by_day(df_final)
get_publications_by_day

country,CA,DE,FR,GB,IN,JP,KR,MX,RU,US
2006-07-23 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2006-07-24 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-07-25 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-07-26 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-07-27 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2018-06-09 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-06-10 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-06-11 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-06-12 08:24:11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [202]:
df_final = add_tags_count_column(df_final)
df_final.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,...,ratings_disabled,video_error_or_removed,description,country,category_name,trand_date,publish_date,days_to_trend,trending_weeek_day,tags_count
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,...,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA,Music,2017-11-14,2017-11-10 17:00:03,3,Tuesday,6
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,...,False,False,STill got a lot of packages. Probably will las...,CA,Comedy,2017-11-14,2017-11-13 17:00:00,0,Tuesday,11
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,...,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA,Comedy,2017-11-14,2017-11-12 19:05:24,1,Tuesday,23
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,...,False,False,I know it's been a while since we did this sho...,CA,Entertainment,2017-11-14,2017-11-12 18:01:41,1,Tuesday,14
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,...,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA,Music,2017-11-14,2017-11-09 11:04:14,4,Tuesday,10


In [204]:
get_common_tags = get_common_tags(df_final)
get_common_tags

tags
[none]      37698
"funny"     14969
"comedy"    11967
"2018"      11047
"news"       6004
Name: count, dtype: int64

In [205]:
get_trending_videos_average_tags = get_trending_videos_average_tags(df_final)
get_trending_videos_average_tags

np.float64(17.091201124008112)

In [208]:
get_porcentage_by_category = get_porcentage_by_category(df_final)
get_porcentage_by_category

country,CA,DE,FR,GB,IN,JP,KR,MX,RU,US
category_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Autos & Vehicles,0.87,2.15,1.66,0.37,0.19,1.37,0.35,0.63,4.04,0.94
Comedy,9.25,6.24,10.69,4.71,9.21,3.62,6.0,4.33,7.82,8.45
Education,2.43,2.08,1.89,1.18,3.29,0.55,1.42,1.32,1.82,4.05
Entertainment,32.96,37.68,24.18,23.5,44.87,30.52,26.12,33.55,15.16,24.37
Film & Animation,5.05,5.85,5.31,6.64,4.45,5.95,6.42,3.23,7.76,5.73
Gaming,3.29,3.86,3.59,4.61,0.18,5.02,4.06,2.47,2.66,2.0
Howto & Style,4.92,4.3,5.81,4.97,2.27,3.9,1.63,6.14,5.1,10.14
Movies,0.01,0.0,0.03,0.0,0.04,0.0,0.0,0.0,0.0,0.0
Music,9.14,5.84,9.72,35.42,10.36,6.29,5.32,8.39,4.83,15.83
News & Politics,10.19,7.23,9.24,3.16,14.07,6.79,22.12,7.74,13.78,6.08


In [209]:
get_engagement_rate = get_engagement_rate(df_final)
get_engagement_rate

category_name
Autos & Vehicles        0.040297
Comedy                  0.048277
Education               0.048281
Entertainment           0.028358
Film & Animation        0.026601
Gaming                  0.045026
Howto & Style           0.051673
Movies                  0.016326
Music                   0.046193
News & Politics         0.017935
People & Blogs          0.032106
Pets & Animals          0.042088
Science & Technology    0.045981
Shows                   0.009829
Sports                  0.019822
Trailers                0.006150
Travel & Events         0.036305
Name: engagement_rate, dtype: float64

In [212]:
get_trending_by_day = get_trending_by_day(df_final)
get_trending_by_day

  df_final = df.groupby('trending_weeek_day')['days_to_trend'].mean()


trending_weeek_day
Monday       7.691515
Tuesday      7.811312
Wednesday    7.118294
Thursday     6.843468
Friday       6.492435
Saturday     6.570873
Sunday       6.930020
Name: days_to_trend, dtype: float64

In [213]:
get_engagement_videos_by_rating = get_engagement_videos_by_rating(df_final)
get_engagement_videos_by_rating

comments_disabled  ratings_disabled
0                  0                   0.034477
                   1                   0.000000
1                  0                   0.013852
                   1                   0.000000
Name: engagement_rate, dtype: float64