In [8]:
import pandas as pd
import plotly.express as px
import folium

In [9]:
import sys
import os

# Add the project root (the parent of the notebook's working directory) to the
# Python import path so the `src` package can be imported.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.preprocess import load_data, clean_data, feature_engineering

In [10]:
# --- Load and preprocess ---
# The dataset is located relative to the project root (the parent of the
# notebook's working directory, the same convention used for the sys.path
# setup) instead of a machine-specific absolute path, so the notebook runs
# from any checkout of the project.
path = os.path.join(os.path.abspath(".."), "data", "raw", "netflix_titles.csv")

# Each stage takes the frame produced by the previous one; `df` ends up holding
# the fully prepared data used by the charts below.
df = load_data(path)
df = clean_data(df)
df = feature_engineering(df)


--- 1. Interactive bar chart: Top 10 genres ---

In [11]:

# Count titles per primary genre and keep the ten most frequent, as a
# two-column frame: 'Genre' (the genre label) and 'Count' (number of titles).
top_genres = (
    df['primary_genre']
    .value_counts()
    .head(10)
    .rename_axis('Genre')
    .reset_index(name='Count')
)

fig = px.bar(
    top_genres,
    x='Count',
    y='Genre',
    orientation='h',
    color='Count',
    color_continuous_scale='Viridis',
    title="Top 10 Genres on Netflix (Interactive)"
)
# A horizontal bar chart draws the first category at the bottom of the axis,
# which would put the most common genre last. Ordering categories by their
# total, ascending from the bottom, places the largest bar at the top.
fig.update_layout(yaxis={'categoryorder': 'total ascending'})
fig.show()


--- 2. Timeline: Content added per year ---

In [12]:

# Titles per value of `year_added`, sorted by year in ascending order, as a
# two-column frame: 'Year' and 'Count'.
yearly = (
    df['year_added']
    .value_counts()
    .sort_index()
    .rename_axis('Year')
    .reset_index(name='Count')
)

# Line chart with a marker on every yearly data point.
fig2 = px.line(yearly, x='Year', y='Count', markers=True,
               title="Content Added to Netflix Over Time")
fig2.show()


--- 3. Interactive map: Countries producing Netflix content ---

In [13]:
# Load the world-cities workbook (one row per city, with lat/lng, country and
# a `capital` flag, as shown in the preview below).
# The file is located relative to the project root (the parent of the
# notebook's working directory) instead of a machine-specific absolute path.
# pandas is already imported as `pd` in the first cell, so it is not
# re-imported here.
cities_df = pd.read_excel(os.path.join(os.path.abspath(".."), "data", "worldcities.xlsx"))
cities_df.head()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.687,139.7495,Japan,JP,JPN,Tōkyō,primary,37785000.0,1392685764
1,Jakarta,Jakarta,-6.175,106.8275,Indonesia,ID,IDN,Jakarta,primary,33756000.0,1360771077
2,Delhi,Delhi,28.61,77.23,India,IN,IND,Delhi,admin,32226000.0,1356872604
3,Guangzhou,Guangzhou,23.13,113.26,China,CN,CHN,Guangdong,admin,26940000.0,1156237133
4,Mumbai,Mumbai,19.0761,72.8775,India,IN,IND,Mahārāshtra,admin,24973000.0,1356226629


In [14]:
# Lookup table {country: {'lat': ..., 'lng': ...}} built from the rows whose
# `capital` value is 'primary'. When a country has several such rows, their
# coordinates are averaged; values are rounded to 4 decimal places.
capital_coords = (
    cities_df
    .loc[lambda cities: cities['capital'] == 'primary']
    .groupby('country')[['lat', 'lng']]
    .mean()
    .round(4)
    .to_dict(orient='index')
)


In [15]:
# Smallest marker radius, in pixels. Scaling by count / 50 alone yields
# sub-pixel circles for every country with fewer than 50 titles, which makes
# most markers effectively invisible.
MIN_MARKER_RADIUS = 3

world_map = folium.Map(location=[20, 0], zoom_start=2)

# Values of df['country'] that have no entry in capital_coords. They cannot be
# placed on the map; they are collected here for inspection rather than being
# dropped silently.
unmatched_countries = []

# One circle per country value, sized by the number of titles.
for country, count in df['country'].value_counts().items():
    coords = capital_coords.get(country)
    if coords is None:
        unmatched_countries.append(country)
        continue
    folium.CircleMarker(
        location=[coords['lat'], coords['lng']],
        radius=float(max(MIN_MARKER_RADIUS, count / 50)),
        popup=f"{country}: {count}",
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(world_map)

world_map

In [16]:

# Export the map as a standalone HTML file under
# <project root>/notebooks/images/, where the project root is the parent of the
# notebook's working directory (instead of a machine-specific absolute path).
map_path = os.path.join(os.path.abspath(".."), "notebooks", "images", "netflix_map.html")
# Create the target folder first so the export does not fail on a fresh
# checkout where it does not exist yet.
os.makedirs(os.path.dirname(map_path), exist_ok=True)
world_map.save(map_path)