## 1. Import Libraries and Load Data

In [5]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display
import warnings
# Suppress every warning category so library chatter does not clutter cell output
warnings.filterwarnings(action='ignore')

# Pandas display configuration: never truncate columns, render floats with two decimals
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.2f}'.format

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Load the raw per-capita CSV (the path is relative to this notebook's directory)
df = pd.read_csv('../../data/raw/Per-capita.csv')

# Report the dataset dimensions, then preview the first few rows.
# The separator in the banner is the multiplication sign (U+00D7); it had been
# saved as the mis-decoded byte pair "Ã—".
print(f"Dataset Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print("\n" + "="*60)
print("First 5 rows of the dataset:")
print("="*60)
df.head()

Dataset Shape: 537 rows × 39 columns

First 5 rows of the dataset:


Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1990 [YR1990],1991 [YR1991],1992 [YR1992],1993 [YR1993],1994 [YR1994],1995 [YR1995],1996 [YR1996],1997 [YR1997],1998 [YR1998],1999 [YR1999],2000 [YR2000],2001 [YR2001],2002 [YR2002],2003 [YR2003],2004 [YR2004],2005 [YR2005],2006 [YR2006],2007 [YR2007],2008 [YR2008],2009 [YR2009],2010 [YR2010],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],2021 [YR2021],2022 [YR2022],2023 [YR2023],2024 [YR2024]
0,Afghanistan,AFG,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,..,..,..,..,..,..,..,..,..,..,308.318269746638,277.118051443941,338.139973643387,346.071627096223,338.637273888197,363.640141436773,367.75831169358,410.757728879097,417.647282647498,488.830652491949,542.871030476037,525.42698276582,568.929021458341,580.603833333096,575.146245808546,565.569730408751,563.872336723147,562.769574140988,553.125151688293,557.861533207459,527.834554499306,408.625855217403,377.665627080705,378.06630312259,..
1,Afghanistan,AFG,"Population, total",SP.POP.TOTL,12045660,12238879,13278974,14943172,16250794,17065836,17763266,18452091,19159996,19887785,20130327.0,20284307.0,21378117.0,22733049.0,23560654.0,24404567.0,25424094.0,25909852.0,26482622.0,27466101.0,28284089.0,29347708.0,30560034.0,31622704.0,32792523.0,33831764.0,34700612.0,35688935.0,36743039.0,37856121.0,39068979.0,40000412.0,40578842.0,41454761.0,42647492
2,Albania,ALB,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1682.8810874018,1218.96427317362,1138.23773998809,1254.67888414888,1367.22058963712,1558.96638565461,1683.76965547424,1495.90035328523,1630.43874131845,1841.80762760566,1991.91435867116,2188.91854451375,2297.10853513924,2428.68934653563,2567.29763687121,2712.86983830331,2894.36032580174,3105.8796756386,3345.98161761476,3459.24695646692,3579.82413181046,3677.89457937717,3720.22876474985,3790.68016321899,3883.63262828191,3981.72662261867,4143.98988316994,4283.98262744437,4452.23714652476,4563.4673625169,4437.65346927974,4880.72346162808,5178.88431481499,5444.93000074979,5726.02570530968
3,Albania,ALB,"Population, total",SP.POP.TOTL,3286542,3266790,3247039,3227287,3207536,3187784,3168033,3148281,3128530,3108778,3089027.0,3060173.0,3051010.0,3039616.0,3026939.0,3011487.0,2992547.0,2970017.0,2947314.0,2927519.0,2913021.0,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837849.0,2811666.0,2777689.0,2745972.0,2714617
4,Algeria,DZA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,3605.68701212608,3478.50976269176,3455.92936294162,3302.91950568634,3201.56914402566,3255.18726869561,3322.95538141474,3297.46603361265,3410.88254401644,3471.49196276497,3553.32420521991,3610.00605631983,3754.66081538446,3945.89816094007,4066.28204806495,4223.60252133943,4279.61547120827,4339.25055772831,4367.56538017541,4336.10054052949,4456.6102741643,4501.3543013352,4518.43978956464,4543.23444006767,4634.10149179952,4685.05902729002,4768.73140081027,4742.90075532529,4717.0035885361,4672.66408678635,4363.68533767783,4456.74687637774,4544.46688060372,4660.40545687895,4747.34624759338


## 2. Data Cleaning and Preprocessing

In [3]:
# Strip the " [YRxxxx]" suffix from every header so year columns are plain labels
df.columns = [header.partition(' [')[0] for header in df.columns]

# Identifier columns stay fixed; every remaining column is melted into rows
id_vars = ['Country Name', 'Country Code', 'Series Name', 'Series Code']
value_vars = [header for header in df.columns if header not in id_vars]

# Wide -> long: one row per (country, series, year)
df_long = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='Year',
    value_name='Value',
)

# Coerce both melted columns to numbers; anything unparseable (such as the
# '..' placeholder seen in the raw preview) becomes NaN
df_long = df_long.assign(
    Year=pd.to_numeric(df_long['Year'], errors='coerce'),
    Value=pd.to_numeric(df_long['Value'], errors='coerce'),
)

# Keep only the observations that actually carry a value
df_clean = df_long[df_long['Value'].notna()]

print("Transformed Dataset Shape:", df_clean.shape)
df_clean.head()

Transformed Dataset Shape: (18009, 6)


Unnamed: 0,Country Name,Country Code,Series Name,Series Code,Year,Value
1,Afghanistan,AFG,"Population, total",SP.POP.TOTL,1990,12045660.0
2,Albania,ALB,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1990,1682.88
3,Albania,ALB,"Population, total",SP.POP.TOTL,1990,3286542.0
4,Algeria,DZA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1990,3605.69
5,Algeria,DZA,"Population, total",SP.POP.TOTL,1990,25375810.0


## 3. Trend Analysis

In [6]:
# Split the cleaned long table into one frame per indicator series
gdp_pc = df_clean.loc[df_clean['Series Code'].eq('NY.GDP.PCAP.KD')]
pop = df_clean.loc[df_clean['Series Code'].eq('SP.POP.TOTL')]

# Interactive Country Explorer: alphabetical list of names that have GDP-per-capita rows
countries = sorted(gdp_pc['Country Name'].unique())

# Start on the United States when it is available, otherwise on the first name
country_dropdown = widgets.Dropdown(
    options=countries,
    value=countries[0] if 'United States' not in countries else 'United States',
    description='Country:',
    style=dict(description_width='initial'),
)

# Output area that update_plot draws into
output = widgets.Output()

def update_plot(change):
    """Redraw the GDP-per-capita / population chart for the selected country.

    Parameters
    ----------
    change : dict
        Widget change event; only ``change['new']`` (the selected country
        name) is read.

    The figure is rendered inside the ``output`` widget, replacing whatever
    was shown there before.
    """
    with output:
        # wait=True defers the clear until the new figure is ready to display
        output.clear_output(wait=True)

        selected = change['new']

        gdp_rows = gdp_pc.loc[gdp_pc['Country Name'] == selected]
        pop_rows = pop.loc[pop['Country Name'] == selected]

        # GDP per capita goes on the primary y-axis
        gdp_trace = go.Scatter(
            x=gdp_rows['Year'],
            y=gdp_rows['Value'],
            name='GDP per Capita',
            mode='lines+markers',
        )
        # Population goes on the secondary y-axis, drawn as a dotted line
        pop_trace = go.Scatter(
            x=pop_rows['Year'],
            y=pop_rows['Value'],
            name='Population',
            mode='lines',
            line={'dash': 'dot'},
        )

        # Single-cell subplot grid whose only purpose is the secondary y-axis
        fig = make_subplots(specs=[[{"secondary_y": True}]])
        fig.add_trace(gdp_trace, secondary_y=False)
        fig.add_trace(pop_trace, secondary_y=True)

        fig.update_layout(
            title=f'GDP per Capita and Population Trends: {selected}',
            height=500,
        )
        fig.update_yaxes(title_text="GDP per Capita (Constant 2015 US$)", secondary_y=False)
        fig.update_yaxes(title_text="Population", secondary_y=True)

        fig.show()

# Redraw the chart whenever the dropdown's selected value changes
country_dropdown.observe(update_plot, names='value')

# Heading, dropdown and chart area stacked vertically.
# The heading emoji is U+1F4C8 (chart increasing); it had been saved as the
# mis-decoded byte sequence "ðŸ“ˆ".
display(widgets.VBox([widgets.HTML('<h3>📈 Explore Country Trends</h3>'),
                      country_dropdown, output]))

# Draw the initial chart for the default selection by passing a minimal
# change-like dict carrying the current value under 'new'
update_plot({'new': country_dropdown.value})

VBox(children=(HTML(value='<h3>📈 Explore Country Trends</h3>'), Dropdown(description='Country:', index=248, op…

## 4. Comparative Analysis

In [7]:
# Top 15 Countries by GDP per Capita (Latest Year)
# NOTE(review): only rows for the single most recent year are ranked, so any
# country without a GDP-per-capita value in that year is absent from the chart
# — confirm this is the intended comparison.
latest_year = gdp_pc['Year'].max()
latest_data = gdp_pc.loc[gdp_pc['Year'].eq(latest_year)]

top_15 = latest_data.nlargest(n=15, columns='Value')

fig = px.bar(
    top_15,
    x='Value',
    y='Country Name',
    orientation='h',
    title=f'Top 15 Countries by GDP per Capita ({latest_year})',
    color='Value',
    color_continuous_scale='Viridis',
    labels={'Value': 'GDP per Capita (US$)'},
)
# Order the bars by value so the largest sits at the top of the horizontal chart
fig.update_layout(yaxis=dict(categoryorder='total ascending'), height=600)
fig.show()

---
## Summary of Findings

### Key Observations
1. **Standard of Living**: Wide disparity in GDP per capita between high-income and low-income nations. *(TODO: add supporting figures.)*
2. **Population Growth**: Developing nations show faster population growth. *(TODO: add supporting figures.)*

### Implications
- **Resource Strain**: High population growth + low GDP per capita = challenge for healthcare/nutrition.
- **Obesity Link**: Higher GDP per capita often correlates with higher obesity rates (Nutrition Transition).

### Next Steps
- Use GDP per Capita as a key predictor for Obesity and Diabetes models.