## 1. Import Libraries and Load Data

In [None]:
# Import required libraries
import warnings

import ipywidgets as widgets
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display
from plotly.subplots import make_subplots
# Silence warnings so they do not clutter the rendered cell outputs.
# NOTE(review): this suppresses every warning category, deprecations included;
# consider narrowing the filter to the specific noisy categories.
warnings.filterwarnings('ignore')

# Set display options: show every column and render floats with two decimals
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.2f}'.format)

# Status message (the emoji was previously stored as mis-decoded UTF-8 bytes)
print("✅ Libraries imported successfully!")

In [None]:
# Load the dataset (the path is resolved relative to the notebook's working directory)
df = pd.read_csv('../../data/raw/Per-capita.csv')

# Report the frame's size, then preview the first rows.
# The emoji and the multiplication sign were previously stored as mis-decoded
# UTF-8 bytes and printed as garbage.
print(f"📊 Dataset Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print("\n" + "="*60)
print("First 5 rows of the dataset:")
print("="*60)
df.head()

## 2. Data Cleaning and Preprocessing

In [None]:
# Clean column names: keep only the text before " [" (drops the "[YR...]" suffix).
# Names without " [" are left unchanged, so re-running this cell is harmless.
df.columns = [col.split(' [')[0] for col in df.columns]

# Melt the dataframe: every column that is not an identifier becomes a (Year, Value) row
id_vars = ['Country Name', 'Country Code', 'Series Name', 'Series Code']
value_vars = [col for col in df.columns if col not in id_vars]

df_long = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='Year', value_name='Value')

# Convert Year and Value to numeric; anything unparseable (e.g. '..') becomes NaN
df_long['Year'] = pd.to_numeric(df_long['Year'], errors='coerce')
df_long['Value'] = pd.to_numeric(df_long['Value'], errors='coerce')

# Drop rows missing either field. A row whose column header did not parse as a
# year has no place on a time axis, and a single NaN would force the whole Year
# column to float. `.copy()` gives an independent frame rather than a slice of df_long.
df_clean = df_long.dropna(subset=['Year', 'Value']).copy()

# Keep years as whole numbers so they render as "2022" rather than "2022.0"
df_clean['Year'] = df_clean['Year'].astype('int64')

print("📊 Transformed Dataset Shape:", df_clean.shape)
df_clean.head()

## 3. Trend Analysis

In [None]:
# One frame per indicator, selected by its series code
gdp_pc = df_clean.loc[df_clean['Series Code'].eq('NY.GDP.PCAP.KD')]
pop = df_clean.loc[df_clean['Series Code'].eq('SP.POP.TOTL')]

# Sorted list of the country names present in the GDP-per-capita frame
countries = gdp_pc['Country Name'].unique().tolist()
countries.sort()

# Country picker: starts on 'United States' when it is listed, otherwise on the first entry
country_dropdown = widgets.Dropdown(
    description='Country:',
    options=countries,
    value=countries[0] if 'United States' not in countries else 'United States',
    style=dict(description_width='initial'),
)

# Output area that the chart callback renders into
output = widgets.Output()

def update_plot(change):
    """Redraw the GDP-per-capita / population chart for the selected country.

    Parameters
    ----------
    change : dict-like
        Widget change notification. Only ``change['new']`` is read; it is used
        as the country name to filter on.

    Notes
    -----
    Reads the module-level ``gdp_pc`` and ``pop`` frames and renders into the
    module-level ``output`` widget, replacing whatever was shown before.
    ``make_subplots`` comes from ``plotly.subplots`` and must be imported.
    """
    with output:
        # wait=True defers clearing until the new figure is ready to be shown
        output.clear_output(wait=True)
        country = change['new']

        # Sort by year so the line traces are drawn chronologically even if the
        # source rows are not already in year order.
        country_gdp = gdp_pc[gdp_pc['Country Name'] == country].sort_values('Year')
        country_pop = pop[pop['Country Name'] == country].sort_values('Year')

        # Single panel with a secondary y-axis: the two series are plotted
        # against separate value axes.
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(go.Scatter(x=country_gdp['Year'], y=country_gdp['Value'],
                                 name='GDP per Capita', mode='lines+markers'), secondary_y=False)

        fig.add_trace(go.Scatter(x=country_pop['Year'], y=country_pop['Value'],
                                 name='Population', mode='lines', line=dict(dash='dot')), secondary_y=True)

        fig.update_layout(title=f'GDP per Capita and Population Trends: {country}',
                          height=500)
        fig.update_xaxes(title_text="Year")
        fig.update_yaxes(title_text="GDP per Capita (Constant 2015 US$)", secondary_y=False)
        fig.update_yaxes(title_text="Population", secondary_y=True)

        fig.show()

# Redraw the chart whenever the dropdown's selected value changes
country_dropdown.observe(update_plot, names='value')

# Lay out heading, picker and chart area vertically.
# The heading emoji was previously stored as mis-decoded UTF-8 bytes.
display(widgets.VBox([widgets.HTML('<h3>📈 Explore Country Trends</h3>'),
                      country_dropdown, output]))

# Render once up front so a chart is visible before any selection change
update_plot({'new': country_dropdown.value})

## 4. Comparative Analysis

In [None]:
# Rank countries by GDP per capita in the most recent year present in the data
latest_year = gdp_pc['Year'].max()
latest_data = gdp_pc.loc[gdp_pc['Year'].eq(latest_year)]

# Keep the 15 rows with the largest values
top_15 = latest_data.nlargest(n=15, columns='Value')

# Horizontal bar chart, coloured by value
fig = px.bar(
    data_frame=top_15,
    x='Value',
    y='Country Name',
    orientation='h',
    color='Value',
    color_continuous_scale='Viridis',
    labels=dict(Value='GDP per Capita (US$)'),
    title=f'Top 15 Countries by GDP per Capita ({latest_year})',
)
# Order the category axis by bar total (ascending)
fig.update_layout(height=600, yaxis=dict(categoryorder='total ascending'))
fig.show()

---
## 📊 Summary of Findings

### Key Observations
1. **Standard of Living**: [To be filled] - Wide disparity between high-income and low-income nations.
2. **Population Growth**: [To be filled] - Developing nations show faster population growth.

### Implications
- **Resource Strain**: High population growth + low GDP per capita = challenge for healthcare/nutrition.
- **Obesity Link**: Higher GDP per capita often correlates with higher obesity rates (Nutrition Transition).

### Next Steps
- Use GDP per Capita as a key predictor for Obesity and Diabetes models.