## 1. Import Libraries and Load Data

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# Display options: show all DataFrame columns and render floats with two decimals.
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.2f}'.format

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Load the dataset
# The path is relative to the notebook's working directory.
df = pd.read_csv('../../data/processed/cleaned-obesity-rates.csv')

# Display first few rows
# The separator is the multiplication sign (U+00D7); it had been stored as
# the mis-decoded byte sequence "√ó".
print(f"Dataset Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print("\n" + "="*60)
print("First 5 rows of the dataset:")
print("="*60)
# Bare last expression so the notebook renders the preview as a rich table.
df.head()

Dataset Shape: 4,554 rows × 3 columns

First 5 rows of the dataset:


Unnamed: 0,Area,Year,Value
0,Afghanistan,2000,4.3
1,Afghanistan,2001,4.6
2,Afghanistan,2002,5.0
3,Afghanistan,2003,5.4
4,Afghanistan,2004,5.8


## 2. Global Trends Analysis

In [3]:
# Global Average Obesity Rate Over Time
# Unweighted mean of 'Value' over all rows that share the same year.
global_trend = df.groupby('Year', as_index=False)['Value'].mean()

fig = px.line(
    data_frame=global_trend,
    x='Year',
    y='Value',
    title='Global Average Obesity Prevalence Over Time',
    labels={'Value': 'Obesity Prevalence (%)'},
    markers=True,
)
fig.update_layout(height=500)
fig.show()

## 3. Country-Specific Analysis

In [4]:
# Interactive Country Explorer
# Alphabetically sorted list of every distinct area in the dataset.
countries = sorted(df['Area'].unique())

# Output area that the plot callback renders into.
output = widgets.Output()

# Pre-select the USA when it is present; otherwise fall back to the first entry.
country_dropdown = widgets.Dropdown(
    description='Country:',
    options=countries,
    value=countries[0] if 'United States of America' not in countries else 'United States of America',
    style=dict(description_width='initial'),
)

def update_country_plot(change):
    """Redraw the trend line and the start-to-end change for one country.

    Everything is rendered inside the ``output`` widget, which is cleared
    first so successive selections replace each other instead of stacking.

    Parameters
    ----------
    change : dict
        Change notification; only ``change['new']`` is read and it must hold
        the selected ``Area`` value.
    """
    with output:
        output.clear_output(wait=True)
        country = change['new']
        # Sort chronologically so the first/last rows are the earliest and
        # latest observations regardless of the row order in the CSV.
        country_data = df[df['Area'] == country].sort_values('Year')

        # Without rows there is nothing to plot and iloc[0] would raise
        # IndexError, so report the situation and stop.
        if country_data.empty:
            print(f"No data available for {country}.")
            return

        fig = px.line(country_data, x='Year', y='Value', markers=True,
                      title=f'Obesity Prevalence Trend: {country}',
                      labels={'Value': 'Obesity Prevalence (%)'})
        fig.update_layout(height=500)
        fig.show()

        # Stats
        start_val = country_data.iloc[0]['Value']
        end_val = country_data.iloc[-1]['Value']
        change_val = end_val - start_val

        # The '+' format flag prints the real sign; a hard-coded '+' produced
        # output such as "+-1.2" whenever the rate declined.
        print(f"Change ({country_data['Year'].min()} - {country_data['Year'].max()}): {change_val:+.1f} percentage points")

# Re-render whenever the dropdown's selected value changes.
country_dropdown.observe(update_country_plot, names='value')

# The heading uses the globe emoji (U+1F30D); it had been stored as the
# mis-decoded byte sequence "üåç".
display(widgets.VBox([widgets.HTML('<h3>🌍 Explore Country Trends</h3>'), 
                      country_dropdown, output]))

# Call the callback once by hand so the initial selection is drawn
# without waiting for a change event.
update_country_plot({'new': country_dropdown.value})

VBox(children=(HTML(value='<h3>🌍 Explore Country Trends</h3>'), Dropdown(description='Country:', index=189, op…

## 4. Comparative Analysis

In [5]:
# Highest and Lowest Obesity Rates (Latest Year)
# Restrict to the most recent year in the data so every country is compared
# at the same point in time.
latest_year = df['Year'].max()
latest_data = df[df['Year'] == latest_year]

# Human-readable axis/colour-bar titles. 'Value' uses the same wording as the
# line charts elsewhere in this notebook; without this the bar charts show
# the raw column names.
ranking_labels = {'Value': 'Obesity Prevalence (%)', 'Area': 'Country'}

# Top 15 Highest
top_15 = latest_data.nlargest(15, 'Value')
fig_top = px.bar(top_15, x='Value', y='Area', orientation='h',
                 title=f'Top 15 Countries with Highest Obesity Rates ({latest_year})',
                 labels=ranking_labels,
                 color='Value', color_continuous_scale='Reds')
# Order the bars by value rather than by their order in the frame.
fig_top.update_layout(yaxis={'categoryorder':'total ascending'}, height=500)
fig_top.show()

# Top 15 Lowest
bottom_15 = latest_data.nsmallest(15, 'Value')
fig_bottom = px.bar(bottom_15, x='Value', y='Area', orientation='h',
                    title=f'Top 15 Countries with Lowest Obesity Rates ({latest_year})',
                    labels=ranking_labels,
                    color='Value', color_continuous_scale='Blues')
# Opposite ordering to the chart above.
fig_bottom.update_layout(yaxis={'categoryorder':'total descending'}, height=500)
fig_bottom.show()

## 5. Global Map Visualization

In [7]:
# Interactive World Map
# Output widget that update_map draws into; it is cleared and refilled on each call.
map_output = widgets.Output()

def update_map(change):
    """Draw the world choropleth for the year given in ``change['new']``.

    The figure is rendered inside ``map_output``, which is cleared first so
    each call replaces the previous map.
    """
    with map_output:
        map_output.clear_output(wait=True)
        selected_year = change['new']
        rows_for_year = df[df['Year'] == selected_year]

        # Areas are matched to map regions by their country name.
        world_map = px.choropleth(
            data_frame=rows_for_year,
            locations='Area',
            locationmode='country names',
            color='Value',
            color_continuous_scale='Reds',
            hover_name='Area',
            labels={'Value': 'Prevalence (%)'},
            title=f'Global Obesity Prevalence - {selected_year}',
        )
        world_map.update_layout(height=600)
        world_map.show()

# Slider covering every year in the data, starting on the most recent one.
year_slider = widgets.IntSlider(
    description='Year:',
    min=df['Year'].min(),
    max=df['Year'].max(),
    value=df['Year'].max(),
    step=1,
    continuous_update=False,
)

# Redraw the map whenever the slider's value changes.
year_slider.observe(update_map, names='value')

display(
    widgets.VBox(children=[
        widgets.HTML(value='<h3>Global Obesity Map</h3>'),
        year_slider,
        map_output,
    ])
)

# Draw the map for the initial slider position.
update_map(dict(new=year_slider.value))

VBox(children=(HTML(value='<h3>Global Obesity Map</h3>'), IntSlider(value=2022, continuous_update=False, descr…

## 6. Distribution Analysis

In [8]:
# Distribution of Obesity Rates Over Time (Box Plot)
# One box per year, built from the 'Value' of every row in that year.
fig = px.box(
    data_frame=df,
    x='Year',
    y='Value',
    labels={'Value': 'Prevalence (%)'},
    title='Distribution of Obesity Rates Over Time',
)
fig.update_layout(height=500)
fig.show()

---
## Summary of Findings

### Key Observations
1. **Global Trend**: Obesity rates show a consistent and alarming rise globally. *(TODO: add the supporting figures from Section 2.)*
2. **Hotspots**: The Pacific Islands and the Middle East often show the highest rates. *(TODO: confirm against the rankings in Section 4.)*
3. **Low Prevalence**: Parts of Asia and Africa maintain lower rates, although these are also increasing. *(TODO: confirm against the rankings in Section 4.)*

### Implications
- **Public Health**: Obesity is a major risk factor for diabetes, cardiovascular diseases (CVDs), and other chronic conditions.
- **Economic Impact**: Rising obesity rates increase healthcare costs.

### Next Steps
- Correlate with Diabetes Prevalence (strong link expected).
- Correlate with Diet Composition (sugar/fat intake).