In [7]:
# Import Essential Libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display
import warnings

# Notebook-wide settings
warnings.filterwarnings('ignore')  # silence every warning category for cleaner cell output
pd.options.display.max_columns = None  # never truncate wide frames horizontally
pd.options.display.float_format = '{:.2f}'.format  # render floats with two decimals

print("Libraries imported successfully.")

Libraries imported successfully.


## 1. Data Loading and Preprocessing
We will load the cleaned diet composition dataset.

In [8]:
# Load the dataset
# NOTE(review): the doubled '.csv.csv' extension looks unintentional, but it is kept
# as-is because it has to match the file on disk — confirm before renaming.
file_path = '../../data/processed/cleaned-diet-compositions.csv.csv'
df = pd.read_csv(file_path)

# Preprocessing: derive the calendar year into 'Year_Int'.
# Test for "already numeric" instead of `dtype == 'object'`: date strings are not
# guaranteed to load as object dtype (pandas' dedicated string dtype, or a column
# already parsed to datetime64, both fail that comparison and would skip parsing).
if pd.api.types.is_numeric_dtype(df['year']):
    # Already a plain year number; running it through to_datetime would misread it.
    df['Year_Int'] = df['year']
else:
    # Date-like values such as '1961-01-01': keep only the year component.
    df['Year_Int'] = pd.to_datetime(df['year']).dt.year

# Identify Nutrient Columns (excluding metadata)
metadata_cols = ['entity', 'year', 'Year_Int']
nutrient_cols = [col for col in df.columns if col not in metadata_cols]

print(f"Dataset Loaded: {df.shape[0]} rows, {df.shape[1]} columns")
print(f"Year Range: {df['Year_Int'].min()} - {df['Year_Int'].max()}")
print(f"Nutrient Categories: {nutrient_cols}")
df.head()

Dataset Loaded: 8154 rows, 13 columns
Year Range: 1961 - 2013
Nutrient Categories: ['cereals_and_grains', 'pulses', 'starchy_roots', 'sugar', 'oils_fats', 'meat', 'dairy_eggs', 'fruit_and_vegetables', 'other', 'alcoholic_beverages']


Unnamed: 0,entity,year,cereals_and_grains,pulses,starchy_roots,sugar,oils_fats,meat,dairy_eggs,fruit_and_vegetables,other,alcoholic_beverages,Year_Int
0,Afghanistan,1961-01-01,2060,16.0,25,51,92.0,88.0,102.0,82.0,13,0.0,1961
1,Afghanistan,1962-01-01,2060,17.0,22,45,98.0,88.0,101.0,76.0,12,0.0,1962
2,Afghanistan,1963-01-01,2060,17.0,23,47,106.0,91.0,110.0,79.0,13,0.0,1963
3,Afghanistan,1964-01-01,2060,18.0,24,55,102.0,93.0,110.0,95.0,11,0.0,1964
4,Afghanistan,1965-01-01,2060,18.0,24,57,105.0,95.0,118.0,95.0,13,0.0,1965


## 2. Global Trends Analysis
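Let's calculate the global average consumption for each nutrient category over time to understand the macro shifts in the human diet. Note that "global average" here is the unweighted mean across all entities in the dataset for each year, not a population-weighted figure.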

In [9]:
# Calculate Global Averages per Year
# This is an unweighted mean over every entity that has a row for the given year.
global_trends = df.groupby('Year_Int')[nutrient_cols].mean().reset_index()

# Melt to long format (one row per year/nutrient pair) so px.line can colour by nutrient
global_melted = global_trends.melt(id_vars='Year_Int', var_name='Nutrient', value_name='Consumption')

# Take the year span shown in the title from the data itself; a hard-coded
# "1961-Present" overstates the coverage whenever the data stops earlier.
trend_first_year = global_trends['Year_Int'].min()
trend_last_year = global_trends['Year_Int'].max()

# Visualization: Global Trends Line Chart
fig = px.line(global_melted, x='Year_Int', y='Consumption', color='Nutrient',
              title=f'<b>Global Average Nutrient Consumption Over Time ({trend_first_year}-{trend_last_year})</b>',
              labels={'Year_Int': 'Year', 'Consumption': 'Avg Consumption (kcal/capita/day)'},
              template='plotly_white')

# One shared hover box per year makes it easy to compare all nutrients at once
fig.update_layout(hovermode="x unified", height=600)
fig.show()

### Observations
- **Cereals & Grains**: Typically the largest caloric contributor, but is it declining or stable?
- **Meat & Oils**: Look for upward trends indicating the "Nutrition Transition" towards westernized diets.
- **Sugar**: Check for spikes or plateaus.

## 3. Regional & Country-Specific Analysis
Use the interactive widget below to explore how specific countries' diets have changed.

In [10]:
# Interactive Country Explorer
# Distinct entity names in alphabetical order; these become the dropdown options.
countries = sorted(pd.unique(df['entity']))

def plot_country_diet(country):
    """Plot one entity's diet over time as a two-row figure.

    Row 1 draws one line per nutrient category; row 2 draws the same series as
    a stacked area chart. Reads the notebook-level ``df`` and ``nutrient_cols``.

    Args:
        country: Value matched against the ``entity`` column of ``df``.

    Returns:
        None. The figure is rendered via ``fig.show()``.
    """
    # Order rows by year so each line is drawn left to right even if the
    # source rows are not already sorted.
    country_data = df[df['entity'] == country].sort_values('Year_Int')

    # Create Subplots: Line Chart & Area Chart
    fig = make_subplots(rows=2, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.1,
                        subplot_titles=(f'Nutrient Consumption Trends: {country}', f'Diet Composition (Stacked): {country}'))

    # Pin one colour per nutrient so both panels always agree, instead of relying
    # on the number of nutrients happening to equal the default colorway length.
    palette = px.colors.qualitative.Plotly
    nutrient_colors = {
        nutrient: palette[position % len(palette)]
        for position, nutrient in enumerate(nutrient_cols)
    }

    # Line Chart (Trends)
    for nutrient in nutrient_cols:
        fig.add_trace(go.Scatter(x=country_data['Year_Int'], y=country_data[nutrient],
                                 name=nutrient, mode='lines',
                                 legendgroup=nutrient,
                                 line=dict(color=nutrient_colors[nutrient])), row=1, col=1)

    # Stacked Area Chart (Composition)
    # Same legendgroup as the line trace, so one legend click toggles both panels;
    # showlegend=False avoids a duplicate legend entry.
    for nutrient in nutrient_cols:
        fig.add_trace(go.Scatter(x=country_data['Year_Int'], y=country_data[nutrient],
                                 name=nutrient, stackgroup='one',
                                 legendgroup=nutrient,
                                 line=dict(color=nutrient_colors[nutrient]),
                                 showlegend=False), row=2, col=1)

    fig.update_layout(height=800, title_text=f"Dietary Analysis for {country}", template='plotly_white')
    fig.show()

# Widget
# Prefer 'United States' as the initial selection, but fall back to the first
# available entity: Dropdown rejects a `value` that is not among its options.
default_country = 'United States' if 'United States' in countries else (countries[0] if countries else None)
widgets.interact(plot_country_diet, country=widgets.Dropdown(options=countries, value=default_country, description='Country:'));

interactive(children=(Dropdown(description='Country:', index=164, options=('Afghanistan', 'Albania', 'Algeria'…

## 4. Comparative Growth Rates
Which nutrient categories have seen the fastest growth globally?

In [11]:
# Calculate Percentage Growth (Start vs End)
start_year = df['Year_Int'].min()
end_year = df['Year_Int'].max()

# Index the yearly averages by year so each endpoint is a direct row lookup
# (one value per nutrient column) rather than a filter + transpose + iloc.
yearly_means = global_trends.set_index('Year_Int')

growth_df = pd.DataFrame({
    'Start_Value': yearly_means.loc[start_year],
    'End_Value': yearly_means.loc[end_year]
})

growth_df['Absolute_Change'] = growth_df['End_Value'] - growth_df['Start_Value']
# A zero baseline has no meaningful percentage change; yield NaN instead of +/-inf
# so such a category is left out of the chart rather than distorting its axis.
baseline = growth_df['Start_Value'].replace(0, np.nan)
growth_df['Percent_Change'] = (growth_df['Absolute_Change'] / baseline) * 100
growth_df = growth_df.sort_values('Percent_Change', ascending=True)

# Visualization
# Centre the diverging RdBu scale on 0 so the colour actually separates decline
# from growth; without a midpoint the neutral colour sits at the middle of the
# observed range, wherever that happens to fall.
fig = px.bar(growth_df, x='Percent_Change', y=growth_df.index, orientation='h',
             title=f'<b>Global Percentage Change in Nutrient Consumption ({start_year}-{end_year})</b>',
             labels={'Percent_Change': 'Growth (%)', 'index': 'Nutrient Category'},
             color='Percent_Change', color_continuous_scale='RdBu',
             color_continuous_midpoint=0)

fig.update_layout(height=500)
fig.show()

## Conclusion
This analysis highlights the shifting global dietary landscape. Significant increases in **Oils/Fats** and **Meat** consumption globally reflect the nutrition transition, while traditional staples like **Starchy Roots** may show stagnation or decline in relative importance.