In [92]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [93]:
# Load the HDI-by-country dataset, then keep only the rows whose tier is 'Very High'.
df_hdi = pd.read_csv('datasets/human-development-index-(hdi)-by-country-2024.csv')

df_hdi_very_high = df_hdi.loc[df_hdi['HdiTier'].eq('Very High')]

In [94]:
# Pick the countries at the top of the Human Development Index ranking published by
# worldpopulationreview (intended as its top 10), on the assumption that their
# populations have high access to alcohol, tobacco and food.
#
# https://worldpopulationreview.com/country-rankings/developed-countries
#
# Note: Netherlands and Hong Kong are left out because they did not have data points.
# NOTE(review): the list below names 9 countries, not 10 — confirm the selection.

# dropna() with its defaults discards every row that has a missing value in any column.
df = pd.read_csv('datasets/obese_smoke_alcohol_2015.csv').dropna()

countries_highest_hdi = [
    'Japan',
    'Switzerland',
    'Singapore',
    'Italy',
    'Spain',
    'Malta',
    'Australia',
    'Sweden',
    'Norway',
]

# .copy() gives an independent frame so later column edits do not touch `df`.
filtered_df = df.loc[df['Country Name'].isin(countries_highest_hdi)].copy()

In [95]:
# Turn the obesity column into plain numbers: cast to text, remove any "[...]" segment
# (with its surrounding whitespace; presumably an interval printed next to the
# estimate — verify against the CSV), then parse as numeric. Values that still fail
# to parse become NaN. Finally order the rows by ascending life expectancy.
filtered_df = (
    filtered_df
    .assign(**{
        'Prevalence of obesity among adults, Both sexes': lambda frame: pd.to_numeric(
            frame['Prevalence of obesity among adults, Both sexes']
            .astype(str)
            .str.replace(r'\s*\[.*?\]\s*', '', regex=True),
            errors='coerce',
        )
    })
    .sort_values(by='Life expectancy', ascending=True)
)

In [96]:
# Min-max scale life expectancy across the selected countries: the lowest value maps
# to 0 and the highest to 1.
normalized_life_expectancy = filtered_df['Life expectancy'].pipe(
    lambda years: (years - years.min()) / (years.max() - years.min())
)

In [97]:
# Parallel-coordinates plot: one line per row of filtered_df, drawn across the three
# health-factor axes and the life-expectancy axis, and coloured by life expectancy.
fig = go.Figure(data=go.Parcoords(
    line=dict(
        color=filtered_df['Life expectancy'],  # Color by life expectancy
        # Two-tone scale: yellow over the lower half of the colour range, a short
        # blend between 0.5 and 0.6, and red above that. Only the stops that change
        # the colour are listed (no repeated 0.6 stop, no intermediate red stop).
        colorscale=[[0, 'yellow'], [0.5, 'yellow'], [0.6, 'red'], [1, 'red']],
        showscale=True,
        colorbar=dict(title='Life Expectancy'),  # Add color bar title
    ),
    dimensions=[
        dict(range=[0, 50],
             label='Tobacco (%)',
             values=filtered_df['Prevalence of current tobacco use (% of adults)']),
        dict(range=[0, 50],
             label='Obesity (%)',
             values=filtered_df['Prevalence of obesity among adults, Both sexes']),
        dict(range=[0, 30],
             label='Alcohol per capita (L)',
             values=filtered_df['Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)']),
        # The selection brush spans the whole visible axis; its bounds are kept
        # inside `range` so the constraint never refers to values off the axis.
        dict(range=[80, 90], constraintrange=[80, 90],
             label='Life expectancy (years)',
             values=filtered_df['Life expectancy']),
    ]
))

fig.update_layout(
    # The <sup> line renders as a smaller subtitle under the main title.
    title=(
        'Parallel Coordinates Plot of Health Factors and Life Expectancy<br>'
        '<sup>Tobacco, obesity and alcohol rates slightly affect life expectancy</sup>'
    ),
    title_x=0.5,
    title_y=0.92,
    plot_bgcolor='#cff8d6',
    paper_bgcolor='#cff8d6',
    font=dict(size=12),
    margin={'l': 80, 'b': 120, 'r': 130, 't': 120},
    width=780,
    height=520
)

fig.show()

In [98]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Grid of scatter panels: life expectancy (y) against each health factor (x).
# With start_cell='bottom-left', row 1 is the bottom row of the grid.
fig = make_subplots(rows=2, cols=2, start_cell='bottom-left')

# (row, col, axis label, source column) for each panel; the labels match the ones
# used on the parallel-coordinates axes.
for panel_row, panel_col, x_label, x_column in [
    (1, 1, 'Tobacco (%)',
     'Prevalence of current tobacco use (% of adults)'),
    (1, 2, 'Obesity (%)',
     'Prevalence of obesity among adults, Both sexes'),
    (2, 1, 'Alcohol per capita (L)',
     'Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)'),
]:
    fig.add_trace(
        go.Scatter(
            x=filtered_df[x_column],
            y=filtered_df['Life expectancy'],
            # Markers only: the rows are separate countries, so a line joining
            # them in row order would suggest a sequence that does not exist.
            mode='markers',
            name=x_label,
        ),
        row=panel_row, col=panel_col)
    # Label both axes so each panel can be read on its own.
    fig.update_xaxes(title_text=x_label, row=panel_row, col=panel_col)
    fig.update_yaxes(title_text='Life expectancy (years)', row=panel_row, col=panel_col)