In [111]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [112]:
# Restrict the 2015 obesity / smoking / alcohol dataset to a hand-picked set
# of countries with a very high human development index (HDI), as ranked by
# worldpopulationreview. These countries are assumed to have high access to
# alcohol, tobacco and food.
#
# Source: https://worldpopulationreview.com/country-rankings/developed-countries
#
# Note: the list below holds 23 countries (not just a top 10). Netherlands and
# Hong Kong are excluded, as these did not have data points.

countries_highest_hdi = [
    'Switzerland', 'Norway', 'Iceland', 'Australia', 'Sweden',
    'Germany', 'Denmark', 'Ireland', 'Singapore', 'Finland',
    'Belgium', 'New Zealand', 'Canada', 'United Arab Emirates', 'Luxembourg',
    'United States', 'Japan', 'Austria', 'Slovenia', 'Malta',
    'Israel', 'France', 'Spain',
]

# Load the raw table and discard every row that has a missing value in any column.
df = pd.read_csv('datasets/obese_smoke_alcohol_2015.csv').dropna()

# Keep only the selected countries; .copy() gives an independent frame so the
# later column clean-up does not write into a view of `df`.
is_high_hdi = df['Country Name'].isin(countries_highest_hdi)
filtered_df = df.loc[is_high_hdi].copy()

In [113]:
# Turn the obesity column into plain numbers: cast to string, strip any
# bracketed "[...]" part (with surrounding whitespace), then parse what is
# left as numeric. Values that still fail to parse become NaN.
obesity_col = 'Prevalence of obesity among adults, Both sexes'
filtered_df[obesity_col] = pd.to_numeric(
    filtered_df[obesity_col]
    .astype(str)
    .str.replace(r'\s*\[.*?\]\s*', '', regex=True),
    errors='coerce',
)

In [114]:
# Parallel-coordinates plot: one polyline per row of `filtered_df`, drawn
# across the tobacco, obesity and alcohol axes and coloured by life expectancy.
tobacco_values = filtered_df['Prevalence of current tobacco use (% of adults)']
obesity_values = filtered_df['Prevalence of obesity among adults, Both sexes']
alcohol_values = filtered_df['Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)']

# Line colour follows life expectancy on a two-colour scale running from
# light blue (low end) to red (high end), with a titled colour bar.
line_style = dict(
    color=filtered_df['Life expectancy'],
    colorscale=[[0, 'lightblue'], [1, 'red']],
    showscale=True,
    colorbar=dict(title='Life Expectancy'),
)

# Each axis has a fixed display range; the tobacco axis additionally starts
# with the 10-40 interval selected via `constraintrange`.
axes = [
    dict(label='Tobacco (%)', values=tobacco_values,
         range=[0, 50], constraintrange=[10, 40]),
    dict(label='Obesity', values=obesity_values, range=[0, 50]),
    dict(label='Alcohol', values=alcohol_values, range=[0, 40]),
]

fig = go.Figure(data=go.Parcoords(line=line_style, dimensions=axes))

# Centred title, white backgrounds and a fixed width of 780.
layout_options = dict(
    title='Parallel Coordinates Plot of Health Factors and Life Expectancy',
    title_x=0.5,
    font=dict(size=12),
    plot_bgcolor='white',
    paper_bgcolor='white',
    width=780,
)
fig.update_layout(**layout_options)

fig.show()