In [59]:
# Import libraries: plotly builds the interactive figure, pandas handles the data
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
from pandas.plotting import parallel_coordinates  # NOTE(review): not used in the cells shown here
# Default renderer used by fig.show(); "+" combines the two named renderers
pio.renderers.default = "plotly_mimetype+notebook_connected"

## Read file

In [60]:
# Load the combined dataset (path is relative to this notebook's folder)
df = pd.read_csv(filepath_or_buffer='../data/combine_data.csv')

In [61]:
# List the column names of the loaded frame
df.columns

Index(['Unnamed: 0', 'Country', 'year', 'Happiness Score',
       'Log GDP per capita', 'Social support', 'Health',
       'Freedom to make life choices', 'Generosity', 'Corruption',
       'Positive affect', 'Negative affect',
       'Confidence in national government', 'continent', 'Country Code',
       'Time Code', 'CO2 emissions', 'Population', 'Unemployment',
       'Children out of school', 'Adjusted net national income'],
      dtype='object')

In [62]:
# Show the dtype pandas inferred for each column
df.dtypes


Unnamed: 0                             int64
Country                               object
year                                   int64
Happiness Score                      float64
Log GDP per capita                   float64
Social support                       float64
Health                               float64
Freedom to make life choices         float64
Generosity                           float64
Corruption                           float64
Positive affect                      float64
Negative affect                      float64
Confidence in national government    float64
continent                             object
Country Code                          object
Time Code                             object
CO2 emissions                        float64
Population                           float64
Unemployment                         float64
Children out of school               float64
Adjusted net national income         float64
dtype: object

## Subset the columns of interest

In [63]:
# Keep the two grouping keys plus the indicators analysed below.
# All years are retained (an earlier draft restricted the data to year == 2021).
selected_columns = [
    'continent',
    'year',
    'Happiness Score',
    'Unemployment',
    'Freedom to make life choices',
    'Social support',
    'Health',
    'Corruption',
    'Confidence in national government',
]
subset_df = df[selected_columns]
# Summary statistics of the numeric columns in the subset
subset_df.describe()

Unnamed: 0,year,Happiness Score,Unemployment,Freedom to make life choices,Social support,Health,Corruption,Confidence in national government
count,1648.0,1648.0,1224.0,1624.0,1637.0,1635.0,1555.0,1463.0
mean,2014.570388,5.524484,7.877786,0.75733,0.812972,63.465387,0.741298,0.484874
std,3.870226,1.134368,6.028937,0.139519,0.120424,6.820814,0.191639,0.196239
min,2008.0,2.178809,0.14,0.257534,0.290184,17.360001,0.035198,0.07971
25%,2011.0,4.658249,4.11,0.667525,0.750896,59.344999,0.68072,0.328743
50%,2015.0,5.478749,6.175,0.780625,0.838044,65.224998,0.8007,0.464806
75%,2018.0,6.377864,9.365,0.86799,0.906699,68.787502,0.869897,0.622427
max,2021.0,7.970892,47.5,0.985178,0.987343,74.349998,0.983276,0.993604


## Group by continent and year

In [65]:
# Average every indicator within each (continent, year) pair, then turn the
# group keys back into ordinary columns so they can be used like any other field.
grouped_by_continent_year = subset_df.groupby(['continent', 'year'])
continent_year_stats = grouped_by_continent_year.mean().reset_index()
continent_year_stats

Unnamed: 0,continent,year,Happiness Score,Unemployment,Freedom to make life choices,Social support,Health,Corruption,Confidence in national government
0,Africa,2008,4.255948,9.690000,0.595611,0.658018,50.896522,0.850964,0.505158
1,Africa,2009,4.414326,9.900000,0.658334,0.729996,52.061111,0.815278,0.493342
2,Africa,2010,4.328962,9.462000,0.701996,0.802223,53.555000,0.812732,0.568229
3,Africa,2011,4.485657,7.604615,0.700793,0.733715,54.208148,0.788887,0.576726
4,Africa,2012,4.450748,9.340000,0.656594,0.747869,55.108461,0.801816,0.473423
...,...,...,...,...,...,...,...,...,...
78,South America,2017,5.960326,6.056471,0.835252,0.849100,64.928947,0.783619,0.398690
79,South America,2018,5.930389,6.286000,0.829939,0.831588,64.975000,0.791615,0.359537
80,South America,2019,6.082741,6.520000,0.844920,0.843521,65.800000,0.796668,0.375859
81,South America,2020,5.682799,9.600769,0.829613,0.826120,66.176667,0.757853,0.396936


In [66]:
# Summary statistics of the per-(continent, year) means
continent_year_stats.describe()

Unnamed: 0,year,Happiness Score,Unemployment,Freedom to make life choices,Social support,Health,Corruption,Confidence in national government
count,83.0,83.0,83.0,83.0,83.0,83.0,83.0,83.0
mean,2014.566265,6.008795,7.071624,0.796759,0.843469,65.064201,0.667638,0.487885
std,4.03398,0.994655,2.057564,0.080966,0.078241,4.985332,0.15701,0.083958
min,2008.0,4.126032,4.2,0.595611,0.658018,50.896522,0.271222,0.341821
25%,2011.0,5.326638,5.719773,0.73231,0.789373,63.399708,0.612485,0.409043
50%,2015.0,5.995353,6.275,0.795992,0.855763,66.332353,0.745581,0.499454
75%,2018.0,6.948258,8.19225,0.84854,0.9007,68.566907,0.777002,0.567579
max,2021.0,7.428542,13.6125,0.939678,0.969603,70.700001,0.850964,0.630913


In [67]:
continent_year_stats.columns  # list the column names of the grouped frame

Index(['continent', 'year', 'Happiness Score', 'Unemployment',
       'Freedom to make life choices', 'Social support', 'Health',
       'Corruption', 'Confidence in national government'],
      dtype='object')

## Plot: parallel coordinates

In [68]:
# Three-band colour scale for the happiness score.
# Positions are fractions of the colour range [cmin, cmax] = [0, 10], so the
# bands change at scores 4.5 and 6.0. Each breakpoint appears twice (once per
# neighbouring colour) to get hard edges instead of a gradient.
low_band = 'rgb(231, 138, 195)'
mid_band = 'rgb(255, 217, 47)'
high_band = 'rgb(166, 216, 84)'
colorscale = [
    [0, low_band], [0.45, low_band],
    [0.45, mid_band], [0.60, mid_band],
    [0.60, high_band], [1, high_band],
]

# One entry per vertical axis, in left-to-right order: (column, [axis min, axis max]).
axis_specs = [
    ('Social support', [0, 1]),
    ('Health', [48, 72]),
    ('Confidence in national government', [0, 1]),
    ('Corruption', [0, 1]),
    ('Happiness Score', [0, 10]),
]
dimensions = [
    dict(range=axis_range, label=column, values=continent_year_stats[column])
    for column, axis_range in axis_specs
]

fig = go.Figure(
    data=go.Parcoords(
        line=dict(
            color=continent_year_stats['Happiness Score'],  # colour each line by its happiness score
            colorscale=colorscale,
            showscale=True,  # draw the colour bar
            cmin=0,  # colour range matches the Happiness Score axis
            cmax=10,
        ),
        dimensions=dimensions,
    )
)

# A Parcoords trace has no cartesian x/y axes and no legend, so layout
# xaxis/yaxis/legend titles would have no effect; only the figure title is set
# here and the remaining labels are drawn as annotations below.
fig.update_layout(title='Parallel Coordinate Chart for Quantitative Attributes')

# Axis-style labels and the colour-bar caption, positioned in paper
# coordinates (0-1 spans the plotting area; values outside sit in the margins).
fig.add_annotation(x=0.5, y=-0.16, xref="paper", yref="paper",
                   text="Quantitative Attributes",
                   showarrow=False, font=dict(size=14))
fig.add_annotation(x=-0.08, y=0.5, xref="paper", yref="paper",
                   text="Value",
                   showarrow=False, font=dict(size=14), textangle=270)
fig.add_annotation(x=1.1, y=1.04, xref="paper", yref="paper",
                   text="Happiness Score",
                   showarrow=False, font=dict(size=8))

fig.show()

In [69]:
# Export the interactive figure as a standalone HTML file
fig.write_html(file="../img/Parallel_plot.html")