# "Happiness Index Comparison"
> "Altair interactive plot to compare countries' happiness index"

- author: Youngky
- categories: [altair, jupyter]

### Do the happiness scores of countries change significantly between 2015 and 2019? Which countries are happier? And which countries are becoming less happy?

In [6]:
#hide
import pandas as pd
import numpy as np
import os
import copy
import matplotlib.pyplot as plt
from colour import Color
import altair as alt
%config InlineBackend.figure_format = 'retina'

# Display settings: show up to 50 columns and do not truncate rows
# (max_rows=None removes the row limit when a DataFrame is displayed).
pd.options.display.max_columns = 50
pd.options.display.max_rows = None

In [2]:
#hide
# Location of the yearly CSV files and the files to load.
url = 'https://raw.githubusercontent.com/wyoungky/Happiness-Index/master/data/world-happiness/'
file_name = ['2015.csv', '2016.csv', '2017.csv', '2018.csv', '2019.csv']


### read and combine all files in the folder.
# Each file is read into its own frame and tagged with the year taken from the
# file name. The year is kept as a string (e.g. '2015'); the later cells index
# the pivoted year columns with str(year).
yearly_frames = []
for file in file_name:
    info_temp = pd.read_csv(url + file)

    ## get date of the record.
    info_temp['Year'] = file.split(".")[0]

    yearly_frames.append(info_temp)

# Concatenate once instead of growing the frame inside the loop: every
# in-loop concat copies all rows gathered so far. sort=True keeps the union of
# the per-file columns in sorted order; the original row index of each file is
# kept (no ignore_index), as before.
happiness_data = pd.concat(yearly_frames, sort=True)

In [3]:
#hide

# One row per (Country, Region) with one happiness-score column per year.
# The year columns are labelled with strings ('2015', ...), which is why the
# loops below index them with str(year).
score_pivot = happiness_data.pivot_table(values='Happiness Score',
                                         index=['Country', 'Region'],
                                         columns='Year')
score_pivot = score_pivot.rename_axis(None, axis=1)
happiness_data_yearly = score_pivot.reset_index()


# Years for which ranks and pairwise changes are derived.
column_to_rank = [2015, 2016, 2017, 2018, 2019]

# Rank the countries within each year; ascending=False gives the highest
# score the smallest rank number.
for year in column_to_rank:
    year_scores = happiness_data_yearly[str(year)]
    happiness_data_yearly["rank_{}".format(year)] = year_scores.rank(ascending=False)

# Average of the per-year ranks, rounded to one decimal.
rank_col = list(filter(lambda name: name.startswith('rank'),
                       happiness_data_yearly.columns))
yearly_ranks = happiness_data_yearly[rank_col]
happiness_data_yearly["avg_rank"] = yearly_ranks.mean(axis=1).round(1)

# For every (earlier year, later year) pair add two columns:
#   rank_<earlier>_<later>  = rank in the earlier year minus rank in the later year
#   score_<earlier>_<later> = score in the later year minus score in the earlier year
for position, year in enumerate(column_to_rank[:-1]):
    for year_later in column_to_rank[position + 1:]:
        earlier, later = str(year), str(year_later)
        pair_suffix = earlier + "_" + later

        rank_delta = (happiness_data_yearly["rank_" + earlier]
                      - happiness_data_yearly["rank_" + later])
        happiness_data_yearly["rank_" + pair_suffix] = rank_delta

        score_delta = happiness_data_yearly[later] - happiness_data_yearly[earlier]
        happiness_data_yearly["score_" + pair_suffix] = score_delta.round(3)


> Tip: Clicking a region in the legend on the right side of the chart highlights that region's data.

> Note: The year plotted on the x-axis can be changed using the dropdown at the bottom of the chart.

In [4]:
#hide_input
# Independent deep copy of the yearly table, used only for chart preparation.
altair_data = happiness_data_yearly.copy(deep=True)


### data manipulation for chart
# 2019 stays a column of its own (it is always the y-axis). The remaining
# years are stacked into long form: 'X-Axis' holds the year label and
# 'Selected Year' holds that year's score.
chart_columns = ['Country', 'Region', '2015', '2016', '2017', '2018', '2019']
reshaped_df = (
    altair_data[chart_columns]
    .melt(id_vars=['Country', 'Region', '2019'])
    .rename(columns={'variable': 'X-Axis', 'value': 'Selected Year'})
)


### start drawing chart
# Base chart; every layer below is derived from it and shares reshaped_df.
chart = alt.Chart(reshaped_df)

### setup dropdown and filters
col_x = 'Selected Year'   # score of the year picked in the dropdown
col_y = '2019'            # 2019 score, always on the y-axis

## dropdown: one option per year label found in the 'X-Axis' column
year_options = list(reshaped_df['X-Axis'].unique())
select_box = alt.binding_select(options=year_options)
selection_dropdown = alt.selection_single(
    name='Select Year for',
    fields=['X-Axis'],
    bind=select_box,
    init={'X-Axis': '2015'},
)
## filters on legend: selection over 'Region', bound to the legend
selection1 = alt.selection_multi(fields=['Region'], bind='legend')


### draw chart (main chart)
# Both axes use the same fixed score domain.
xscale = alt.Scale(domain=(2.0, 8.0))
yscale = alt.Scale(domain=(2.0, 8.0))

# Selected regions keep their colour and full opacity; the rest turn
# light gray and mostly transparent.
region_color = alt.condition(selection1, 'Region:N', alt.value('lightgray'),
                             scale=alt.Scale(scheme='tableau20'))
region_opacity = alt.condition(selection1, alt.value(1), alt.value(0.2))
point_tooltip = [
    'Country',
    alt.Tooltip(col_x, title="Score Selected Year"),
    alt.Tooltip(col_y, title="Score 2019"),
]

scatter = (
    chart.mark_point()
    .encode(
        x=alt.X(col_x, scale=xscale, title=''),
        y=alt.Y(col_y, scale=yscale, title='2019 Happiness Score'),
        color=region_color,
        tooltip=point_tooltip,
        opacity=region_opacity,
    )
    .add_selection(selection_dropdown, selection1)
    # keep only the rows of the year chosen in the dropdown
    .transform_filter(selection_dropdown)
    .properties(height=500, width=500)
)


### draw chart (right and bottom secondary charts)
# Tick positions shared by both histograms' score axes.
score_ticks = [2,2.5,3,3.5,4,4.5,5,5.5,6,6.5,7,7.5,8]

# Right-hand histogram: counts of records binned by 2019 score, stacked by
# region, restricted to the dropdown year and the legend selection.
chart2 = (
    chart.mark_bar()
    .encode(
        x=alt.X('count()'),
        y=alt.Y(col_y,
                bin=alt.Bin(maxbins=30, extent=yscale.domain),
                title="",
                axis=alt.Axis(values=score_ticks, grid=True)),
        color='Region:N',
    )
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
    .properties(height=500, width=100)
)

# Bottom histogram: same idea for the score of the selected year.
chart3 = (
    chart.mark_bar()
    .encode(
        y=alt.Y('count(Country)'),
        x=alt.X(col_x,
                bin=alt.Bin(maxbins=30, extent=xscale.domain),
                title='Happiness Score of Selected Year',
                axis=alt.Axis(values=score_ticks, grid=True)),
        color='Region:N',
    )
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
    .properties(height=100, width=500)
)

### draw average lines.
# Aggregate encodings: mean score of the selected year / of 2019, computed
# over the rows left after the dropdown and legend filters.
mean_of_x = 'mean({}):Q'.format(col_x)
mean_of_y = 'mean({}):Q'.format(col_y)

# Mark properties common to both "mean" labels.
mean_label_style = dict(text='mean', align='left', baseline='bottom',
                        opacity=0.5, color="blue")

# Thin blue vertical rule at the mean of the selected year.
xmean = (
    chart.mark_rule()
    .encode(x=mean_of_x, size=alt.value(0.5), color=alt.value('blue'))
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
)

# Rotated label for the vertical rule; y is a fixed value of 490
# (presumably chosen to sit near the bottom of the 500-high plot - confirm).
text_xmean = (
    chart.mark_text(angle=270, **mean_label_style)
    .encode(x=mean_of_x, y=alt.value(490))
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
)

# Thin blue horizontal rule at the mean 2019 score.
ymean = (
    chart.mark_rule()
    .encode(y=mean_of_y, size=alt.value(0.5), color=alt.value('blue'))
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
)

# Label for the horizontal rule; x is a fixed value of 10
# (presumably near the left edge of the plot - confirm).
text_ymean = (
    chart.mark_text(angle=0, **mean_label_style)
    .encode(x=alt.value(10), y=mean_of_y)
    .transform_filter(selection_dropdown)
    .transform_filter(selection1)
)

### dashed diagonal reference line (same value on both axes)
# Integer end points derived from the observed scores. Series.min() and
# Series.max() skip missing values. The builtin min()/max() compare element by
# element, so with NaN in the column their result depends on where the NaN
# sits, and a NaN in the first row would propagate and make int() raise.
diag_start = min(int(np.floor(reshaped_df[col_x].min())),
                 int(np.floor(reshaped_df[col_y].min())))
diag_stop = max(int(np.ceil(reshaped_df[col_x].max() + 1)),
                int(np.floor(reshaped_df[col_y].max())))
diag_values = range(diag_start, diag_stop)   # stop value is exclusive

# Identical values in both columns, so the plotted points lie on y = x.
df = pd.DataFrame({col_x: diag_values, col_y: diag_values})

others = alt.Chart(df).mark_line(strokeDash=[10,5]).encode(
    alt.X(col_x),
    alt.Y(col_y),
    size=alt.value(0.5)
)


## combine charts
# Main panel: scatter with both mean rules, their labels and the diagonal.
main_panel = scatter + xmean + text_xmean + ymean + text_ymean + others
# Marginal histograms, each overlaid with the matching mean rule.
right_panel = chart2 + ymean
bottom_panel = chart3 + xmean

# Main and right panels side by side, bottom histogram underneath.
final_chart = (main_panel | right_panel) & bottom_panel

# Last expression of the cell: the titled chart is the cell output.
final_chart.properties(
    title='Happiness Index Comparison'
).configure_title(
    fontSize=20, anchor='middle', color='black', dy=-20
)

In [5]:
#hide
# Independent copy of the yearly table for this chart.
altair_data = happiness_data_yearly.copy(deep=True)


### data manipulation for chart
# Long form of the 2015, 2016 and 2019 scores: 'X-Axis' holds the year label,
# 'Selected Year' holds that year's score.
reshaped_df_2 = (
    altair_data[['Country', 'Region', '2015', '2016', '2019']]
    .melt(id_vars=['Country', 'Region'])
    .rename(columns={'variable': 'X-Axis', 'value': 'Selected Year'})
)


### start drawing chart
chart = alt.Chart(reshaped_df_2)

# Legend-bound selection over 'Region'. The year dropdown that used to be
# defined and attached here was never referenced by a filter or an encoding,
# so it rendered a control with no effect on this chart; it has been removed.
selection1 = alt.selection_multi(fields=['Region'], bind='legend')

# One line per country across the years. 'rank' is computed by the window
# transform below: within each year ('X-Axis' group) rows are ordered by
# score, descending, and numbered with rank().
chart.mark_line().encode(
    x="X-Axis:N",
    y="rank:O",
    color=alt.condition(selection1, "Region:N", alt.value('lightgray'),
                        scale=alt.Scale(scheme='tableau20')),
    detail="Country:N",
    opacity=alt.condition(selection1, alt.value(1), alt.value(0.1))
).transform_window(
    rank="rank()",
    sort=[
        alt.SortField("Selected Year", order="descending")
    ],
    groupby=["X-Axis"],
).add_selection(
    selection1
).properties(
    height=500,
    width=500
)



### Sources and Reference

Dataset: https://www.kaggle.com/unsdsn/world-happiness/data