In [1]:
import pandas as pd
import matplotlib.pyplot as plt

happy_df = pd.read_csv("happiness_df.csv")

def get_happiness_scores(country_name):
    mask = happy_df['Country'] == country_name
    happiness_scores = happy_df.loc[mask, 'Happiness Score'].tolist()
    return happiness_scores

def plot_country(happiness_scores):
    years = range(2018, 2023)
    plt.plot(years, happiness_scores)
    plt.xlabel('Year')
    plt.ylabel('Happiness Scores')
    plt.title('Change in Happiness from 2015 to 2022')
    plt.show()

In [2]:
# THIS IS PROPERLY ORDERING, MOST UP TO DATE

pivot = pd.DataFrame({'Country': happy_df['Country']})
for year in range(2015, 2023):
    pivot[str(year)] = pd.Series(dtype=float)

    
for i in range(len(pivot)):
    # get the name of the current country
    country = pivot.loc[i, 'Country']
    
    # loop through each year from 2015 to 2022
    for year in range(2015, 2023):
        # look up the happiness score for the current country in the happy_df dataframe for the current year
        row = happy_df.loc[(happy_df['Country'] == country) & (happy_df['Year'] == year), 'Happiness Score']
        if not row.empty:
            happiness_score = row.values[0]
            # fill in the corresponding cell in the pivot dataframe
            pivot.loc[i, str(year)] = happiness_score

pivot.drop_duplicates(subset=['Country'], inplace=True)
pivot.drop(index=pivot.index[-1], inplace=True)

# display the final dataframe
pivoted_happy = pivot

pivoted_happy

Unnamed: 0,Country,2015,2016,2017,2018,2019,2020,2021,2022
0,Finland,7.587,7.526,7.537,7.632,7.769,7.8087,7.842,7.821
1,Denmark,7.561,7.509,7.522,7.594,7.600,7.6456,7.620,7.636
2,Iceland,7.527,7.501,7.504,7.555,7.554,7.5599,7.571,7.557
3,Switzerland,7.522,7.498,7.494,7.495,7.494,7.5045,7.554,7.512
4,Netherlands,7.427,7.413,7.469,7.487,7.488,7.4880,7.464,7.415
...,...,...,...,...,...,...,...,...,...
141,Botswana*,3.904,3.856,3.766,3.795,3.973,3.7208,3.623,3.471
142,Rwanda*,3.896,3.832,3.657,3.774,3.933,3.6528,3.615,3.268
143,Zimbabwe,3.845,3.763,3.644,3.692,3.802,3.5733,3.600,2.995
144,Lebanon,3.819,3.739,3.603,3.632,3.775,3.5380,3.512,2.955


In [3]:
country_codes = pd.read_csv("country_codes.csv")
country_codes = country_codes.iloc[:, :3]

In [4]:
pivoted_happy['Country'] = pivoted_happy['Country'].str.replace('*', '')

  pivoted_happy['Country'] = pivoted_happy['Country'].str.replace('*', '')


In [5]:
# Use merge to join the two DataFrames on the "name" column
merged_df = pd.merge(pivoted_happy, country_codes[['name', 'alpha-3']], 
                     how='left', left_on='Country', right_on='name')

merged_df = merged_df.rename(columns={'alpha-3': 'Country Code'})
merged_df = merged_df.drop(columns=['name'])

# Save the modified DataFrame back to the original variable
pivoted_happy = merged_df

In [6]:
nan_rows = pivoted_happy[pd.isna(pivoted_happy['Country Code'])]
print(nan_rows)

                       Country   2015   2016   2017   2018   2019    2020  \
15               United States  6.983  6.994  6.951  6.927  7.021  7.0937   
16              United Kingdom  6.946  6.952  6.891  6.910  6.985  7.0758   
25    Taiwan Province of China  6.750  6.650  6.572  6.441  6.444  6.4401   
31                      Kosovo  6.485  6.478  6.424  6.374  6.300  6.3756   
58                 South Korea  5.813  5.822  5.822  5.810  5.860  5.9109   
61                     Moldova  5.759  5.771  5.758  5.752  5.758  5.8708   
70                     Bolivia  5.477  5.528  5.472  5.524  5.529  5.5557   
76                     Vietnam  5.286  5.401  5.293  5.410  5.425  5.5150   
78                North Cyprus  5.253  5.314  5.273  5.358  5.373  5.5047   
79                      Russia  5.212  5.303  5.269  5.358  5.339  5.4888   
80   Hong Kong S.A.R. of China  5.194  5.291  5.262  5.347  5.323  5.4562   
87                 Ivory Coast  5.102  5.161  5.225  5.199  5.211  5.1944   

In [7]:
# manually adding all country codes where the ISO uses different names than happiness

pivoted_happy.loc[pivoted_happy['Country'] == 'United States', 'Country Code'] = 'USA'
pivoted_happy.loc[pivoted_happy['Country'] == 'Russia', 'Country Code'] = 'RUS'
pivoted_happy.loc[pivoted_happy['Country'] == 'United Kingdom', 'Country Code'] = 'GBR'
pivoted_happy.loc[pivoted_happy['Country'] == 'Taiwan Province of China', 'Country Code'] = 'TWN'
pivoted_happy.loc[pivoted_happy['Country'] == 'South Korea', 'Country Code'] = 'KOR'
pivoted_happy.loc[pivoted_happy['Country'] == 'Bolivia', 'Country Code'] = 'BOL'
pivoted_happy.loc[pivoted_happy['Country'] == 'Vietnam', 'Country Code'] = 'VNM'
pivoted_happy.loc[pivoted_happy['Country'] == 'North Cyprus', 'Country Code'] = 'CYP'
pivoted_happy.loc[pivoted_happy['Country'] == 'Hong Kong S.A.R. of China', 'Country Code'] = 'HKG'
pivoted_happy.loc[pivoted_happy['Country'] == 'Ivory Coast', 'Country Code'] = 'CIV'
pivoted_happy.loc[pivoted_happy['Country'] == 'Laos', 'Country Code'] = 'LAO'
pivoted_happy.loc[pivoted_happy['Country'] == 'Venezuela', 'Country Code'] = 'VEN'
pivoted_happy.loc[pivoted_happy['Country'] == 'Iran', 'Country Code'] = 'IRN'
pivoted_happy.loc[pivoted_happy['Country'] == 'Tanzania', 'Country Code'] = 'TZA'

In [9]:
import plotly.express as px

df_2022 = pivoted_happy[['Country', 'Country Code', '2022']]
color_scale = ["#8B0000", "#FF4500", "#FFA500", "#FFFF00"]

# Create a choropleth map using Plotly Express
fig = px.choropleth(df_2022, 
                    locations='Country Code', 
                    locationmode='ISO-3',
                    color='2022', 
                    hover_name='Country',
                    color_continuous_scale=color_scale, 
                    title='Happiness Scores by Country in 2022')

# Display the map
fig.show()

In [19]:
# Calculate the difference between 2022 and 2015 happiness scores
pivoted_happy['Happiness Score Difference'] = pivoted_happy['2022'] - pivoted_happy['2019']

In [20]:
def get_largest_9():
    largest = pivoted_happy.nlargest(9, 'Happiness Score Difference')
    return largest['Country'].tolist()
    
def get_smallest_9():
    smallest = pivoted_happy.nsmallest(9, 'Happiness Score Difference')
    return smallest['Country'].tolist()

In [24]:
#pivoted_happy is a cleaned df with country's happiness by year from WHR
pivoted_happy['Happiness Score Difference'] = pivoted_happy['2022'] - pivoted_happy['2019']
#creating a more specific dp for our map to lookup values in
df_2022 = pivoted_happy[['Country', 'Country Code', 'Happiness Score Difference']]

#color scale
norm = plt.Normalize(vmin=-1.377, vmax=0.247)
color_scale = [[norm(-1.377), "#8B0000"], 
               [norm(-0.5), "#FF4500"], 
               [norm(0), "#FFA500"], 
               [norm(0.247), "#FFFF00"]]

# Creating a choropleth map
fig = px.choropleth(df_2022, 
                    locations='Country Code', # needed to use country code to have choropleth know countries to shade
                    locationmode='ISO-3', # locations are a built in feature of plotly express, just need to link data
                    color='Happiness Score Difference', 
                    hover_name='Country',
                    color_continuous_scale=color_scale, 
                    title='Happiness Score Difference from 2022 to 2019')

# Display the map
fig.show()

In [88]:
get_smallest_9()

['Afghanistan',
 'Lebanon',
 'Zimbabwe',
 'Rwanda',
 'Botswana',
 'Lesotho',
 'Sierra Leone',
 'India',
 'Tanzania']

In [89]:
get_largest_9()

['Russia',
 'Bulgaria',
 'Nepal',
 'Finland',
 'Hong Kong S.A.R. of China',
 'Liberia',
 'North Cyprus',
 'Venezuela',
 'Armenia']

In [90]:
pivoted_happy

Unnamed: 0,Country,2015,2016,2017,2018,2019,2020,2021,2022,Country Code,Happiness Score Difference
0,Finland,7.587,7.526,7.537,7.632,7.769,7.8087,7.842,7.821,FIN,0.234
1,Denmark,7.561,7.509,7.522,7.594,7.600,7.6456,7.620,7.636,DNK,0.075
2,Iceland,7.527,7.501,7.504,7.555,7.554,7.5599,7.571,7.557,ISL,0.030
3,Switzerland,7.522,7.498,7.494,7.495,7.494,7.5045,7.554,7.512,CHE,-0.010
4,Netherlands,7.427,7.413,7.469,7.487,7.488,7.4880,7.464,7.415,NLD,-0.012
...,...,...,...,...,...,...,...,...,...,...,...
141,Botswana,3.904,3.856,3.766,3.795,3.973,3.7208,3.623,3.471,BWA,-0.433
142,Rwanda,3.896,3.832,3.657,3.774,3.933,3.6528,3.615,3.268,RWA,-0.628
143,Zimbabwe,3.845,3.763,3.644,3.692,3.802,3.5733,3.600,2.995,ZWE,-0.850
144,Lebanon,3.819,3.739,3.603,3.632,3.775,3.5380,3.512,2.955,LBN,-0.864


In [91]:
# Adding continents for happy_df
df_2015 = pd.read_csv("2015.csv")
df_2015 = df_2015.iloc[:, :2]
#df_2015["Region"] = df_2015["Region"].replace("Sub-Saharan Africa", "Africa")
#df_2015["Region"] = df_2015["Region"].replace("Western Europe", "Europe")
df_2015

Unnamed: 0,Country,Region
0,Switzerland,Western Europe
1,Iceland,Western Europe
2,Denmark,Western Europe
3,Norway,Western Europe
4,Canada,North America
...,...,...
153,Rwanda,Sub-Saharan Africa
154,Benin,Sub-Saharan Africa
155,Syria,Middle East and Northern Africa
156,Burundi,Sub-Saharan Africa


In [92]:
region_map = df_2015.set_index('Country')['Region'].to_dict()

# create a new column in the happy_df dataframe that maps country names to region values
pivoted_happy['Region'] = pivoted_happy['Country'].map(region_map)

In [93]:
pivoted_happy

Unnamed: 0,Country,2015,2016,2017,2018,2019,2020,2021,2022,Country Code,Happiness Score Difference,Region
0,Finland,7.587,7.526,7.537,7.632,7.769,7.8087,7.842,7.821,FIN,0.234,Western Europe
1,Denmark,7.561,7.509,7.522,7.594,7.600,7.6456,7.620,7.636,DNK,0.075,Western Europe
2,Iceland,7.527,7.501,7.504,7.555,7.554,7.5599,7.571,7.557,ISL,0.030,Western Europe
3,Switzerland,7.522,7.498,7.494,7.495,7.494,7.5045,7.554,7.512,CHE,-0.010,Western Europe
4,Netherlands,7.427,7.413,7.469,7.487,7.488,7.4880,7.464,7.415,NLD,-0.012,Western Europe
...,...,...,...,...,...,...,...,...,...,...,...,...
141,Botswana,3.904,3.856,3.766,3.795,3.973,3.7208,3.623,3.471,BWA,-0.433,Sub-Saharan Africa
142,Rwanda,3.896,3.832,3.657,3.774,3.933,3.6528,3.615,3.268,RWA,-0.628,Sub-Saharan Africa
143,Zimbabwe,3.845,3.763,3.644,3.692,3.802,3.5733,3.600,2.995,ZWE,-0.850,Sub-Saharan Africa
144,Lebanon,3.819,3.739,3.603,3.632,3.775,3.5380,3.512,2.955,LBN,-0.864,Middle East and Northern Africa


In [117]:
num_regions = pivoted_happy['Region'].nunique()
print("Number of unique regions:", num_regions)
regions = pivoted_happy['Region'].unique()
print(regions)

Number of unique regions: 7
['Western Europe' 'Middle East and Northern Africa' 'Oceania'
 'North America' 'Central and Eastern Europe' 'Asia' 'Sub-Saharan Africa']


In [110]:
mask = pivoted_happy['Region'].isnull()
rows_with_nan = pivoted_happy[mask]
print(rows_with_nan)

Empty DataFrame
Columns: [Country, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, Country Code, Happiness Score Difference, Region]
Index: []


In [116]:
pivoted_happy.loc[pivoted_happy['Country'] == 'Czechia', 'Region'] = 'Central and Eastern Europe'
pivoted_happy.loc[pivoted_happy['Country'] == 'Taiwan Province of China', 'Region'] = 'Eastern Asia'
pivoted_happy.loc[pivoted_happy['Country'] == 'Hong Kong S.A.R. of China', 'Region'] = 'Eastern Asia'
pivoted_happy.loc[pivoted_happy['Country'] == 'North Macedonia', 'Region'] = 'Central and Eastern Europe'
pivoted_happy.loc[pivoted_happy['Country'] == 'Gambia', 'Region'] = 'Middle East and Northern Africa'
pivoted_happy.loc[pivoted_happy['Country'] == 'Congo', 'Region'] = 'Sub-Saharan Africa'
pivoted_happy.loc[pivoted_happy['Country'] == 'Namibia', 'Region'] = 'Sub-Saharan Africa'
pivoted_happy.loc[pivoted_happy['Country'] == 'Eswatini, Kingdom of', 'Region'] = 'Sub-Saharan Africa'

pivoted_happy['Region'] = pivoted_happy['Region'].replace('Australia and New Zealand', 'Oceania')
pivoted_happy['Region'] = pivoted_happy['Region'].replace('Southeastern Asia', 'Asia')
pivoted_happy['Region'] = pivoted_happy['Region'].replace('Southern Asia', 'Asia')
pivoted_happy['Region'] = pivoted_happy['Region'].replace('Eastern Asia', 'Asia')
pivoted_happy['Region'] = pivoted_happy['Region'].replace('Latin America and Caribbean', 'North America')

In [133]:
oceania_data = pivoted_happy[pivoted_happy['Region'] == 'Oceania']
print(oceania_data)


        Country   2015   2016   2017   2018   2019    2020   2021   2022  \
9   New Zealand  7.284  7.291  7.284  7.272  7.246  7.2375  7.268  7.200   
11    Australia  7.226  7.119  7.079  7.139  7.167  7.2228  7.157  7.162   

   Country Code  Happiness Score Difference   Region  
9           NZL                      -0.084  Oceania  
11          AUS                      -0.064  Oceania  


In [135]:
oceania_data = pivoted_happy[pivoted_happy['Region'] == 'Oceania']
oceania_size = oceania_data['2022'].mean() * 500

# add nodes to the graph
for country in oceania_data['Country']:
    if country not in pos:
        pos[country] = nx.spring_layout(G, k=0.3, iterations=50)[country]

# add edges to the graph
for country in oceania_data['Country']:
    G.add_edge('Oceania', country, weight=oceania_size)

# set node colors and sizes
node_colors = [blues(i/len(regions)) for i in range(len(regions))]
node_sizes = [pivoted_happy[pivoted_happy['Region'] == region]['2022'].mean() * 500 for region in regions]

# draw the graph
fig, ax = plt.subplots(figsize=(10, 10))
nx.draw(G, pos, node_color=node_colors, node_size=node_sizes, with_labels=True, font_size=10, font_weight='bold', ax=ax)

# add edge labels
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, font_color='gray')

# set axis limits
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)

# show the plot
plt.show()


KeyError: 'New Zealand'