In [1]:
#Import libraries
import panel as pn
pn.extension('plotly')
import plotly.express as px
import pandas as pd
import numpy as np
import hvplot.pandas
import matplotlib.pyplot as plt
import os
from pathlib import Path
from dotenv import load_dotenv


In [2]:
# Read the Mapbox API key from the environment (a local .env file is loaded first)
load_dotenv()
mapbox_api = os.getenv('mapbox')
if mapbox_api:
    px.set_mapbox_access_token(mapbox_api)
else:
    # os.getenv returns None when the variable is absent; say so here instead of
    # silently registering an empty token.
    print("Warning: environment variable 'mapbox' is not set; Mapbox access token was not configured.")

In [3]:
# Load both CSV sources: resolve the paths first, then read each file and
# promote its identifier column to the index.
hdi_file_path = Path('Resources/HDI.csv')
gps_file_path = Path('Resources/country-coordinates.csv')
hdi_df = pd.read_csv(hdi_file_path).set_index('HDI Rank')
gps_df = pd.read_csv(gps_file_path).set_index('country')

In [4]:
# Count missing values per column of the HDI table
hdi_df.isna().sum()

Country    0
1990       0
1991       0
1992       0
1993       0
1994       0
1995       0
1996       0
1997       0
1998       0
1999       0
2000       0
2001       0
2002       0
2003       0
2004       0
2005       0
2006       0
2007       0
2008       0
2009       0
2010       0
2011       0
2012       0
2013       0
2014       0
2015       0
2016       0
2017       0
2018       0
2019       0
dtype: int64

In [5]:
# Count missing values per column of the coordinates table
gps_df.isna().sum()

country_code             1
latitude                 1
longitude                1
usa_state_code         193
usa_state_latitude     193
usa_state_longitude    193
usa_state              193
dtype: int64

In [6]:
# Preview the first five rows of the HDI table
hdi_df.head(n=5)

Unnamed: 0_level_0,Country,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
HDI Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
169,Afghanistan,0.302,0.307,0.316,0.312,0.307,0.331,0.335,0.339,0.344,...,0.472,0.477,0.489,0.496,0.5,0.5,0.502,0.506,0.509,0.511
69,Albania,0.65,0.631,0.615,0.618,0.624,0.637,0.646,0.645,0.655,...,0.745,0.764,0.775,0.782,0.787,0.788,0.788,0.79,0.792,0.795
91,Algeria,0.572,0.576,0.582,0.586,0.59,0.595,0.602,0.611,0.621,...,0.721,0.728,0.728,0.729,0.736,0.74,0.743,0.745,0.746,0.748
36,Andorra,..,..,..,..,..,..,..,..,..,...,0.837,0.836,0.858,0.856,0.863,0.862,0.866,0.863,0.867,0.868
148,Angola,..,..,..,..,..,..,..,..,..,...,0.517,0.533,0.544,0.555,0.565,0.572,0.578,0.582,0.582,0.581


In [7]:
# Preview the first five rows of the coordinates table
gps_df.head(n=5)

Unnamed: 0_level_0,country_code,latitude,longitude,usa_state_code,usa_state_latitude,usa_state_longitude,usa_state
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Andorra,AD,42.546245,1.601554,AK,63.588753,-154.493062,Alaska
United Arab Emirates,AE,23.424076,53.847818,AL,32.318231,-86.902298,Alabama
Afghanistan,AF,33.93911,67.709953,AR,35.20105,-91.831833,Arkansas
Antigua and Barbuda,AG,17.060816,-61.796428,AZ,34.048928,-111.093731,Arizona
Anguilla,AI,18.220554,-63.068615,CA,36.778261,-119.417932,California


In [8]:
# Move the current index ("HDI Rank") back into an ordinary column
hdi_df = hdi_df.reset_index()

In [9]:
# Key the HDI table by country name
hdi_df = hdi_df.set_index("Country")

In [10]:
# Turn the index back into a column and capitalise its name to match the HDI table
gps_df = gps_df.reset_index().rename(columns={'country': 'Country'})

In [11]:
# Key the coordinates table by country name and preview the result
gps_df = gps_df.set_index("Country")
gps_df.head(n=5)

Unnamed: 0_level_0,country_code,latitude,longitude,usa_state_code,usa_state_latitude,usa_state_longitude,usa_state
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Andorra,AD,42.546245,1.601554,AK,63.588753,-154.493062,Alaska
United Arab Emirates,AE,23.424076,53.847818,AL,32.318231,-86.902298,Alabama
Afghanistan,AF,33.93911,67.709953,AR,35.20105,-91.831833,Arkansas
Antigua and Barbuda,AG,17.060816,-61.796428,AZ,34.048928,-111.093731,Arizona
Anguilla,AI,18.220554,-63.068615,CA,36.778261,-119.417932,California


In [12]:
# Keep only the country coordinates. Selecting the wanted columns (instead of
# dropping the unwanted ones by name) gives the same frame on a first run and
# does not raise KeyError when the cell is executed again.
gps_df = gps_df[['latitude', 'longitude']]

In [13]:
# Preview the trimmed coordinates table
gps_df.head(n=5)

Unnamed: 0_level_0,latitude,longitude
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Andorra,42.546245,1.601554
United Arab Emirates,23.424076,53.847818
Afghanistan,33.93911,67.709953
Antigua and Barbuda,17.060816,-61.796428
Anguilla,18.220554,-63.068615


In [14]:
# Attach coordinates to the HDI table, aligned on the shared "Country" index.
# A left join keeps exactly the countries present in hdi_df (in their original
# order); countries that only exist in the coordinates file are not added, and
# HDI countries without a matching coordinates row get NaN latitude/longitude.
hdi_gps_df = hdi_df.join(gps_df, how='left')
hdi_gps_df.head()

Unnamed: 0_level_0,HDI Rank,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2012,2013,2014,2015,2016,2017,2018,2019,latitude,longitude
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,169.0,0.302,0.307,0.316,0.312,0.307,0.331,0.335,0.339,0.344,...,0.489,0.496,0.5,0.5,0.502,0.506,0.509,0.511,33.93911,67.709953
Albania,69.0,0.65,0.631,0.615,0.618,0.624,0.637,0.646,0.645,0.655,...,0.775,0.782,0.787,0.788,0.788,0.79,0.792,0.795,41.153332,20.168331
Algeria,91.0,0.572,0.576,0.582,0.586,0.59,0.595,0.602,0.611,0.621,...,0.728,0.729,0.736,0.74,0.743,0.745,0.746,0.748,28.033886,1.659626
Andorra,36.0,..,..,..,..,..,..,..,..,..,...,0.858,0.856,0.863,0.862,0.866,0.863,0.867,0.868,42.546245,1.601554
Angola,148.0,..,..,..,..,..,..,..,..,..,...,0.544,0.555,0.565,0.572,0.578,0.582,0.582,0.581,-11.202692,17.873887


In [15]:
 # Drop countries that have no HDI data and check head.
 # Rows that came only from the coordinates file have NaN in every HDI column,
 # so they are removed by content rather than by a hard-coded row position.
 hdi_columns = hdi_gps_df.columns.difference(['latitude', 'longitude']).tolist()
 hdi_gps_df = hdi_gps_df.dropna(how='all', subset=hdi_columns)
 hdi_gps_df.head()

Unnamed: 0_level_0,HDI Rank,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2012,2013,2014,2015,2016,2017,2018,2019,latitude,longitude
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,169.0,0.302,0.307,0.316,0.312,0.307,0.331,0.335,0.339,0.344,...,0.489,0.496,0.5,0.5,0.502,0.506,0.509,0.511,33.93911,67.709953
Albania,69.0,0.65,0.631,0.615,0.618,0.624,0.637,0.646,0.645,0.655,...,0.775,0.782,0.787,0.788,0.788,0.79,0.792,0.795,41.153332,20.168331
Algeria,91.0,0.572,0.576,0.582,0.586,0.59,0.595,0.602,0.611,0.621,...,0.728,0.729,0.736,0.74,0.743,0.745,0.746,0.748,28.033886,1.659626
Andorra,36.0,..,..,..,..,..,..,..,..,..,...,0.858,0.856,0.863,0.862,0.866,0.863,0.867,0.868,42.546245,1.601554
Angola,148.0,..,..,..,..,..,..,..,..,..,...,0.544,0.555,0.565,0.572,0.578,0.582,0.582,0.581,-11.202692,17.873887


In [16]:
# Inspect the row at position 184 (a spot check, not the tail). In the output
# below this is Venezuela, whose latitude/longitude are NaN, i.e. no coordinates
# were matched for it.
# NOTE(review): presumably the two files spell this country name differently - confirm.
hdi_gps_df.iloc[184]

HDI Rank     113.0
1990         0.644
1991         0.654
1992          0.66
1993         0.662
1994         0.662
1995         0.666
1996         0.668
1997          0.67
1998         0.672
1999         0.674
2000         0.676
2001         0.684
2002         0.692
2003         0.692
2004         0.704
2005         0.718
2006         0.731
2007         0.748
2008         0.757
2009         0.756
2010         0.757
2011         0.769
2012         0.772
2013         0.777
2014         0.775
2015         0.769
2016         0.759
2017         0.743
2018         0.733
2019         0.711
latitude       NaN
longitude      NaN
Name: Venezuela (Bolivarian Republic of), dtype: object

In [17]:
# Rebuild the HDI-only table by removing the two coordinate columns
hdi_df = hdi_gps_df.drop(['latitude', 'longitude'], axis='columns')

In [18]:
# Turn the "Country" index back into a column (row labels become 0..n-1)
hdi_df = hdi_df.reset_index()

In [19]:
# Preview the last five rows
hdi_df.tail(n=5)

Unnamed: 0,Country,HDI Rank,1990,1991,1992,1993,1994,1995,1996,1997,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
184,Venezuela (Bolivarian Republic of),113.0,0.644,0.654,0.66,0.662,0.662,0.666,0.668,0.67,...,0.757,0.769,0.772,0.777,0.775,0.769,0.759,0.743,0.733,0.711
185,Viet Nam,117.0,0.483,0.493,0.504,0.514,0.525,0.537,0.548,0.547,...,0.661,0.671,0.676,0.681,0.683,0.688,0.693,0.696,0.7,0.704
186,Yemen,179.0,0.401,0.401,0.404,0.406,0.408,0.414,0.421,0.426,...,0.506,0.506,0.504,0.509,0.502,0.483,0.474,0.467,0.468,0.47
187,Zambia,146.0,0.421,0.417,0.416,0.419,0.414,0.415,0.416,0.416,...,0.527,0.534,0.549,0.557,0.561,0.569,0.571,0.578,0.582,0.584
188,Zimbabwe,150.0,0.478,0.481,0.467,0.463,0.46,0.453,0.453,0.447,...,0.482,0.499,0.525,0.537,0.547,0.553,0.558,0.563,0.569,0.571


In [20]:
# Count, per year column, how many cells hold the '..' placeholder
hdi_df.iloc[:, 2:].eq('..').sum()

1990    45
1991    45
1992    45
1993    45
1994    45
1995    41
1996    41
1997    41
1998    41
1999    38
2000    15
2001    15
2002    14
2003    13
2004    11
2005     4
2006     3
2007     3
2008     3
2009     3
2010     1
2011     1
2012     1
2013     1
2014     1
2015     1
2016     1
2017     0
2018     0
2019     0
dtype: int64

In [21]:
# Check the Python type of the value in row 3 of the 1990 column
type(hdi_df['1990'].iloc[3])

str

In [22]:
# Swap the '..' placeholder for NaN in every year column (third column onwards)
year_values = hdi_df.iloc[:, 2:]
hdi_df.iloc[:, 2:] = year_values.replace({'..': np.nan})

In [23]:
# Count NaN values per year column after the placeholder replacement
hdi_df.iloc[:, 2:].isna().sum()

1990    45
1991    45
1992    45
1993    45
1994    45
1995    41
1996    41
1997    41
1998    41
1999    38
2000    15
2001    15
2002    14
2003    13
2004    11
2005     4
2006     3
2007     3
2008     3
2009     3
2010     1
2011     1
2012     1
2013     1
2014     1
2015     1
2016     1
2017     0
2018     0
2019     0
dtype: int64

In [24]:
# Transform object data types to float.
# Assign by column label rather than through .iloc: under pandas 2.x an .iloc
# assignment writes into the existing object-dtype columns, so their dtype
# would stay `object`. Label-based assignment replaces the columns, so they
# become float64 on every pandas version.
year_columns = hdi_df.columns[2:]
hdi_df[year_columns] = hdi_df[year_columns].astype(float)

In [25]:
# Check the type of the value in row 4 of the 1990 column after conversion
type(hdi_df['1990'].iloc[4])

numpy.float64

In [26]:
# Average each year column over all rows (missing values are skipped)
hdi_mean = hdi_df.iloc[:, 2:].mean(axis='index', skipna=True)

In [27]:
# Check what kind of object .mean() returned (the output below shows a pandas Series)
type(hdi_mean)

pandas.core.series.Series

In [28]:
# Wrap the Series of yearly averages in a one-column DataFrame
hdi_mean = hdi_mean.to_frame()

In [29]:
# Move the year labels out of the index into a regular column
hdi_mean = hdi_mean.reset_index()

In [30]:
# Give the two columns descriptive names
hdi_mean = hdi_mean.set_axis(['Year', 'HDI Average'], axis='columns')

In [31]:
# Preview the yearly averages
hdi_mean.head(n=5)

Unnamed: 0,Year,HDI Average
0,1990,0.599653
1,1991,0.601854
2,1992,0.604187
3,1993,0.608229
4,1994,0.612347


In [32]:
# Line chart of the HDI average by year
hdi_average_plot = px.line(data_frame=hdi_mean, x='Year', y='HDI Average', title='HDI Average')

In [33]:
# Render the HDI average figure
hdi_average_plot

In [34]:
# Display the HDI table after the conversion to floats (the year columns now show NaN instead of '..')
hdi_df

Unnamed: 0,Country,HDI Rank,1990,1991,1992,1993,1994,1995,1996,1997,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Afghanistan,169.0,0.302,0.307,0.316,0.312,0.307,0.331,0.335,0.339,...,0.472,0.477,0.489,0.496,0.500,0.500,0.502,0.506,0.509,0.511
1,Albania,69.0,0.650,0.631,0.615,0.618,0.624,0.637,0.646,0.645,...,0.745,0.764,0.775,0.782,0.787,0.788,0.788,0.790,0.792,0.795
2,Algeria,91.0,0.572,0.576,0.582,0.586,0.590,0.595,0.602,0.611,...,0.721,0.728,0.728,0.729,0.736,0.740,0.743,0.745,0.746,0.748
3,Andorra,36.0,,,,,,,,,...,0.837,0.836,0.858,0.856,0.863,0.862,0.866,0.863,0.867,0.868
4,Angola,148.0,,,,,,,,,...,0.517,0.533,0.544,0.555,0.565,0.572,0.578,0.582,0.582,0.581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184,Venezuela (Bolivarian Republic of),113.0,0.644,0.654,0.660,0.662,0.662,0.666,0.668,0.670,...,0.757,0.769,0.772,0.777,0.775,0.769,0.759,0.743,0.733,0.711
185,Viet Nam,117.0,0.483,0.493,0.504,0.514,0.525,0.537,0.548,0.547,...,0.661,0.671,0.676,0.681,0.683,0.688,0.693,0.696,0.700,0.704
186,Yemen,179.0,0.401,0.401,0.404,0.406,0.408,0.414,0.421,0.426,...,0.506,0.506,0.504,0.509,0.502,0.483,0.474,0.467,0.468,0.470
187,Zambia,146.0,0.421,0.417,0.416,0.419,0.414,0.415,0.416,0.416,...,0.527,0.534,0.549,0.557,0.561,0.569,0.571,0.578,0.582,0.584


In [35]:
# Select Canada by country name instead of by its row position (31), so the
# lookup still returns the right country if rows are added, removed or re-sorted.
# The matching row is wrapped in a one-column DataFrame whose index holds the
# field names (Country, HDI Rank, 1990, ...).
canada_hdi = pd.DataFrame(hdi_df.loc[hdi_df['Country'] == 'Canada'].iloc[0])

In [36]:
# Check the row labels of the Canada frame: 'Country' and 'HDI Rank' come first, followed by the year labels
canada_hdi.index

Index(['Country', 'HDI Rank', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019'],
      dtype='object')

In [37]:
# Drop the Country and HDI Rank rows by label.
# Dropping "whatever is first" twice would delete the 1990 and 1991 rows if this
# cell were executed again; dropping by name (ignoring labels already removed)
# is safe to re-run.
canada_hdi = canada_hdi.drop(index=['Country', 'HDI Rank'], errors='ignore')

In [38]:
# Move the year labels from the index into a column
canada_hdi = canada_hdi.reset_index()

In [39]:
# Name the two columns
canada_hdi = canada_hdi.set_axis(['Year', 'HDI Score'], axis='columns')

In [40]:
# Line chart of Canada's HDI score by year
canada_plot = px.line(data_frame=canada_hdi, x='Year', y='HDI Score', title='HDI Score - Canada')

In [41]:
# Plot HDI Score - Canada
canada_plot

In [43]:
# Create DataFrames for select nations.
# Countries are looked up by name instead of by hard-coded row position, so a
# change in the number or order of rows cannot silently select another country.
def _country_column(country_name):
    """Return the hdi_df row for `country_name` as a one-column DataFrame.

    The frame's index holds the field names (Country, HDI Rank, then the years).
    Raises KeyError when no row has that exact value in the "Country" column.
    """
    matches = hdi_df.loc[hdi_df['Country'] == country_name]
    if matches.empty:
        raise KeyError(f"Country {country_name!r} not found in hdi_df")
    return pd.DataFrame(matches.iloc[0])


usa_hdi = _country_column('United States')   # previously row 180
uk_hdi = _country_column('United Kingdom')   # previously row 179
aus_hdi = _country_column('Australia')       # previously row 8
fr_hdi = _country_column('France')           # previously row 60
ger_hdi = _country_column('Germany')         # previously row 64
jpn_hdi = _country_column('Japan')           # previously row 85
mex_hdi = _country_column('Mexico')          # previously row 111

In [48]:
# Drop HDI Rank and Country rows from every per-country frame.
# The rows are removed by label, and labels that are already gone are ignored,
# so running this cell again cannot delete the first two year rows (which the
# positional "drop index[0] twice" approach would do).
usa_hdi, uk_hdi, aus_hdi, fr_hdi, ger_hdi, jpn_hdi, mex_hdi = (
    country_frame.drop(index=['Country', 'HDI Rank'], errors='ignore')
    for country_frame in (usa_hdi, uk_hdi, aus_hdi, fr_hdi, ger_hdi, jpn_hdi, mex_hdi)
)

In [50]:
# Move the year labels from the index into a column, for every country frame
for country_frame in (usa_hdi, uk_hdi, aus_hdi, fr_hdi, ger_hdi, jpn_hdi, mex_hdi):
    country_frame.reset_index(inplace=True)

In [51]:
# Give every country frame the same two column names
for country_frame in (usa_hdi, uk_hdi, aus_hdi, fr_hdi, ger_hdi, jpn_hdi, mex_hdi):
    country_frame.columns = ['Year', 'HDI Score']

In [56]:
# Line chart of the USA HDI score by year
usa_plot = px.line(data_frame=usa_hdi, x='Year', y='HDI Score', title='HDI Score - USA')

In [57]:
# Render the USA HDI score figure
usa_plot

In [58]:
# Line chart of the UK HDI score by year
uk_plot = px.line(data_frame=uk_hdi, x='Year', y='HDI Score', title='HDI Score - UK')

In [59]:
# Line chart of Australia's HDI score by year
aus_plot = px.line(data_frame=aus_hdi, x='Year', y='HDI Score', title='HDI Score - Australia')

In [60]:
# Line chart of France's HDI score by year
fr_plot = px.line(data_frame=fr_hdi, x='Year', y='HDI Score', title='HDI Score - France')

In [61]:
# Line chart of Germany's HDI score by year
ger_plot = px.line(data_frame=ger_hdi, x='Year', y='HDI Score', title='HDI Score - Germany')

In [62]:
# Line chart of Japan's HDI score by year
jpn_plot = px.line(data_frame=jpn_hdi, x='Year', y='HDI Score', title='HDI Score - Japan')

In [63]:
# Line chart of Mexico's HDI score by year
mex_plot = px.line(data_frame=mex_hdi, x='Year', y='HDI Score', title='HDI Score - Mexico')

In [65]:
# Render the UK HDI score figure
uk_plot

In [66]:
# Render the Australia HDI score figure
aus_plot

In [67]:
# Render the France HDI score figure
fr_plot

In [68]:
# Render the Germany HDI score figure
ger_plot

In [69]:
# Render the Japan HDI score figure
jpn_plot

In [70]:
# Render the Mexico HDI score figure
mex_plot