In [122]:
# imports

import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium

# Collect Data

## LPI

In [76]:
LPI_data = pd.read_csv('LPI.csv')
LPI = LPI_data[['Economy', 'Mean LPI score, 2012–18']].copy()
LPI.columns = ['country', 'LPI']

In [77]:
LPI.head()

Unnamed: 0,country,LPI
0,Germany,4.19
1,Netherlands,4.07
2,Sweden,4.07
3,Belgium,4.05
4,Singapore,4.05


## Economic and Social Globalization

In [78]:
KOFGl = pd.read_csv('KOFGI_2022_public.csv')
years_to_select = [2012, 2013, 2014, 2015, 2016, 2017, 2018]
KOFESGl = KOFGl[KOFGl['year'].isin(years_to_select)][['country', 'KOFSoGI ', 'KOFEcGI']].copy()

In [79]:
KOFESGl.head()

Unnamed: 0,country,KOFSoGI,KOFEcGI
42,Aruba,85.0,64.0
43,Aruba,84.0,60.0
44,Aruba,83.0,59.0
45,Aruba,83.0,59.0
46,Aruba,83.0,59.0


In [80]:
# Compute average for KOFSoGI and KOFEcGI by country
averaged_df = KOFESGl.groupby('country')[['KOFSoGI ', 'KOFEcGI']].mean().reset_index()

averaged_df.head()

Unnamed: 0,country,KOFSoGI,KOFEcGI
0,Afghanistan,35.714286,33.142857
1,Albania,67.714286,64.857143
2,Algeria,50.857143,38.571429
3,Andorra,86.285714,
4,Angola,37.142857,46.285714


In [81]:
averaged_df[averaged_df.isnull().any(axis=1)]

Unnamed: 0,country,KOFSoGI,KOFEcGI
3,Andorra,86.285714,
46,Cuba,49.142857,
63,Faroe Islands,82.857143,
67,French Polynesia,72.714286,
74,Greenland,74.285714,
76,Guam,,
100,"Korea, Dem People’s Rep",,
111,Liechtenstein,86.857143,
130,Monaco,88.285714,
157,Puerto Rico,75.857143,


## GNI

In [89]:
GNI_data = pd.read_csv('GNI.csv')
GNI_data.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,,,,,...,25500.0,25790.0,26180.0,26650.0,27720.0,29310.0,30330.0,24840.0,29460.0,
1,Africa Eastern and Southern,AFE,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,,,,,...,1716.468305,1696.462899,1579.295342,1460.609129,1443.996026,1456.396534,1494.589953,1388.531225,1461.388019,1542.260985
2,Afghanistan,AFG,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,,,,,...,680.0,650.0,610.0,570.0,540.0,520.0,530.0,500.0,390.0,
3,Africa Western and Central,AFW,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,113.080093,121.446289,127.515314,132.486065,...,1928.166481,2101.336153,2009.488614,1776.140878,1590.799458,1606.960024,1681.824079,1664.742718,1727.603209,1779.43139
4,Angola,AGO,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,,,,,...,4650.0,4800.0,3880.0,2450.0,1980.0,1870.0,2040.0,1690.0,1650.0,1900.0


In [90]:
GNI_data = GNI_data.rename(columns={'Country Name': 'Country'})

# Extract the columns for the years and calculate the mean across them
GNI_data['GNI'] = GNI_data[['2012', '2013', '2014', '2015', '2016', '2017', '2018']].mean(axis=1)

# Extract only the 'Country' and 'GNI' columns
GNI_avg = GNI_data[['Country', 'GNI']]



                         Country           GNI
0                          Aruba  26512.857143
1    Africa Eastern and Southern   1576.800836
2                    Afghanistan    602.857143
3     Africa Western and Central   1825.693292
4                         Angola   3384.285714
..                           ...           ...
261                       Kosovo   3908.571429
262                  Yemen, Rep.   1128.571429
263                 South Africa   6937.142857
264                       Zambia   1527.142857
265                     Zimbabwe   1238.571429

[266 rows x 2 columns]


In [91]:
GNI_avg.head()

Unnamed: 0,Country,GNI
0,Aruba,26512.857143
1,Africa Eastern and Southern,1576.800836
2,Afghanistan,602.857143
3,Africa Western and Central,1825.693292
4,Angola,3384.285714


## Combining data

In [92]:

concatenated_df = pd.concat([LPI[['country', 'LPI']], averaged_df['KOFSoGI '], averaged_df['KOFEcGI'], GNI_avg['GNI']], axis=1)


In [95]:
LPI_indexed = LPI.set_index('country')
averaged_df_indexed = averaged_df.set_index('country')
GNI_avg_indexed = GNI_avg.set_index('Country')

concatenated_df = pd.concat([LPI_indexed['LPI'], averaged_df_indexed[['KOFSoGI ', 'KOFEcGI']], GNI_avg_indexed['GNI']], axis=1)

In [96]:
concatenated_df

Unnamed: 0,LPI,KOFSoGI,KOFEcGI,GNI
Germany,4.19,87.714286,79.428571,46104.285714
Netherlands,4.07,86.857143,88.714286,49655.714286
Sweden,4.07,89.285714,83.142857,57762.857143
Belgium,4.05,85.714286,88.142857,45771.428571
Singapore,4.05,87.285714,94.142857,54355.714286
...,...,...,...,...
Tuvalu,,,,5372.857143
St. Vincent and the Grenadines,,,,7354.285714
British Virgin Islands,,,,
Virgin Islands (U.S.),,,,


Note: In the interest of time, I've opted to exclude any rows with missing values and countries that did not match across datasets. For a more comprehensive analysis in the future, it would be beneficial to thoroughly clean the dataset. Discrepancies, especially those arising from variations in country name spellings (as observed with the 'SSI' dataset), should be systematically addressed to ensure accurate matching

In [114]:
cleaned_df = concatenated_df.dropna()
cleaned_df

Unnamed: 0,LPI,KOFSoGI,KOFEcGI,GNI
Germany,4.19,87.714286,79.428571,46104.285714
Netherlands,4.07,86.857143,88.714286,49655.714286
Sweden,4.07,89.285714,83.142857,57762.857143
Belgium,4.05,85.714286,88.142857,45771.428571
Singapore,4.05,87.285714,94.142857,54355.714286
...,...,...,...,...
Syrian Arab Republic,2.10,48.714286,33.142857,1175.714286
Sierra Leone,2.06,40.428571,40.857143,558.571429
Afghanistan,2.04,35.714286,33.142857,602.857143
Haiti,2.02,41.857143,37.000000,1395.714286


In [115]:
cleaned_df = cleaned_df.reset_index().rename(columns={'index': 'Country', 'KOFSoGI ': 'SCOG',
    'KOFEcGI': 'ECOG'})
cleaned_df

Unnamed: 0,Country,LPI,SCOG,ECOG,GNI
0,Germany,4.19,87.714286,79.428571,46104.285714
1,Netherlands,4.07,86.857143,88.714286,49655.714286
2,Sweden,4.07,89.285714,83.142857,57762.857143
3,Belgium,4.05,85.714286,88.142857,45771.428571
4,Singapore,4.05,87.285714,94.142857,54355.714286
...,...,...,...,...,...
147,Syrian Arab Republic,2.10,48.714286,33.142857,1175.714286
148,Sierra Leone,2.06,40.428571,40.857143,558.571429
149,Afghanistan,2.04,35.714286,33.142857,602.857143
150,Haiti,2.02,41.857143,37.000000,1395.714286


## Saving output

In [116]:
cleaned_df.to_csv('ITRI_var.csv', index=False)

# Modeling

In [117]:
cleaned_df.head()

Unnamed: 0,Country,LPI,SCOG,ECOG,GNI
0,Germany,4.19,87.714286,79.428571,46104.285714
1,Netherlands,4.07,86.857143,88.714286,49655.714286
2,Sweden,4.07,89.285714,83.142857,57762.857143
3,Belgium,4.05,85.714286,88.142857,45771.428571
4,Singapore,4.05,87.285714,94.142857,54355.714286


### Min-max scaling to account for different scales

In [118]:
# Min-Max scale function
def min_max_scale(series):
    return (series - series.min()) / (series.max() - series.min())

# Scale the columns
cleaned_df['GNI'] = min_max_scale(cleaned_df['GNI'])
cleaned_df['LPI'] = min_max_scale(cleaned_df['LPI'])
cleaned_df['ECOG'] = min_max_scale(cleaned_df['ECOG'])
cleaned_df['SCOG'] = min_max_scale(cleaned_df['SCOG'])

### Calculate ITRI for each country

In [120]:
# Assuming constants
gamma = 0.320
alpha = 0.349
beta = 0.331

# Calculate ITRI
cleaned_df['ITRI'] = gamma * cleaned_df['GNI'] + 2 / (1/alpha * cleaned_df['LPI'] + 1/(beta * (cleaned_df['ECOG'] + cleaned_df['SCOG'])))

In [121]:
cleaned_df.head()

Unnamed: 0,Country,LPI,SCOG,ECOG,GNI,ITRI
0,Germany,1.0,0.951435,0.777056,0.501234,0.593935
1,Netherlands,0.945205,0.93819,0.917749,0.540051,0.634055
2,Sweden,0.945205,0.975717,0.833333,0.628662,0.657965
3,Belgium,0.936073,0.92053,0.909091,0.497595,0.620762
4,Singapore,0.936073,0.944812,1.0,0.591422,0.661443


In [128]:
# visualise ITRI
scale_factor = 10
m = folium.Map(location=[45, -100], zoom_start=4)

geolocator = Nominatim(user_agent="geoapi")
for i, row in cleaned_df.iterrows():
    location = geolocator.geocode(row['Country'])
    
    # Skip countries with ITRI value of 0
    if row['ITRI'] == 0:
        continue

    if location:
        # Using logarithmic scaling for the radius to account for the skewed distribution
        radius = np.log(row['ITRI'] + 1) * scale_factor
        
        folium.CircleMarker(
            location=[location.latitude, location.longitude],
            radius=radius,
            popup=f"{row['Country']} - ITRI: {row['ITRI']}",
            fill=True,
            color="green",
            fill_color="green"
        ).add_to(m)

m.save("ITRI.html")

### Calculate RAI for each country

#### Import and combine SSI data

In [129]:
SSI_data = pd.read_csv('SSI.csv')
SSI_data.head()

Unnamed: 0,Country,Production,Imports,Exports,Consumption,Reserves,SSI,Population
0,Afghanistan,4.2,0.0,0.0,4.2,1.8,0.515021,41128771.0
1,Albania,2.04,0.0,0.0,2.04,0.2,0.195907,2775634.0
2,Algeria,3298.4,0.0,1697.6,1583.2,159.0,20.391625,44903225.0
3,American Samoa,0.0,0.0,0.0,0.0,0.0,0.0,44273.0
4,Angola,228.6,0.0,189.6,39.0,12.6,15.941837,35588987.0


In [130]:
merged_df = cleaned_df.merge(SSI_data[['Country', 'SSI', 'Population']], 
                             left_on='Country', 
                             right_on='Country', 
                             how='left')

In [139]:
merged_df['SSI'].describe()

count    141.000000
mean       5.198273
std       13.661319
min        0.000000
25%        0.000000
50%        0.089047
75%        6.700337
max      140.540180
Name: SSI, dtype: float64

In [141]:
merged_df_clean = merged_df.dropna().copy()
merged_df_clean.describe()

Unnamed: 0,LPI,SCOG,ECOG,GNI,ITRI,SSI,Population
count,141.0,141.0,141.0,141.0,141.0,141.0,141.0
mean,0.406166,0.567611,0.459987,0.15791,0.486498,5.198273,50046460.0
std,0.257064,0.276244,0.254856,0.218144,0.182858,13.661319,172505900.0
min,0.0,0.0,0.0,0.0,0.024156,0.0,381900.0
25%,0.205479,0.317881,0.24026,0.016941,0.346651,0.0,4268873.0
50%,0.333333,0.589404,0.452381,0.061458,0.520028,0.089047,11228820.0
75%,0.575342,0.803532,0.649351,0.1862,0.63015,6.700337,35588990.0
max,1.0,1.0,1.0,1.0,0.815157,140.54018,1417173000.0


In [142]:
# min max scale SSI
merged_df_clean['SSI'] = min_max_scale(merged_df_clean['SSI'])

In [143]:
merged_df_clean.describe()

Unnamed: 0,LPI,SCOG,ECOG,GNI,ITRI,SSI,Population
count,141.0,141.0,141.0,141.0,141.0,141.0,141.0
mean,0.406166,0.567611,0.459987,0.15791,0.486498,0.036988,50046460.0
std,0.257064,0.276244,0.254856,0.218144,0.182858,0.097206,172505900.0
min,0.0,0.0,0.0,0.0,0.024156,0.0,381900.0
25%,0.205479,0.317881,0.24026,0.016941,0.346651,0.0,4268873.0
50%,0.333333,0.589404,0.452381,0.061458,0.520028,0.000634,11228820.0
75%,0.575342,0.803532,0.649351,0.1862,0.63015,0.047676,35588990.0
max,1.0,1.0,1.0,1.0,0.815157,1.0,1417173000.0


#### Calculate RAI

In [152]:
# Assuming constants
a = 1
b = 0.9

# Calculate ω for each country based on its SSI value
# merged_df_clean['ω'] = a - b * merged_df_clean['SSI']
merged_df_clean['ω'] = a * np.exp(-b * (1 - merged_df_clean['SSI']))


# Calculate RAI for each country
merged_df_clean['RAI'] = merged_df_clean['ω'] * merged_df_clean['ITRI'] + (1 - merged_df_clean['ω']) * merged_df_clean['SSI']

In [153]:
merged_df_clean.head()

Unnamed: 0,Country,LPI,SCOG,ECOG,GNI,ITRI,SSI,Population,ω,RAI
0,Germany,1.0,0.951435,0.777056,0.501234,0.593935,0.004633,84079811.0,0.408269,0.245226
1,Netherlands,0.945205,0.93819,0.917749,0.540051,0.634055,0.05068,17703090.0,0.425544,0.298931
2,Sweden,0.945205,0.975717,0.833333,0.628662,0.657965,0.0,10486941.0,0.40657,0.267509
3,Belgium,0.936073,0.92053,0.909091,0.497595,0.620762,9e-06,11669446.0,0.406573,0.252391
4,Singapore,0.936073,0.944812,1.0,0.591422,0.661443,0.0,5637022.0,0.40657,0.268923


In [154]:
merged_df_clean.describe()

Unnamed: 0,LPI,SCOG,ECOG,GNI,ITRI,SSI,Population,ω,RAI
count,141.0,141.0,141.0,141.0,141.0,141.0,141.0,141.0,141.0
mean,0.406166,0.567611,0.459987,0.15791,0.486498,0.036988,50046460.0,0.422352,0.223341
std,0.257064,0.276244,0.254856,0.218144,0.182858,0.097206,172505900.0,0.053321,0.09979
min,0.0,0.0,0.0,0.0,0.024156,0.0,381900.0,0.40657,0.009898
25%,0.205479,0.317881,0.24026,0.016941,0.346651,0.0,4268873.0,0.40657,0.167456
50%,0.333333,0.589404,0.452381,0.061458,0.520028,0.000634,11228820.0,0.406802,0.233156
75%,0.575342,0.803532,0.649351,0.1862,0.63015,0.047676,35588990.0,0.424394,0.268501
max,1.0,1.0,1.0,1.0,0.815157,1.0,1417173000.0,1.0,0.815157


In [157]:
# visualise RAI
scale_factor = 10
m = folium.Map(location=[45, -100], zoom_start=4)

geolocator = Nominatim(user_agent="geoapi")
for i, row in merged_df_clean.iterrows():
    location = geolocator.geocode(row['Country'])
    
    # Skip countries with ITRI value of 0
    if row['RAI'] == 0:
        continue

    if location:
        # Using logarithmic scaling for the radius to account for the skewed distribution
        radius = np.log(row['RAI'] + 1) * scale_factor
        
        folium.CircleMarker(
            location=[location.latitude, location.longitude],
            radius=radius,
            popup=f"{row['Country']} - RAI: {row['RAI']}",
            fill=True,
            color="green",
            fill_color="green"
        ).add_to(m)

m.save("RAI.html")

In [155]:
merged_df_clean[merged_df_clean['Country'] == 'Mauritius']

Unnamed: 0,Country,LPI,SCOG,ECOG,GNI,ITRI,SSI,Population,ω,RAI
83,Mauritius,0.296804,0.783664,0.831169,0.108941,0.7698,0.0,1262523.0,0.40657,0.312977


In [162]:
# Calculate the population of countries with RAI < 0.16
population_rai_less_than_cutoff = merged_df_clean[merged_df_clean['RAI'] < 0.1]['Population'].sum()

# Calculate the total population of all countries
total_population = merged_df_clean['Population'].sum()

# Calculate the percentage
percentage = (population_rai_less_than_cutoff / total_population) * 100

print(f"Percentage of population with RAI less than 0.23: {percentage:.2f}%")

Percentage of population with RAI less than 0.23: 6.16%


In [165]:
merged_df_clean['RAI'].mean() - 2 * merged_df_clean['RAI'].std()

0.023761267793763363

In [164]:
merged_df_clean['RAI'].std()

0.09978990459117706