In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures

In [3]:
# Load the dataset
df_inequality = pd.read_csv("/dataset/economic/gini-coefficient.csv")

       Country  Year  \
0  Afghanistan  1980   
1  Afghanistan  1981   
2  Afghanistan  1982   
3  Afghanistan  1983   
4  Afghanistan  1984   

   Gini coefficient (before tax) (World Inequality Database)  
0                                                NaN          
1                                                NaN          
2                                                NaN          
3                                                NaN          
4                                                NaN          


In [None]:
# Display the first few rows
df_inequality.head()

In [4]:
# Check for null values
print(df_inequality.isnull().sum())

# Get data types
print(df_inequality.dtypes)

# Get summary statistics
print(df_inequality.describe(include='all'))

Country                                                         0
Year                                                            0
Gini coefficient (before tax) (World Inequality Database)    5176
dtype: int64
Country                                                       object
Year                                                           int64
Gini coefficient (before tax) (World Inequality Database)    float64
dtype: object
          Country         Year  \
count        9710  9710.000000   
unique        255          NaN   
top     Australia          NaN   
freq          112          NaN   
mean          NaN  1996.152008   
std           NaN    21.569577   
min           NaN  1820.000000   
25%           NaN  1988.000000   
50%           NaN  1999.000000   
75%           NaN  2010.000000   
max           NaN  2022.000000   

        Gini coefficient (before tax) (World Inequality Database)  
count                                         4534.000000          
unique                 

In [5]:
# View the columns
df_inequality.columns

Index(['Country', 'Year',
       'Gini coefficient (before tax) (World Inequality Database)'],
      dtype='object')

In [6]:
# Rename the columns
df_inequality.rename(columns={"Gini coefficient (before tax) (World Inequality Database)": "Gini coefficient"}, inplace=True)

In [7]:
# Filter the data to include only years after 1989
df_inequality = df_inequality[df_inequality["Year"] > 1989]

In [8]:
# List of countries you want to filter
selected_countries = [
    'Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia', 'Costa Rica',
    'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece',
    'Hungary', 'Iceland', 'Ireland', 'Israel', 'Italy', 'Japan', 'Korea','South Korea', 'Latvia',
    'Lithuania', 'Luxembourg', 'Mexico', 'Netherlands', 'New Zealand', 'Norway',
    'Poland', 'Portugal', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden',
    'Switzerland', 'Türkiye','Turkey',  'United Kingdom', 'United States'
]

# Filter the data to include only the selected countries
df_inequality = df_inequality[df_inequality["Country"].isin(selected_countries)]

# Display the filtered DataFrame
print(df_inequality.head())

       Country  Year  Gini coefficient
567  Australia  1990          0.437959
568  Australia  1991          0.449667
569  Australia  1992          0.444478
570  Australia  1993          0.451112
571  Australia  1994          0.455326


In [9]:
# Convert the table to a pivot table
df_inequality_cleaned = df_inequality.pivot(index="Year", columns="Country", values="Gini coefficient")

In [10]:
# Display the cleaned table
df_inequality_cleaned

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,...,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990,0.437959,,,0.44981,,,,,0.336517,,...,0.329614,0.445555,,0.363435,0.484381,0.317023,0.416457,0.660477,0.459973,0.512556
1991,0.449667,,0.441805,0.457081,,,,,0.334237,,...,0.363603,0.459205,,0.382953,0.474803,0.324095,0.399239,0.654467,0.465889,0.511415
1992,0.444478,,0.440195,0.459325,,,,0.343977,0.33585,,...,0.362818,0.45844,0.326793,0.35483,0.466786,0.319587,0.391184,0.648492,0.466766,0.523563
1993,0.451112,,0.442371,0.469644,,,,,0.340576,,...,0.374043,0.472515,0.369814,0.351959,0.460383,0.331909,0.387626,0.642554,0.47266,0.521087
1994,0.455326,,0.445794,0.4714,,,,,0.345311,,...,0.393465,0.4782,0.358412,0.349091,0.459284,0.355925,0.391458,0.636652,0.464089,0.520088
1995,0.458079,,0.444433,0.479104,,,,,0.345367,,...,0.414056,0.479476,0.385724,0.346229,0.471375,0.365837,0.391996,0.636725,0.466509,0.528353
1996,0.456176,,0.448057,0.484024,,,,0.393362,0.344389,,...,0.430416,0.474483,0.427567,0.362887,0.458605,0.354496,0.395014,0.636798,0.474747,0.535595
1997,0.454558,,0.447875,0.490234,,,,,0.354899,,...,0.417828,0.478613,0.376474,0.354933,0.450594,0.369392,0.405864,0.636868,0.471158,0.541848
1998,0.462038,,0.450676,0.49182,,,,,0.355059,,...,0.426491,0.477998,0.397527,0.362232,0.453741,0.37415,0.413281,0.636938,0.480323,0.543334
1999,0.46504,,0.453396,0.492713,,,,,0.367816,0.535515,...,0.421461,0.481964,0.370542,0.372762,0.456222,0.37752,0.399128,0.637006,0.488732,0.546673


In [11]:
# Fill missing values with NaN
df_inequality_cleaned.loc[2023] = [np.nan] * len(df_inequality_cleaned.columns)
styled_table_inequality  = df_inequality_cleaned.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_inequality

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.438,,,0.45,,,,,0.337,,0.35,0.453,0.454,0.454,0.307,0.414,0.422,,0.444,0.518,,,,,0.395,0.505,0.325,0.33,0.446,,0.363,0.484,0.317,0.416,0.66,0.46,0.513
1991,0.45,,0.442,0.457,,,,,0.334,,0.328,0.449,0.446,0.455,0.323,0.414,0.436,,0.443,0.507,,,0.439,,0.39,0.495,0.324,0.364,0.459,,0.383,0.475,0.324,0.399,0.654,0.466,0.511
1992,0.444,,0.44,0.459,,,,0.344,0.336,,0.338,0.444,0.442,0.455,0.336,0.414,0.444,0.596,0.446,0.474,,,,,0.389,0.507,0.328,0.363,0.458,0.327,0.355,0.467,0.32,0.391,0.648,0.467,0.524
1993,0.451,,0.442,0.47,,,,,0.341,,0.339,0.448,0.441,0.478,0.34,0.414,0.436,,0.448,0.479,,,,,0.394,0.513,0.349,0.374,0.473,0.37,0.352,0.46,0.332,0.388,0.643,0.473,0.521
1994,0.455,,0.446,0.471,,,,,0.345,,0.389,0.45,0.438,0.496,0.342,0.415,0.448,,0.453,0.482,0.454,0.493,0.483,,0.403,0.512,0.366,0.393,0.478,0.358,0.349,0.459,0.356,0.391,0.637,0.464,0.52
1995,0.458,,0.444,0.479,,,,,0.345,,0.381,0.451,0.429,0.49,0.323,0.413,0.45,,0.462,0.492,0.461,0.479,,,0.412,0.511,0.365,0.414,0.479,0.386,0.346,0.471,0.366,0.392,0.637,0.467,0.528
1996,0.456,,0.448,0.484,,,,0.393,0.344,,0.386,0.45,0.439,0.493,0.322,0.415,0.426,,0.466,0.502,0.483,0.468,,,0.418,0.499,0.383,0.43,0.474,0.428,0.363,0.459,0.354,0.395,0.637,0.475,0.536
1997,0.455,,0.448,0.49,,,,,0.355,,0.416,0.452,0.451,0.519,0.336,0.417,0.442,0.605,0.469,0.501,0.488,0.454,0.502,,0.418,0.509,0.397,0.418,0.479,0.376,0.355,0.451,0.369,0.406,0.637,0.471,0.542
1998,0.462,,0.451,0.492,,,,,0.355,,0.412,0.454,0.462,0.529,0.345,0.42,0.46,,0.473,0.503,0.495,0.45,,,0.41,0.525,0.373,0.426,0.478,0.398,0.362,0.454,0.374,0.413,0.637,0.48,0.543
1999,0.465,,0.453,0.493,,,,,0.368,0.536,0.413,0.451,0.466,0.532,0.358,0.423,0.458,,0.475,0.509,0.497,0.442,,,0.416,0.487,0.384,0.421,0.482,0.371,0.373,0.456,0.378,0.399,0.637,0.489,0.547


In [12]:
# **Step 1: Predict Missing Values for Each Country**
mse_per_country = {}

def fill_missing_with_regression_and_mse(series):
    """
    Apply Linear Regression to fill missing values and calculate MSE at the end of a time series.
    """
    df_temp = series.reset_index()
    df_temp.columns = ['Year', 'Gini']
    
    known_data = df_temp.dropna(subset=['Gini'])
    
    # Ensure we have enough data to split
    if len(known_data) < 3:  # Not enough data for a train-test split
        return df_temp['Gini'].values, None

    # Keep the last few known data points as a test set (10% or at least 1 value)
    test_size = max(1, len(known_data) // 10)
    train_data = known_data.iloc[:-test_size]
    test_data = known_data.iloc[-test_size:]

    X_train = train_data[['Year']]
    y_train = train_data['Gini']

    X_test = test_data[['Year']]
    y_test = test_data['Gini']

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict missing values at the beginning
    missing_years = df_temp[df_temp['Gini'].isna()]['Year'].values.reshape(-1, 1)
    if len(missing_years) > 0:
        df_temp.loc[df_temp['Gini'].isna(), 'Gini'] = model.predict(missing_years)

    # Predict for the test set
    y_pred = model.predict(X_test)

    # Calculate MSE
    mse = mean_squared_error(y_test, y_pred)

    return df_temp['Gini'].values, mse

# Apply the function to each country and store MSE values
def apply_with_mse(series):
    filled_values, mse = fill_missing_with_regression_and_mse(series)
    mse_per_country[series.name] = mse  # Store MSE for each country
    return filled_values

df_inequality_cleaned = df_inequality_cleaned.apply(apply_with_mse, axis=0)

# Convert MSE per country into a DataFrame for better display
mse_df = pd.DataFrame.from_dict(mse_per_country, orient='index', columns=['MSE'])
mse_df.sort_values(by='MSE', ascending=False, inplace=True)

# Print the MSE table
print(mse_df)

# Display the cleaned table
styled_table_inequality = df_inequality_cleaned.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)

styled_table_inequality

                     MSE
Norway          0.007456
Lithuania       0.005066
South Korea     0.004019
Poland          0.003418
New Zealand     0.002506
Hungary         0.002126
Finland         0.001759
Colombia        0.001593
Germany         0.001382
Luxembourg      0.001326
Turkey          0.001229
Denmark         0.001020
Costa Rica      0.000948
United Kingdom  0.000893
United States   0.000752
Portugal        0.000734
Estonia         0.000651
Canada          0.000606
Ireland         0.000566
Chile           0.000532
Czechia         0.000495
Israel          0.000414
Japan           0.000410
Mexico          0.000389
Australia       0.000368
Switzerland     0.000351
Belgium         0.000303
France          0.000277
Austria         0.000262
Sweden          0.000260
Greece          0.000189
Slovenia        0.000085
Iceland         0.000084
Spain           0.000038
Netherlands     0.000037
Italy           0.000020
Latvia          0.000006


Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.438,0.487,0.451,0.45,0.714,0.751,0.623,0.392,0.337,0.586,0.35,0.453,0.454,0.454,0.307,0.414,0.422,0.61,0.444,0.518,0.498,0.449,0.495,0.68,0.395,0.505,0.325,0.33,0.446,0.37,0.363,0.484,0.317,0.416,0.66,0.46,0.513
1991,0.45,0.485,0.442,0.457,0.715,0.749,0.625,0.392,0.334,0.582,0.328,0.449,0.446,0.455,0.323,0.414,0.436,0.61,0.443,0.507,0.497,0.451,0.439,0.683,0.39,0.495,0.324,0.364,0.459,0.372,0.383,0.475,0.324,0.399,0.654,0.466,0.511
1992,0.444,0.483,0.44,0.459,0.716,0.746,0.627,0.344,0.336,0.579,0.338,0.444,0.442,0.455,0.336,0.414,0.444,0.596,0.446,0.474,0.496,0.453,0.494,0.686,0.389,0.507,0.328,0.363,0.458,0.327,0.355,0.467,0.32,0.391,0.648,0.467,0.524
1993,0.451,0.481,0.442,0.47,0.717,0.743,0.629,0.391,0.341,0.575,0.339,0.448,0.441,0.478,0.34,0.414,0.436,0.611,0.448,0.479,0.496,0.455,0.494,0.688,0.394,0.513,0.349,0.374,0.473,0.37,0.352,0.46,0.332,0.388,0.643,0.473,0.521
1994,0.455,0.479,0.446,0.471,0.718,0.741,0.631,0.391,0.345,0.571,0.389,0.45,0.438,0.496,0.342,0.415,0.448,0.611,0.453,0.482,0.454,0.493,0.483,0.691,0.403,0.512,0.366,0.393,0.478,0.358,0.349,0.459,0.356,0.391,0.637,0.464,0.52
1995,0.458,0.477,0.444,0.479,0.718,0.738,0.633,0.391,0.345,0.567,0.381,0.451,0.429,0.49,0.323,0.413,0.45,0.611,0.462,0.492,0.461,0.479,0.493,0.694,0.412,0.511,0.365,0.414,0.479,0.386,0.346,0.471,0.366,0.392,0.637,0.467,0.528
1996,0.456,0.475,0.448,0.484,0.719,0.735,0.635,0.393,0.344,0.564,0.386,0.45,0.439,0.493,0.322,0.415,0.426,0.611,0.466,0.502,0.483,0.468,0.492,0.697,0.418,0.499,0.383,0.43,0.474,0.428,0.363,0.459,0.354,0.395,0.637,0.475,0.536
1997,0.455,0.473,0.448,0.49,0.72,0.733,0.638,0.391,0.355,0.56,0.416,0.452,0.451,0.519,0.336,0.417,0.442,0.605,0.469,0.501,0.488,0.454,0.502,0.7,0.418,0.509,0.397,0.418,0.479,0.376,0.355,0.451,0.369,0.406,0.637,0.471,0.542
1998,0.462,0.471,0.451,0.492,0.721,0.73,0.64,0.39,0.355,0.556,0.412,0.454,0.462,0.529,0.345,0.42,0.46,0.611,0.473,0.503,0.495,0.45,0.491,0.703,0.41,0.525,0.373,0.426,0.478,0.398,0.362,0.454,0.374,0.413,0.637,0.48,0.543
1999,0.465,0.469,0.453,0.493,0.722,0.728,0.642,0.39,0.368,0.536,0.413,0.451,0.466,0.532,0.358,0.423,0.458,0.611,0.475,0.509,0.497,0.442,0.49,0.706,0.416,0.487,0.384,0.421,0.482,0.371,0.373,0.456,0.378,0.399,0.637,0.489,0.547


In [13]:
# Filter the data to include only the selected countries
df_inequality_cleaned.isna().sum()

Country
Australia         0
Austria           0
Belgium           0
Canada            0
Chile             0
Colombia          0
Costa Rica        0
Czechia           0
Denmark           0
Estonia           0
Finland           0
France            0
Germany           0
Greece            0
Hungary           0
Iceland           0
Ireland           0
Israel            0
Italy             0
Japan             0
Latvia            0
Lithuania         0
Luxembourg        0
Mexico            0
Netherlands       0
New Zealand       0
Norway            0
Poland            0
Portugal          0
Slovenia          0
South Korea       0
Spain             0
Sweden            0
Switzerland       0
Turkey            0
United Kingdom    0
United States     0
dtype: int64

In [14]:
# Load the dataset
df_GDP = pd.read_csv("/dataset/economic/gdp-per-capita-growth.csv")

In [15]:
# Display the first few rows
print(df_GDP.dtypes)
print(df_GDP.describe(include='all'))
print(df_GDP.columns)
df_GDP= df_GDP[df_GDP["Year"] > 1989]
df_GDP = df_GDP[df_GDP['Entity'].isin(selected_countries)]

Entity                               object
Code                                 object
Year                                  int64
GDP per capita growth (annual %)    float64
dtype: object
          Entity   Code          Year  GDP per capita growth (annual %)
count      10905  10141  10905.000000                      10905.000000
unique       228    214           NaN                               NaN
top     Zimbabwe    LSO           NaN                               NaN
freq          62     62           NaN                               NaN
mean         NaN    NaN   1995.621641                          1.966012
std          NaN    NaN     17.033024                          6.015372
min          NaN    NaN   1961.000000                        -64.425840
25%          NaN    NaN   1982.000000                         -0.250181
50%          NaN    NaN   1997.000000                          2.137955
75%          NaN    NaN   2010.000000                          4.452477
max          NaN  

In [16]:
# Rename the columns
df_GDP.rename(columns={"GDP per capita growth (annual %)": "GDP"}, inplace=True)
df_GDP.rename(columns={"Entity": "Country"}, inplace=True)
df_GDP

Unnamed: 0,Country,Code,Year,GDP
441,Australia,AUS,1990,2.057392
442,Australia,AUS,1991,-1.643571
443,Australia,AUS,1992,-0.690437
444,Australia,AUS,1993,3.123738
445,Australia,AUS,1994,2.983092
...,...,...,...,...
10373,United States,USA,2018,2.404868
10374,United States,USA,2019,1.829668
10375,United States,USA,2020,-3.700953
10376,United States,USA,2021,5.779549


In [17]:
# Convert the table to a pivot table
GDP_table = df_GDP.pivot(index="Year", columns="Country", values="GDP")
GDP_table.loc[2023] = [np.nan] * len(GDP_table.columns)
styled_table_GDP = GDP_table.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_GDP

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,2.057,3.554,2.83,-1.296,1.674,2.112,0.955,,1.311,,0.225,2.43,4.352,-1.052,,,8.375,,1.9,4.494,,,4.01,3.335,3.468,-0.767,1.582,,4.177,,8.8,3.676,-0.021,2.615,7.217,0.433,0.741
1991,-1.644,2.414,1.455,-3.3,6.063,-0.056,-0.297,-11.403,1.131,,-6.399,0.163,4.345,1.87,,,1.345,,1.468,3.117,,,7.198,1.941,1.635,-5.769,2.594,-7.345,4.609,,9.687,2.285,-1.818,-2.146,-1.095,-1.408,-1.434
1992,-0.69,0.976,1.12,-0.297,9.389,2.002,6.488,-0.608,1.62,,-3.837,1.095,1.151,-0.066,-3.026,,2.641,,0.766,0.527,,,0.476,1.568,0.94,0.046,2.978,2.201,1.168,,5.101,0.436,-1.737,-1.14,3.206,0.13,2.097
1993,3.124,-0.299,-1.348,1.536,4.943,3.328,4.473,-0.042,-0.322,,-1.141,-1.059,-1.626,-2.177,-0.463,,2.181,,-0.913,-0.781,,,2.811,0.902,0.554,5.185,2.235,3.475,-2.163,,5.797,-1.543,-2.633,-1.031,5.784,2.244,1.406
1994,2.983,2.009,2.909,3.356,3.461,3.774,2.009,2.872,4.977,,3.516,1.976,2.037,1.491,3.088,,5.34,,2.13,0.802,,,2.417,2.425,2.342,3.731,4.459,5.071,0.694,,8.175,1.897,3.194,0.466,-6.326,3.582,2.761
1995,2.733,2.511,2.171,1.625,7.376,3.229,1.714,6.566,2.492,,3.82,1.736,1.246,1.624,1.631,,9.074,,2.885,2.391,,,0.017,-7.641,2.608,3.2,3.616,6.958,3.922,,8.517,2.305,3.391,-0.188,6.068,2.261,1.469
1996,2.611,2.211,1.124,0.632,5.351,0.21,-0.961,4.387,2.319,6.503,3.328,1.052,0.514,2.41,0.254,4.004,6.533,3.193,1.238,2.9,3.75,5.961,0.027,4.343,3.022,1.989,4.497,6.035,3.116,3.268,6.868,2.234,1.418,0.029,5.62,2.321,2.572
1997,2.795,1.978,3.543,3.249,5.986,1.602,3.13,-0.411,2.833,14.347,6.018,1.972,1.643,3.97,3.351,4.909,9.914,1.229,1.776,0.741,9.927,9.113,4.107,5.362,3.793,0.748,4.715,6.379,3.936,5.191,5.18,3.269,3.012,2.015,5.851,4.655,3.197
1998,3.625,3.468,1.744,3.034,2.879,-1.16,4.856,-0.262,1.848,5.347,5.178,3.205,1.998,3.321,4.142,6.219,7.638,1.762,1.781,-1.538,7.343,8.257,5.356,4.41,4.021,-0.032,2.059,4.603,4.28,3.505,-5.812,3.962,4.254,2.724,0.817,3.113,3.271
1999,3.885,3.355,3.306,4.29,-1.454,-5.794,2.078,1.488,2.608,-0.718,4.138,2.889,1.821,2.678,3.363,2.784,9.291,0.732,1.609,-0.516,3.596,-0.438,6.724,1.083,4.337,4.823,1.375,4.664,3.321,5.257,10.677,4.068,4.166,1.179,-4.708,2.723,3.598


In [18]:
GDP_table_filled = GDP_table.copy()

mse_dict = {}  # Store MSE values for each country

# Convert index to integers (if it's not already)
GDP_table_filled.index = GDP_table_filled.index.astype(int)

# **Step 1: Predict Missing Values for Each Country**
for country in GDP_table_filled.columns:
    series = GDP_table_filled[country]  # Get the country's GDP data
    known_data = series.dropna()  # Keep only known (non-NaN) values


    # Extract training data
    X_train = known_data.index.values.reshape(-1, 1)  # Years
    y_train = known_data.values  # GDP values

    # Train **Polynomial Regression (Degree 2) + Gradient Boosting**
    model = make_pipeline(
        PolynomialFeatures(1),  # Degree 3 for capturing more complex trends
        GradientBoostingRegressor(n_estimators=500, learning_rate=0.01, max_depth=4, random_state=42)
    )
    model.fit(X_train, y_train)

    # **Find Missing Years & Predict Them**
    missing_years = series[series.isna()].index.to_numpy().reshape(-1, 1)

    if len(missing_years) > 0:
        GDP_table_filled.loc[missing_years.flatten(), country] = model.predict(missing_years)

    # **Compute MSE on the last 20% of known data**
    test_size = max(1, int(len(known_data) * 0.20))  # Ensure at least 1 test sample
    test_data = known_data.iloc[-test_size:]

    X_test = test_data.index.values.reshape(-1, 1)
    y_test = test_data.values

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    mse_dict[country] = mse  # Store MSE for this country

# **Step 2: Compute & Display MSE for Each Country**
mse_df = pd.DataFrame.from_dict(mse_dict, orient='index', columns=['MSE'])
mse_df.sort_values(by='MSE', ascending=False, inplace=True)

print("\n **Mean Squared Error (MSE) for Each Country**")
print(mse_df)

styled_table_GDP = GDP_table_filled.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_GDP


🔍 **Mean Squared Error (MSE) for Each Country**
                     MSE
Czechia         0.479130
Turkey          0.348180
Luxembourg      0.175648
Estonia         0.170309
Hungary         0.133767
Latvia          0.122748
Chile           0.111882
Portugal        0.107300
Poland          0.102123
Costa Rica      0.096035
Lithuania       0.094532
Finland         0.093192
Colombia        0.088113
Ireland         0.077506
Canada          0.073653
Belgium         0.073277
Japan           0.069704
France          0.068414
Switzerland     0.065641
Mexico          0.059819
Germany         0.058701
Italy           0.058638
Sweden          0.054102
Slovenia        0.051902
Austria         0.047247
South Korea     0.042898
United Kingdom  0.039765
Netherlands     0.038692
Israel          0.034543
Greece          0.032804
New Zealand     0.032546
Iceland         0.029099
United States   0.023117
Denmark         0.020754
Norway          0.016171
Spain           0.015846
Australia       0.010887


Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,2.057,3.554,2.83,-1.296,1.674,2.112,0.955,-10.992,1.311,6.502,0.225,2.43,4.352,-1.052,-2.745,4.084,8.375,3.06,1.9,4.494,3.789,6.02,4.01,3.335,3.468,-0.767,1.582,-7.011,4.177,3.305,8.8,3.676,-0.021,2.615,7.217,0.433,0.741
1991,-1.644,2.414,1.455,-3.3,6.063,-0.056,-0.297,-11.403,1.131,6.502,-6.399,0.163,4.345,1.87,-2.745,4.084,1.345,3.06,1.468,3.117,3.789,6.02,7.198,1.941,1.635,-5.769,2.594,-7.345,4.609,3.305,9.687,2.285,-1.818,-2.146,-1.095,-1.408,-1.434
1992,-0.69,0.976,1.12,-0.297,9.389,2.002,6.488,-0.608,1.62,6.502,-3.837,1.095,1.151,-0.066,-3.026,4.084,2.641,3.06,0.766,0.527,3.789,6.02,0.476,1.568,0.94,0.046,2.978,2.201,1.168,3.305,5.101,0.436,-1.737,-1.14,3.206,0.13,2.097
1993,3.124,-0.299,-1.348,1.536,4.943,3.328,4.473,-0.042,-0.322,6.502,-1.141,-1.059,-1.626,-2.177,-0.463,4.084,2.181,3.06,-0.913,-0.781,3.789,6.02,2.811,0.902,0.554,5.185,2.235,3.475,-2.163,3.305,5.797,-1.543,-2.633,-1.031,5.784,2.244,1.406
1994,2.983,2.009,2.909,3.356,3.461,3.774,2.009,2.872,4.977,6.502,3.516,1.976,2.037,1.491,3.088,4.084,5.34,3.06,2.13,0.802,3.789,6.02,2.417,2.425,2.342,3.731,4.459,5.071,0.694,3.305,8.175,1.897,3.194,0.466,-6.326,3.582,2.761
1995,2.733,2.511,2.171,1.625,7.376,3.229,1.714,6.566,2.492,6.502,3.82,1.736,1.246,1.624,1.631,4.084,9.074,3.06,2.885,2.391,3.789,6.02,0.017,-7.641,2.608,3.2,3.616,6.958,3.922,3.305,8.517,2.305,3.391,-0.188,6.068,2.261,1.469
1996,2.611,2.211,1.124,0.632,5.351,0.21,-0.961,4.387,2.319,6.503,3.328,1.052,0.514,2.41,0.254,4.004,6.533,3.193,1.238,2.9,3.75,5.961,0.027,4.343,3.022,1.989,4.497,6.035,3.116,3.268,6.868,2.234,1.418,0.029,5.62,2.321,2.572
1997,2.795,1.978,3.543,3.249,5.986,1.602,3.13,-0.411,2.833,14.347,6.018,1.972,1.643,3.97,3.351,4.909,9.914,1.229,1.776,0.741,9.927,9.113,4.107,5.362,3.793,0.748,4.715,6.379,3.936,5.191,5.18,3.269,3.012,2.015,5.851,4.655,3.197
1998,3.625,3.468,1.744,3.034,2.879,-1.16,4.856,-0.262,1.848,5.347,5.178,3.205,1.998,3.321,4.142,6.219,7.638,1.762,1.781,-1.538,7.343,8.257,5.356,4.41,4.021,-0.032,2.059,4.603,4.28,3.505,-5.812,3.962,4.254,2.724,0.817,3.113,3.271
1999,3.885,3.355,3.306,4.29,-1.454,-5.794,2.078,1.488,2.608,-0.718,4.138,2.889,1.821,2.678,3.363,2.784,9.291,0.732,1.609,-0.516,3.596,-0.438,6.724,1.083,4.337,4.823,1.375,4.664,3.321,5.257,10.677,4.068,4.166,1.179,-4.708,2.723,3.598


In [19]:
# Load the dataset
df_death = pd.read_csv("/dataset/economic/deaths-in-armed-conflicts-based-on-where-they-occurred.csv")

In [20]:
# Display the first few rows
print(df_death.dtypes)
print(df_death.describe(include='all'))
print(df_death.columns)
print(df_death.head())
print(df_death.isnull().sum())

Entity                                                                           object
Year                                                                              int64
Deaths in ongoing conflicts in a country (best estimate) - Conflict type: all     int64
dtype: object
          Entity         Year  \
count       7175  7175.000000   
unique       205          NaN   
top     Abkhazia          NaN   
freq          35          NaN   
mean         NaN  2006.000000   
std          NaN    10.100209   
min          NaN  1989.000000   
25%          NaN  1997.000000   
50%          NaN  2006.000000   
75%          NaN  2015.000000   
max          NaN  2023.000000   

        Deaths in ongoing conflicts in a country (best estimate) - Conflict type: all  
count                                         7175.000000                              
unique                                                NaN                              
top                                                   NaN    

In [21]:
# Rename the columns
df_death.rename(columns={"Entity": "Country"}, inplace=True)
df_death.rename(columns={"Deaths in ongoing conflicts in a country (best estimate) - Conflict type: all": "Deaths"}, inplace=True)

In [22]:
# 
df_death = df_death[df_death["Year"] > 1989]
df_death = df_death[df_death['Country'].isin(selected_countries)]
table_death = df_death.pivot(index="Year", columns="Country", values="Deaths")
styled_table_death = table_death.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_death

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0,0,0,0,0,692,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,10,2,0,0,0,0,0,0,0,0,0,612,41,0
1991,0,0,0,0,0,834,0,0,0,0,0,0,0,0,0,0,0,7,0,0,3,15,0,1,0,0,0,0,0,64,0,45,0,0,426,26,0
1992,0,0,0,0,0,1479,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1889,0,0
1993,0,0,0,0,0,267,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,27,0,0,0,0,0,0,0,0,0,0,2685,32,0
1994,0,0,0,0,0,1155,0,0,0,0,0,0,0,0,0,0,0,51,0,0,0,0,0,145,0,0,0,0,0,0,0,0,0,0,4156,0,0
1995,0,0,0,24,0,922,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3700,0,0
1996,0,0,0,0,0,1379,0,0,0,0,0,0,0,0,0,0,0,51,0,0,0,0,0,37,0,0,0,0,0,0,0,0,0,0,3696,0,0
1997,0,0,0,0,0,801,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,0,0,0,0,0,0,0,0,2624,0,0
1998,0,0,0,27,0,1595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2044,29,0
1999,0,0,0,0,0,2056,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1173,0,0


In [23]:
# Load the dataset
df_conflict = pd.read_csv("/dataset/economic/locations-of-ongoing-armed-conflicts.csv")

In [24]:
# Display the first few rows
print(df_conflict.dtypes)
print(df_conflict.describe(include='all'))
print(df_conflict.columns)
print(df_conflict.head())
print(df_conflict.isnull().sum())

Entity                                                    object
Code                                                      object
Year                                                       int64
Country where conflict took place - Conflict type: all     int64
dtype: object
          Entity      Code         Year  \
count       7175      7175  7175.000000   
unique       205       205          NaN   
top     Abkhazia  OWID_ABK          NaN   
freq          35        35          NaN   
mean         NaN       NaN  2006.000000   
std          NaN       NaN    10.100209   
min          NaN       NaN  1989.000000   
25%          NaN       NaN  1997.000000   
50%          NaN       NaN  2006.000000   
75%          NaN       NaN  2015.000000   
max          NaN       NaN  2023.000000   

        Country where conflict took place - Conflict type: all  
count                                         7175.000000       
unique                                                NaN       
top           

In [25]:
# Rename the columns
df_conflict.rename(columns={"Entity": "Country"}, inplace=True)
df_conflict.rename(columns={"Country where conflict took place - Conflict type: all": "Conflict"}, inplace=True)

In [26]:
df_conflict = df_conflict[df_conflict["Year"] > 1989]
df_conflict = df_conflict[df_conflict['Country'].isin(selected_countries)]
table_conflict = df_conflict.pivot(index="Year", columns="Country", values="Conflict")
styled_table_conflict = table_conflict.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_conflict

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0
1991,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0
1992,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1993,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0
1994,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1995,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1996,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1997,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1998,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0
1999,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [27]:
# Load the dataset
df_liberal = pd.read_csv("/dataset/economic/liberal-democracy-index.csv")

In [28]:
# Display the first few rows
print(df_liberal.dtypes)
print(df_liberal.describe(include='all'))
print(df_liberal.columns)
print(df_liberal.head())
print(df_liberal.isnull().sum())

Entity                                                          object
Code                                                            object
Year                                                             int64
Liberal democracy index (best estimate, aggregate: average)    float64
dtype: object
             Entity   Code          Year  \
count         31459  29627  31459.000000   
unique          210    195           NaN   
top     Afghanistan    AFG           NaN   
freq            235    235           NaN   
mean            NaN    NaN   1927.673670   
std             NaN    NaN     63.605308   
min             NaN    NaN   1789.000000   
25%             NaN    NaN   1880.000000   
50%             NaN    NaN   1937.000000   
75%             NaN    NaN   1981.000000   
max             NaN    NaN   2023.000000   

        Liberal democracy index (best estimate, aggregate: average)  
count                                        31459.000000            
unique                           

In [29]:
# Rename the columns
df_liberal.rename(columns={"Entity": "Country"}, inplace=True)
df_liberal.rename(columns={"Liberal democracy index (best estimate, aggregate: average)": "Liberal democracy index"}, inplace=True)

In [30]:
df_liberal = df_liberal[df_liberal["Year"] > 1989]
df_liberal = df_liberal[df_liberal['Country'].isin(selected_countries)]
table_liberal = df_liberal.pivot(index="Year", columns="Country", values="Liberal democracy index")
styled_table_liberal = table_liberal.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_liberal

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.852,0.783,0.803,0.759,0.635,0.323,0.838,0.717,0.879,0.694,0.817,0.804,,0.795,0.716,0.776,0.762,0.695,0.735,0.759,0.448,0.728,0.793,0.191,0.817,0.812,0.845,0.665,0.822,0.513,0.503,0.815,0.882,0.83,0.355,0.745,0.806
1991,0.852,0.784,0.803,0.759,0.751,0.396,0.843,0.804,0.88,0.526,0.825,0.804,0.854,0.795,0.756,0.775,0.784,0.697,0.739,0.76,0.495,0.715,0.793,0.202,0.817,0.809,0.845,0.782,0.811,0.716,0.503,0.815,0.882,0.832,0.368,0.745,0.805
1992,0.852,0.784,0.81,0.755,0.751,0.43,0.843,0.812,0.88,0.384,0.825,0.805,0.854,0.797,0.756,0.775,0.79,0.692,0.75,0.76,0.567,0.734,0.793,0.216,0.817,0.809,0.845,0.805,0.811,0.767,0.519,0.815,0.881,0.828,0.376,0.744,0.811
1993,0.852,0.785,0.81,0.755,0.75,0.43,0.843,0.826,0.88,0.807,0.831,0.798,0.854,0.798,0.756,0.775,0.789,0.694,0.755,0.761,0.632,0.743,0.793,0.221,0.817,0.819,0.846,0.809,0.811,0.767,0.614,0.819,0.88,0.833,0.375,0.748,0.807
1994,0.852,0.784,0.81,0.761,0.789,0.431,0.843,0.826,0.88,0.794,0.831,0.803,0.855,0.798,0.758,0.774,0.789,0.699,0.75,0.764,0.68,0.76,0.792,0.261,0.814,0.82,0.848,0.811,0.811,0.772,0.621,0.818,0.88,0.833,0.377,0.751,0.808
1995,0.852,0.779,0.816,0.761,0.789,0.433,0.843,0.826,0.881,0.803,0.832,0.799,0.859,0.8,0.76,0.78,0.789,0.699,0.751,0.764,0.678,0.76,0.791,0.278,0.812,0.82,0.848,0.81,0.813,0.767,0.621,0.818,0.881,0.833,0.379,0.751,0.812
1996,0.85,0.786,0.818,0.769,0.789,0.431,0.844,0.826,0.881,0.807,0.832,0.806,0.859,0.802,0.76,0.784,0.794,0.698,0.755,0.763,0.682,0.758,0.791,0.291,0.812,0.823,0.848,0.803,0.82,0.773,0.627,0.821,0.881,0.837,0.386,0.751,0.811
1997,0.845,0.786,0.818,0.766,0.789,0.432,0.842,0.826,0.881,0.808,0.832,0.804,0.859,0.802,0.759,0.78,0.807,0.698,0.763,0.758,0.698,0.764,0.791,0.349,0.814,0.831,0.848,0.793,0.82,0.763,0.642,0.821,0.881,0.837,0.396,0.747,0.809
1998,0.845,0.785,0.817,0.764,0.789,0.425,0.846,0.816,0.879,0.808,0.832,0.802,0.859,0.802,0.758,0.78,0.811,0.698,0.763,0.758,0.697,0.757,0.791,0.377,0.804,0.831,0.848,0.799,0.82,0.768,0.742,0.821,0.881,0.837,0.396,0.75,0.809
1999,0.845,0.785,0.82,0.764,0.786,0.42,0.846,0.808,0.879,0.807,0.835,0.802,0.859,0.802,0.756,0.782,0.811,0.698,0.763,0.758,0.701,0.757,0.792,0.383,0.82,0.831,0.848,0.8,0.821,0.761,0.752,0.821,0.88,0.837,0.42,0.772,0.812


In [31]:
table_liberal.loc[table_liberal.index[0], "Germany"] = table_liberal.loc[table_liberal.index[1], "Germany"]
styled_table_liberal = table_liberal.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_liberal

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.852,0.783,0.803,0.759,0.635,0.323,0.838,0.717,0.879,0.694,0.817,0.804,0.854,0.795,0.716,0.776,0.762,0.695,0.735,0.759,0.448,0.728,0.793,0.191,0.817,0.812,0.845,0.665,0.822,0.513,0.503,0.815,0.882,0.83,0.355,0.745,0.806
1991,0.852,0.784,0.803,0.759,0.751,0.396,0.843,0.804,0.88,0.526,0.825,0.804,0.854,0.795,0.756,0.775,0.784,0.697,0.739,0.76,0.495,0.715,0.793,0.202,0.817,0.809,0.845,0.782,0.811,0.716,0.503,0.815,0.882,0.832,0.368,0.745,0.805
1992,0.852,0.784,0.81,0.755,0.751,0.43,0.843,0.812,0.88,0.384,0.825,0.805,0.854,0.797,0.756,0.775,0.79,0.692,0.75,0.76,0.567,0.734,0.793,0.216,0.817,0.809,0.845,0.805,0.811,0.767,0.519,0.815,0.881,0.828,0.376,0.744,0.811
1993,0.852,0.785,0.81,0.755,0.75,0.43,0.843,0.826,0.88,0.807,0.831,0.798,0.854,0.798,0.756,0.775,0.789,0.694,0.755,0.761,0.632,0.743,0.793,0.221,0.817,0.819,0.846,0.809,0.811,0.767,0.614,0.819,0.88,0.833,0.375,0.748,0.807
1994,0.852,0.784,0.81,0.761,0.789,0.431,0.843,0.826,0.88,0.794,0.831,0.803,0.855,0.798,0.758,0.774,0.789,0.699,0.75,0.764,0.68,0.76,0.792,0.261,0.814,0.82,0.848,0.811,0.811,0.772,0.621,0.818,0.88,0.833,0.377,0.751,0.808
1995,0.852,0.779,0.816,0.761,0.789,0.433,0.843,0.826,0.881,0.803,0.832,0.799,0.859,0.8,0.76,0.78,0.789,0.699,0.751,0.764,0.678,0.76,0.791,0.278,0.812,0.82,0.848,0.81,0.813,0.767,0.621,0.818,0.881,0.833,0.379,0.751,0.812
1996,0.85,0.786,0.818,0.769,0.789,0.431,0.844,0.826,0.881,0.807,0.832,0.806,0.859,0.802,0.76,0.784,0.794,0.698,0.755,0.763,0.682,0.758,0.791,0.291,0.812,0.823,0.848,0.803,0.82,0.773,0.627,0.821,0.881,0.837,0.386,0.751,0.811
1997,0.845,0.786,0.818,0.766,0.789,0.432,0.842,0.826,0.881,0.808,0.832,0.804,0.859,0.802,0.759,0.78,0.807,0.698,0.763,0.758,0.698,0.764,0.791,0.349,0.814,0.831,0.848,0.793,0.82,0.763,0.642,0.821,0.881,0.837,0.396,0.747,0.809
1998,0.845,0.785,0.817,0.764,0.789,0.425,0.846,0.816,0.879,0.808,0.832,0.802,0.859,0.802,0.758,0.78,0.811,0.698,0.763,0.758,0.697,0.757,0.791,0.377,0.804,0.831,0.848,0.799,0.82,0.768,0.742,0.821,0.881,0.837,0.396,0.75,0.809
1999,0.845,0.785,0.82,0.764,0.786,0.42,0.846,0.808,0.879,0.807,0.835,0.802,0.859,0.802,0.756,0.782,0.811,0.698,0.763,0.758,0.701,0.757,0.792,0.383,0.82,0.831,0.848,0.8,0.821,0.761,0.752,0.821,0.88,0.837,0.42,0.772,0.812


In [32]:
df_free_and_fair = pd.read_csv("/dataset/economic/free-and-fair-elections-index.csv")
print(df_free_and_fair.dtypes)
print(df_free_and_fair.describe(include='all'))
print(df_free_and_fair.columns)
print(df_free_and_fair.head())
print(df_free_and_fair.isnull().sum())

Entity                                    object
Code                                      object
Year                                       int64
Clean elections index (best estimate)    float64
dtype: object
             Entity   Code          Year  \
count         32208  31577  32208.000000   
unique          205    195           NaN   
top     Afghanistan    AFG           NaN   
freq            235    235           NaN   
mean            NaN    NaN   1922.832743   
std             NaN    NaN     65.616994   
min             NaN    NaN   1789.000000   
25%             NaN    NaN   1869.000000   
50%             NaN    NaN   1932.000000   
75%             NaN    NaN   1978.000000   
max             NaN    NaN   2023.000000   

        Clean elections index (best estimate)  
count                            32208.000000  
unique                                    NaN  
top                                       NaN  
freq                                      NaN  
mean                 

In [33]:
df_free_and_fair.rename(columns={"Entity": "Country"}, inplace=True)
df_free_and_fair.rename(columns={"Clean elections index (best estimate)": "Free and fair elections index"}, inplace=True)

In [34]:
df_free_and_fair = df_free_and_fair[df_free_and_fair["Year"] > 1989]
df_free_and_fair = df_free_and_fair[df_free_and_fair['Country'].isin(selected_countries)]
table_free_and_fair = df_free_and_fair.pivot(index="Year", columns="Country", values="Free and fair elections index")
styled_table_free_and_fair = table_free_and_fair.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_free_and_fair

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.956,0.926,0.932,0.914,0.884,0.563,0.93,0.796,0.946,0.709,0.949,0.942,,0.936,0.841,0.94,0.929,0.913,0.896,0.92,0.542,0.788,0.945,0.358,0.953,0.925,0.94,0.764,0.937,0.668,0.744,0.955,0.959,0.924,0.835,0.892,0.884
1991,0.956,0.925,0.933,0.914,0.884,0.601,0.933,0.934,0.948,0.566,0.95,0.942,0.98,0.936,0.919,0.938,0.928,0.913,0.896,0.921,0.686,0.925,0.945,0.413,0.953,0.926,0.94,0.941,0.937,0.919,0.744,0.955,0.959,0.923,0.839,0.892,0.891
1992,0.956,0.925,0.942,0.914,0.884,0.587,0.933,0.933,0.948,0.267,0.95,0.942,0.98,0.936,0.919,0.938,0.928,0.911,0.896,0.921,0.686,0.912,0.945,0.507,0.953,0.926,0.94,0.953,0.937,0.926,0.755,0.955,0.96,0.92,0.855,0.889,0.891
1993,0.958,0.925,0.942,0.917,0.888,0.587,0.933,0.95,0.948,0.948,0.95,0.941,0.98,0.937,0.919,0.938,0.926,0.909,0.907,0.921,0.785,0.933,0.945,0.517,0.953,0.938,0.94,0.954,0.937,0.924,0.807,0.955,0.96,0.92,0.855,0.888,0.891
1994,0.958,0.925,0.942,0.931,0.952,0.579,0.934,0.95,0.948,0.948,0.949,0.941,0.98,0.941,0.921,0.938,0.926,0.909,0.898,0.921,0.865,0.936,0.944,0.531,0.953,0.937,0.939,0.954,0.937,0.924,0.807,0.955,0.96,0.92,0.855,0.888,0.892
1995,0.958,0.923,0.943,0.931,0.952,0.586,0.934,0.95,0.946,0.953,0.951,0.944,0.982,0.941,0.923,0.94,0.926,0.909,0.894,0.921,0.861,0.936,0.943,0.555,0.953,0.937,0.939,0.952,0.937,0.924,0.807,0.955,0.962,0.921,0.855,0.888,0.893
1996,0.958,0.929,0.944,0.931,0.952,0.586,0.934,0.951,0.946,0.953,0.952,0.945,0.982,0.94,0.923,0.938,0.926,0.909,0.904,0.919,0.848,0.936,0.943,0.625,0.953,0.947,0.939,0.941,0.937,0.932,0.829,0.956,0.962,0.927,0.862,0.888,0.893
1997,0.957,0.929,0.944,0.925,0.952,0.586,0.934,0.952,0.946,0.953,0.952,0.943,0.982,0.938,0.923,0.936,0.933,0.91,0.909,0.915,0.879,0.934,0.943,0.731,0.953,0.946,0.939,0.942,0.937,0.922,0.84,0.956,0.962,0.927,0.862,0.881,0.891
1998,0.958,0.928,0.944,0.921,0.953,0.574,0.936,0.938,0.947,0.953,0.952,0.942,0.982,0.938,0.923,0.936,0.938,0.91,0.909,0.915,0.881,0.92,0.943,0.798,0.953,0.946,0.939,0.953,0.937,0.933,0.918,0.956,0.962,0.927,0.862,0.888,0.891
1999,0.961,0.927,0.944,0.921,0.953,0.571,0.936,0.926,0.948,0.954,0.951,0.942,0.981,0.938,0.923,0.937,0.938,0.912,0.909,0.915,0.886,0.92,0.945,0.798,0.953,0.946,0.939,0.953,0.938,0.93,0.918,0.956,0.962,0.926,0.88,0.895,0.891


In [35]:
table_free_and_fair.loc[table_free_and_fair.index[0], "Germany"] = table_free_and_fair.loc[table_free_and_fair.index[1], "Germany"]
styled_table_free_and_fair = table_free_and_fair.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_free_and_fair

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,0.956,0.926,0.932,0.914,0.884,0.563,0.93,0.796,0.946,0.709,0.949,0.942,0.98,0.936,0.841,0.94,0.929,0.913,0.896,0.92,0.542,0.788,0.945,0.358,0.953,0.925,0.94,0.764,0.937,0.668,0.744,0.955,0.959,0.924,0.835,0.892,0.884
1991,0.956,0.925,0.933,0.914,0.884,0.601,0.933,0.934,0.948,0.566,0.95,0.942,0.98,0.936,0.919,0.938,0.928,0.913,0.896,0.921,0.686,0.925,0.945,0.413,0.953,0.926,0.94,0.941,0.937,0.919,0.744,0.955,0.959,0.923,0.839,0.892,0.891
1992,0.956,0.925,0.942,0.914,0.884,0.587,0.933,0.933,0.948,0.267,0.95,0.942,0.98,0.936,0.919,0.938,0.928,0.911,0.896,0.921,0.686,0.912,0.945,0.507,0.953,0.926,0.94,0.953,0.937,0.926,0.755,0.955,0.96,0.92,0.855,0.889,0.891
1993,0.958,0.925,0.942,0.917,0.888,0.587,0.933,0.95,0.948,0.948,0.95,0.941,0.98,0.937,0.919,0.938,0.926,0.909,0.907,0.921,0.785,0.933,0.945,0.517,0.953,0.938,0.94,0.954,0.937,0.924,0.807,0.955,0.96,0.92,0.855,0.888,0.891
1994,0.958,0.925,0.942,0.931,0.952,0.579,0.934,0.95,0.948,0.948,0.949,0.941,0.98,0.941,0.921,0.938,0.926,0.909,0.898,0.921,0.865,0.936,0.944,0.531,0.953,0.937,0.939,0.954,0.937,0.924,0.807,0.955,0.96,0.92,0.855,0.888,0.892
1995,0.958,0.923,0.943,0.931,0.952,0.586,0.934,0.95,0.946,0.953,0.951,0.944,0.982,0.941,0.923,0.94,0.926,0.909,0.894,0.921,0.861,0.936,0.943,0.555,0.953,0.937,0.939,0.952,0.937,0.924,0.807,0.955,0.962,0.921,0.855,0.888,0.893
1996,0.958,0.929,0.944,0.931,0.952,0.586,0.934,0.951,0.946,0.953,0.952,0.945,0.982,0.94,0.923,0.938,0.926,0.909,0.904,0.919,0.848,0.936,0.943,0.625,0.953,0.947,0.939,0.941,0.937,0.932,0.829,0.956,0.962,0.927,0.862,0.888,0.893
1997,0.957,0.929,0.944,0.925,0.952,0.586,0.934,0.952,0.946,0.953,0.952,0.943,0.982,0.938,0.923,0.936,0.933,0.91,0.909,0.915,0.879,0.934,0.943,0.731,0.953,0.946,0.939,0.942,0.937,0.922,0.84,0.956,0.962,0.927,0.862,0.881,0.891
1998,0.958,0.928,0.944,0.921,0.953,0.574,0.936,0.938,0.947,0.953,0.952,0.942,0.982,0.938,0.923,0.936,0.938,0.91,0.909,0.915,0.881,0.92,0.943,0.798,0.953,0.946,0.939,0.953,0.937,0.933,0.918,0.956,0.962,0.927,0.862,0.888,0.891
1999,0.961,0.927,0.944,0.921,0.953,0.571,0.936,0.926,0.948,0.954,0.951,0.942,0.981,0.938,0.923,0.937,0.938,0.912,0.909,0.915,0.886,0.92,0.945,0.798,0.953,0.946,0.939,0.953,0.938,0.93,0.918,0.956,0.962,0.926,0.88,0.895,0.891


In [36]:
df_democracy = pd.read_csv("/dataset/economic/democracy-index-eiu.csv")
print(df_democracy.dtypes)
print(df_democracy.describe(include='all'))
print(df_democracy.columns)
print(df_democracy.head())
print(df_democracy.isnull().sum())

Entity              object
Code                object
Year                 int64
Democracy score    float64
dtype: object
             Entity  Code         Year  Democracy score
count          2784  2688  2784.000000      2784.000000
unique          174   168          NaN              NaN
top     Afghanistan   AFG          NaN              NaN
freq             16    16          NaN              NaN
mean            NaN   NaN  2015.312500         5.473941
std             NaN   NaN     4.921742         2.207830
min             NaN   NaN  2006.000000         0.260000
25%             NaN   NaN  2011.750000         3.520000
50%             NaN   NaN  2015.500000         5.770000
75%             NaN   NaN  2019.250000         7.240000
max             NaN   NaN  2023.000000         9.930000
Index(['Entity', 'Code', 'Year', 'Democracy score'], dtype='object')
        Entity Code  Year  Democracy score
0  Afghanistan  AFG  2006             3.06
1  Afghanistan  AFG  2008             3.02
2  Afgha

In [37]:
df_democracy.rename(columns={"Entity": "Country"}, inplace=True)
df_democracy.rename(columns={"Democracy score": "Democracy index"}, inplace=True)

In [38]:
df_democracy = df_democracy[df_democracy["Year"] > 1989]
df_democracy = df_democracy[df_democracy['Country'].isin(selected_countries)]
table_democracy = df_democracy.pivot(index="Year", columns="Country", values="Democracy index")
styled_table_democracy = table_democracy.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_democracy

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2006,9.09,8.69,8.15,9.07,7.89,6.4,8.04,8.17,9.52,7.74,9.25,8.07,8.82,8.13,7.53,9.71,9.01,7.28,7.73,8.15,7.37,7.43,9.1,6.67,9.66,9.01,9.55,7.3,8.16,7.96,7.88,8.34,9.88,9.02,5.7,8.08,8.22
2008,9.09,8.49,8.16,9.07,7.89,6.54,8.04,8.19,9.52,7.68,9.25,8.07,8.82,8.13,7.44,9.65,9.01,7.48,7.98,8.25,7.23,7.36,9.1,6.78,9.53,9.19,9.68,7.3,8.05,7.96,8.01,8.45,9.88,9.15,5.69,8.15,8.22
2010,9.22,8.49,8.05,9.08,7.67,6.55,8.04,8.19,9.52,7.68,9.19,7.77,8.38,7.92,7.21,9.65,8.79,7.48,7.83,8.08,7.05,7.24,8.88,6.93,8.99,9.26,9.8,7.05,8.02,7.69,8.11,8.16,9.5,9.09,5.73,8.16,8.18
2011,9.22,8.49,8.05,9.08,7.54,6.63,8.1,8.19,9.52,7.61,9.06,7.77,8.34,7.65,7.04,9.65,8.56,7.53,7.74,8.08,7.05,7.24,8.88,6.93,8.99,9.26,9.8,7.12,7.81,7.76,8.06,8.02,9.5,9.09,5.73,8.16,8.11
2012,9.22,8.62,8.05,9.08,7.54,6.63,8.1,8.19,9.52,7.61,9.06,7.88,8.34,7.65,6.96,9.65,8.56,7.53,7.74,8.08,7.05,7.24,8.88,6.9,8.99,9.26,9.93,7.12,7.92,7.88,8.13,8.02,9.73,9.09,5.76,8.21,8.11
2013,9.13,8.48,8.05,9.08,7.8,6.55,8.03,8.06,9.38,7.61,9.03,7.92,8.31,7.65,6.96,9.65,8.68,7.53,7.85,8.08,7.05,7.54,8.88,6.91,8.84,9.26,9.93,7.12,7.65,7.88,8.06,8.02,9.73,9.09,5.63,8.31,8.11
2014,9.01,8.54,7.93,9.08,7.8,6.55,8.03,7.94,9.11,7.74,9.03,8.04,8.64,7.45,6.9,9.58,8.72,7.63,7.85,8.08,7.48,7.54,8.88,6.68,8.92,9.26,9.93,7.47,7.79,7.57,8.06,8.05,9.73,9.09,5.12,8.31,8.11
2015,9.01,8.54,7.93,9.08,7.84,6.62,7.96,7.94,9.11,7.85,9.03,7.92,8.64,7.45,6.84,9.58,8.85,7.77,7.98,7.96,7.37,7.54,8.88,6.55,8.92,9.26,9.93,7.09,7.79,7.57,7.97,8.3,9.45,9.09,5.12,8.31,8.05
2016,9.01,8.41,7.77,9.15,7.78,6.67,7.88,7.82,9.2,7.85,9.03,7.92,8.63,7.23,6.72,9.5,9.15,7.85,7.98,7.99,7.31,7.47,8.81,6.47,8.8,9.26,9.93,6.83,7.86,7.51,7.92,8.3,9.39,9.09,5.04,8.36,7.98
2017,9.09,8.42,7.78,9.15,7.84,6.67,7.88,7.62,9.22,7.79,9.03,7.8,8.61,7.29,6.64,9.58,9.15,7.79,7.98,7.88,7.25,7.41,8.81,6.41,8.89,9.26,9.87,6.67,7.84,7.5,8.0,8.08,9.39,9.03,4.88,8.53,7.98


In [39]:
df_multiple = pd.read_csv("/dataset/economic/multi-party-elections-row-20250127-125436.csv")

In [40]:
print(df_multiple.dtypes)
print(df_multiple.describe(include='all'))
print(df_multiple.columns)
print(df_multiple.head())
print(df_multiple.isnull().sum())
print(df_multiple.head(100))

Entity                                                object
Code                                                  object
Year                                                   int64
Multiparty elections (best estimate) (re-estimate)     int64
dtype: object
        Entity   Code          Year  \
count    17403  17171  17403.000000   
unique     202    194           NaN   
top     Sweden    SWE           NaN   
freq       235    235           NaN   
mean       NaN    NaN   1953.953399   
std        NaN    NaN     53.210683   
min        NaN    NaN   1789.000000   
25%        NaN    NaN   1922.000000   
50%        NaN    NaN   1966.000000   
75%        NaN    NaN   1997.000000   
max        NaN    NaN   2023.000000   

        Multiparty elections (best estimate) (re-estimate)  
count                                        17403.000000   
unique                                                NaN   
top                                                   NaN   
freq                         

In [41]:
df_multiple.rename(columns={"Entity": "Country"}, inplace=True)
df_multiple.rename(columns={"Multiparty elections (best estimate) (re-estimate)": "Multiparty elections"}, inplace=True)

In [42]:
df_multiple = df_multiple[df_multiple["Year"] > 1989]
df_multiple = df_multiple[df_multiple['Country'].isin(selected_countries)]
table_multiple = df_multiple.pivot(index="Year", columns="Country", values="Multiparty elections")
styled_table_multiple = table_multiple.style.format(precision=3).background_gradient(cmap="coolwarm", axis=None)
styled_table_multiple

Country,Australia,Austria,Belgium,Canada,Chile,Colombia,Costa Rica,Czechia,Denmark,Estonia,Finland,France,Germany,Greece,Hungary,Iceland,Ireland,Israel,Italy,Japan,Latvia,Lithuania,Luxembourg,Mexico,Netherlands,New Zealand,Norway,Poland,Portugal,Slovenia,South Korea,Spain,Sweden,Switzerland,Turkey,United Kingdom,United States
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1990,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1991,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1992,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1993,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1994,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1996,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1998,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [43]:
df_inequality_cleaned = df_inequality_cleaned.reset_index()
table_conflict = table_conflict.reset_index()
table_death = table_death.reset_index()
table_democracy = table_democracy.reset_index()
table_free_and_fair = table_free_and_fair.reset_index()
table_liberal = table_liberal.reset_index()
table_multiple = table_multiple.reset_index()
GDP_table_filled = GDP_table_filled.reset_index()

In [44]:
df_inequality_cleaned= df_inequality_cleaned.melt(id_vars=["Year"], var_name="Country", value_name="Gini coefficient")
GDP_table_filled = GDP_table_filled.melt(id_vars=["Year"], var_name="Country", value_name="GDP")
table_conflict = table_conflict.melt(id_vars=["Year"], var_name="Country", value_name="Conflict")
table_death = table_death.melt(id_vars=["Year"], var_name="Country", value_name="Deaths")
table_democracy = table_democracy.melt(id_vars=["Year"], var_name="Country", value_name="Democracy index")
table_free_and_fair = table_free_and_fair.melt(id_vars=["Year"], var_name="Country", value_name="Free and fair elections index")
table_liberal = table_liberal.melt(id_vars=["Year"], var_name="Country", value_name="Liberal democracy index")
table_multiple = table_multiple.melt(id_vars=["Year"], var_name="Country", value_name="Multiparty elections")

In [45]:
test = df_inequality_cleaned.merge(GDP_table_filled, on=["Year", "Country"], how="outer")
test = test.merge(table_conflict, on=["Year", "Country"], how = "outer")
test = test.merge(table_death, on=["Year", "Country"], how = "outer")
test = test.merge(table_free_and_fair, on=["Year", "Country"], how = "outer")
test = test.merge(table_liberal, on=["Year", "Country"], how = "outer")
test.isna().sum()

Year                             0
Country                          0
Gini coefficient                 0
GDP                              0
Conflict                         0
Deaths                           0
Free and fair elections index    0
Liberal democracy index          0
dtype: int64

In [46]:
test

Unnamed: 0,Year,Country,Gini coefficient,GDP,Conflict,Deaths,Free and fair elections index,Liberal democracy index
0,1990,Australia,0.437959,2.057392,0,0,0.956,0.852
1,1991,Australia,0.449667,-1.643571,0,0,0.956,0.852
2,1992,Australia,0.444478,-0.690437,0,0,0.956,0.852
3,1993,Australia,0.451112,3.123738,0,0,0.958,0.852
4,1994,Australia,0.455326,2.983092,0,0,0.958,0.852
...,...,...,...,...,...,...,...,...
1253,2019,United States,0.580532,1.829668,1,8,0.856,0.737
1254,2020,United States,0.619529,-3.700953,0,0,0.852,0.729
1255,2021,United States,0.625614,5.779549,1,2,0.837,0.756
1256,2022,United States,0.626775,1.551487,1,1,0.845,0.765


In [47]:
test['Country'].unique()

array(['Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia',
       'Costa Rica', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
       'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Israel',
       'Italy', 'Japan', 'Latvia', 'Lithuania', 'Luxembourg', 'Mexico',
       'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal',
       'Slovenia', 'South Korea', 'Spain', 'Sweden', 'Switzerland',
       'Turkey', 'United Kingdom', 'United States'], dtype=object)

In [50]:
test.to_csv("econ_merged_data.csv", index=False)