In [27]:
import pandas as pd

# Column order of the table; the first entry is the label, the rest are numeric indicators.
column_names = [
    'Country', 'Life Expectancy', 'Top-10 Income', 'Infant Mortality',
    'Military Spending', 'School Years', 'CPI',
]

# One record per country, with values listed in `column_names` order.
country_rows = [
    ('Afghanistan', 59.61, 23.21, 74.30, 4.44, 0.40, 1.5171),
    ('Haiti', 45.00, 47.67, 73.10, 0.09, 3.40, 1.7999),
    ('Nigeria', 51.30, 38.23, 82.60, 1.07, 4.10, 2.4493),
    ('Egypt', 70.48, 26.58, 19.60, 1.86, 5.30, 2.8622),
    ('Argentina', 75.77, 32.30, 13.30, 0.76, 10.10, 2.9961),
    ('China', 74.87, 29.98, 13.70, 1.95, 6.40, 3.6356),
    ('Brazil', 73.12, 42.93, 14.50, 1.43, 7.20, 3.7741),
    ('Israel', 81.30, 28.80, 3.60, 6.77, 12.50, 5.8069),
    ('USA', 78.51, 29.85, 6.30, 4.72, 13.70, 7.1357),
    ('Ireland', 80.15, 27.23, 3.50, 0.60, 11.50, 7.5360),
    ('UK', 80.09, 28.49, 4.40, 2.59, 13.00, 7.7751),
    ('Germany', 80.24, 22.07, 3.50, 1.31, 12.00, 8.0461),
    ('Canada', 80.99, 24.79, 4.90, 1.42, 14.20, 8.6725),
    ('Australia', 82.09, 25.40, 4.20, 1.86, 11.50, 8.8442),
    ('Sweden', 81.43, 22.18, 2.40, 1.27, 12.80, 9.2985),
    ('New Zealand', 80.67, 27.81, 4.90, 1.13, 12.30, 9.4627),
]

# Transpose the records into the column-oriented dict of lists that `data` exposes.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*country_rows))
}

# Build the frame from the column dict and show it.
df = pd.DataFrame(data)
print(df)

        Country  Life Expectancy  Top-10 Income  Infant Mortality  \
0   Afghanistan            59.61          23.21              74.3   
1         Haiti            45.00          47.67              73.1   
2       Nigeria            51.30          38.23              82.6   
3         Egypt            70.48          26.58              19.6   
4     Argentina            75.77          32.30              13.3   
5         China            74.87          29.98              13.7   
6        Brazil            73.12          42.93              14.5   
7        Israel            81.30          28.80               3.6   
8           USA            78.51          29.85               6.3   
9       Ireland            80.15          27.23               3.5   
10           UK            80.09          28.49               4.4   
11      Germany            80.24          22.07               3.5   
12       Canada            80.99          24.79               4.9   
13    Australia            82.09  

In [28]:
# Feature columns to rescale; 'Country' and 'CPI' are left as they are.
columns_to_normalize = ['Life Expectancy', 'Top-10 Income', 'Infant Mortality', 'Military Spending', 'School Years']

# Min-max scaling: (value - column minimum) / (column maximum - column minimum).
feature_values = df[columns_to_normalize]
feature_min = feature_values.min()
feature_span = feature_values.max() - feature_values.min()

# Work on a copy so the raw frame `df` keeps its original values.
df_normalized = df.copy()
df_normalized[columns_to_normalize] = (feature_values - feature_min) / feature_span

print(df_normalized)

        Country  Life Expectancy  Top-10 Income  Infant Mortality  \
0   Afghanistan         0.393907       0.044531          0.896509   
1         Haiti         0.000000       1.000000          0.881546   
2       Nigeria         0.169857       0.631250          1.000000   
3         Egypt         0.686978       0.176172          0.214464   
4     Argentina         0.829604       0.399609          0.135910   
5         China         0.805338       0.308984          0.140898   
6        Brazil         0.758156       0.814844          0.150873   
7        Israel         0.978700       0.262891          0.014963   
8           USA         0.903478       0.303906          0.048628   
9       Ireland         0.947695       0.201563          0.013716   
10           UK         0.946077       0.250781          0.024938   
11      Germany         0.950121       0.000000          0.013716   
12       Canada         0.970342       0.106250          0.031172   
13    Australia         1.000000  

In [29]:
# Feature values for the query country; each value is wrapped in a one-element
# list so the mapping has the column -> list-of-values shape.
russia_features = {
    feature: [value]
    for feature, value in (
        ('Life Expectancy', 67.62),
        ('Top-10 Income', 31.68),
        ('Infant Mortality', 10.00),
        ('Military Spending', 3.87),
        ('School Years', 12.90),
    )
}

print(russia_features)

{'Life Expectancy': [67.62], 'Top-10 Income': [31.68], 'Infant Mortality': [10.0], 'Military Spending': [3.87], 'School Years': [12.9]}


In [30]:
import numpy as np

# Features spanning the (unnormalized) space in which the distance to Russia is measured.
distance_features = ['Life Expectancy', 'Top-10 Income', 'Infant Mortality', 'Military Spending', 'School Years']

# `russia_features` stores every value inside a one-element list. Pull out the
# scalar with [0] so each term is a plain Series-minus-scalar subtraction rather
# than depending on a length-1 list being broadcast against the whole column.
squared_differences = [
    (df[feature] - russia_features[feature][0]) ** 2
    for feature in distance_features
]

# Euclidean distance: square root of the summed squared differences.
# The result is stored as a new 'Distance' column on `df`.
df['Distance'] = np.sqrt(sum(squared_differences))

# Sort by distance to find the 3 nearest neighbors
df_sorted = df.sort_values(by='Distance')

print(df_sorted)

        Country  Life Expectancy  Top-10 Income  Infant Mortality  \
4     Argentina            75.77          32.30              13.3   
5         China            74.87          29.98              13.7   
8           USA            78.51          29.85               6.3   
3         Egypt            70.48          26.58              19.6   
10           UK            80.09          28.49               4.4   
6        Brazil            73.12          42.93              14.5   
15  New Zealand            80.67          27.81               4.9   
9       Ireland            80.15          27.23               3.5   
7        Israel            81.30          28.80               3.6   
12       Canada            80.99          24.79               4.9   
13    Australia            82.09          25.40               4.2   
11      Germany            80.24          22.07               3.5   
14       Sweden            81.43          22.18               2.4   
0   Afghanistan            59.61  

In [31]:
# `df_sorted` is ordered by ascending distance, so its first three rows are
# the 3 nearest neighbors.
knn_3 = df_sorted.iloc[:3]

# Unweighted prediction: the average CPI of those three rows.
neighbor_cpi = knn_3['CPI']
predicted_cpi = neighbor_cpi.mean()

print(predicted_cpi)

4.589133333333334


In [32]:
import warnings
# Take an explicit copy of the selected rows so the columns added below land on
# an independent frame. Adding columns to the bare `head()` result can trigger
# pandas' SettingWithCopyWarning; copying addresses the cause, so warnings no
# longer have to be silenced globally with `warnings.filterwarnings('ignore')`.
knn_16 = df_sorted.head(16).copy()

# Calculate the weights as inverse distance squared
knn_16['Weight'] = 1 / (knn_16['Distance']**2)

# Calculate the weighted CPI (weight * CPI)
knn_16['Weighted CPI'] = knn_16['Weight'] * knn_16['CPI']

print(knn_16)


        Country  Life Expectancy  Top-10 Income  Infant Mortality  \
4     Argentina            75.77          32.30              13.3   
5         China            74.87          29.98              13.7   
8           USA            78.51          29.85               6.3   
3         Egypt            70.48          26.58              19.6   
10           UK            80.09          28.49               4.4   
6        Brazil            73.12          42.93              14.5   
15  New Zealand            80.67          27.81               4.9   
9       Ireland            80.15          27.23               3.5   
7        Israel            81.30          28.80               3.6   
12       Canada            80.99          24.79               4.9   
13    Australia            82.09          25.40               4.2   
11      Germany            80.24          22.07               3.5   
14       Sweden            81.43          22.18               2.4   
0   Afghanistan            59.61  

In [33]:
# Weighted average of CPI: the summed weight*CPI products divided by the summed weights.
weighted_cpi_total = knn_16['Weighted CPI'].sum()
weight_total = knn_16['Weight'].sum()
predicted_cpi = weighted_cpi_total / weight_total

print(predicted_cpi)

5.908707536841121


In [34]:
# Wrap Russia's feature mapping in a one-row DataFrame.
df_russia_features = pd.DataFrame(russia_features)

columns_to_normalize = ['Life Expectancy', 'Top-10 Income', 'Infant Mortality', 'Military Spending', 'School Years']

# Per-column minimum and range taken from `df`, so Russia is scaled with the
# same bounds as the country table rather than with its own single row.
reference_min = df[columns_to_normalize].min()
reference_span = df[columns_to_normalize].max() - reference_min

# Min-max scale all feature columns at once; the subtraction and division
# align on column labels.
df_russia_features_normalized = df_russia_features.copy()
df_russia_features_normalized[columns_to_normalize] = (
    (df_russia_features[columns_to_normalize] - reference_min) / reference_span
)

# Display the normalized Russia features
print("Normalized Russia's Features DataFrame:")
print(df_russia_features_normalized)

Normalized Russia's Features DataFrame:
   Life Expectancy  Top-10 Income  Infant Mortality  Military Spending  \
0         0.609868       0.375391          0.094763           0.565868   

   School Years  
0      0.905797  


In [35]:
# Features that enter the Euclidean distance, in the order their squared
# differences are added up.
normalized_features = ['Life Expectancy', 'Top-10 Income', 'Infant Mortality', 'Military Spending', 'School Years']

# Squared difference between every country and Russia's single normalized row,
# one Series per feature.
squared_gaps = [
    (df_normalized[name] - df_russia_features_normalized[name].values[0]) ** 2
    for name in normalized_features
]

# Accumulate left to right, then take the square root to get the distance.
squared_total = squared_gaps[0]
for gap in squared_gaps[1:]:
    squared_total = squared_total + gap
df_normalized['Distance'] = np.sqrt(squared_total)

# Closest countries first.
df_sorted_by_distance = df_normalized.sort_values(by='Distance')

print(df_sorted_by_distance[['Country', 'Distance']])

        Country  Distance
8           USA  0.336151
10           UK  0.412564
4     Argentina  0.555415
13    Australia  0.564308
15  New Zealand  0.566419
7        Israel  0.586832
5         China  0.590944
12       Canada  0.591451
9       Ireland  0.633115
11      Germany  0.643724
14       Sweden  0.660963
3         Egypt  0.673647
6        Brazil  0.722692
0   Afghanistan  1.275403
2       Nigeria  1.288744
1         Haiti  1.474860


In [36]:
# Store inverse-distance-squared weights on the frame. They are not used by the
# unweighted 3-nearest-neighbor estimate computed in this cell.
df_normalized['Weight'] = 1 / (df_normalized['Distance']**2)

# Sort by distance and select the 3 nearest neighbors
df_sorted_by_distance = df_normalized.sort_values(by='Distance')
nearest_neighbors_3 = df_sorted_by_distance.head(3)

# Plain (unweighted) sum of the neighbors' CPI. The variable keeps its
# historical name `weighted_cpi_3`, but no weights are applied here.
weighted_cpi_3 = (nearest_neighbors_3['CPI']).sum()

# Average over the rows actually selected instead of a hard-coded 3, so the
# mean stays correct if the frame ever holds fewer than three rows.
predicted_cpi_3 = weighted_cpi_3 / len(nearest_neighbors_3)

In [37]:
# Show the unweighted average CPI of the 3 nearest neighbors (normalized features)
print(predicted_cpi_3)

5.968966666666667


In [38]:
# Calculate weights using inverse distance squared
df_normalized['Weight'] = 1 / (df_normalized['Distance']**2)

# Sort by distance and select the 16 nearest neighbors
df_sorted_by_distance = df_normalized.sort_values(by='Distance')
nearest_neighbors = df_sorted_by_distance.head(16)

# A neighbor at distance 0 gets an infinite weight, which would turn the
# weighted average into inf / inf = NaN. Detect such exact matches up front.
exact_matches = nearest_neighbors[nearest_neighbors['Distance'] == 0]

if exact_matches.empty:
    # Calculate weighted CPI for the 16 nearest neighbors
    weighted_cpi = (nearest_neighbors['CPI'] * nearest_neighbors['Weight']).sum()
    total_weight = nearest_neighbors['Weight'].sum()
else:
    # Exact matches dominate every other neighbor: give each of them weight 1
    # and all remaining rows weight 0, i.e. average the exact matches' CPI.
    weighted_cpi = exact_matches['CPI'].sum()
    total_weight = len(exact_matches)

predicted_cpi = weighted_cpi / total_weight

In [39]:
# Show the inverse-distance-squared weighted CPI estimate (normalized features)
print(predicted_cpi)

6.634661198770241
