In [96]:
# Import the libraries used throughout the notebook

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

import warnings
# Install a global "ignore" filter so that no warnings are shown in cell output.
# NOTE(review): this also hides deprecation and numerical warnings from pandas /
# scikit-learn — confirm that silencing everything is intended.
warnings.filterwarnings('ignore')

In [97]:
# Step 1: Load the World Bank indicators from CSV.
# `data` and `data_df` are two names for the SAME DataFrame object (no copy is
# made), so a column assigned through one name is visible through the other.
data_df = data = pd.read_csv("Resources/World_Bank.csv")
data_df.head()

Unnamed: 0,Country Name,Country Code,Year,ATM Usage,GDP,High Tech Export ($M),High Tech Import,Internet Subscription Per 100,Internet Usage Per 100,IP Income($M),Labour Force (M),Med_High Tech Manufacturing,Mobile Sub Per 100,Personal Remittance ($M),Poverty Count,Stock Traded $B,Tech Cooperation Grant($M),Unemployment Rate,Med High Tech Export
0,Algeria,DZA,2007,2.39,1.73,6.82,22.82,0.84,9.45,2.0,10.08,14.28,81.11,189.0,9.28,42.35,199.77,12.27,0.91
1,American Samoa,ASM,2007,35.77,2.67,1289.11,28.67,1.45,20.01,217.27,4.51,11.44,99.74,594.25,1.23,58.15,52.3,8.26,33.34
2,Antigua and Barbuda,ATG,2007,63.85,7.82,2573.02,28.83,2.76,34.0,0.58,7.58,18.96,137.02,19.33,0.95,86.5,0.44,6.52,31.33
3,Argentina,ARG,2007,2.88,7.92,1202.09,31.0,6.52,25.95,650.64,17.88,26.35,101.32,540.64,1.7,1.46,51.4,10.08,35.17
4,Armenia,ARM,2007,7.69,14.54,5.2,28.52,0.13,6.02,433.95,1.43,3.91,62.46,1169.17,1.5,29.8,104.15,10.0,35.35


In [98]:
# Print the column labels of the loaded DataFrame
print(data_df.columns)

Index(['Country Name', 'Country Code', 'Year', 'ATM Usage', 'GDP',
       'High Tech Export ($M)', 'High Tech Import',
       'Internet Subscription Per 100', 'Internet Usage Per 100',
       'IP Income($M)', 'Labour Force (M)', 'Med_High Tech Manufacturing',
       'Mobile Sub Per 100', 'Personal Remittance ($M)', 'Poverty Count',
       'Stock Traded $B', 'Tech Cooperation Grant($M)', 'Unemployment Rate',
       'Med High Tech Export'],
      dtype='object')


In [99]:
# Label-encode the 'Country Name' column and store the integer labels in
# 'Country Code', replacing whatever that column held before.
# `encoder` is kept so the integer labels can be mapped back to names later.
encoder = LabelEncoder()
encoder.fit(data_df['Country Name'])
data_df['Country Code'] = encoder.transform(data_df['Country Name'])

In [100]:
# One group per encoded country label. `data_df` is the same object as `data`,
# so this groups on the integer labels written into 'Country Code' above.
grouped_data = data_df.groupby(by='Country Code')

In [101]:

def train_and_predict_country(country_data):
    """Fit a random-forest model on one country's rows and forecast 'Poverty Count'.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Rows for a single country. Must contain 'Country Code' (an integer label
        produced by the module-level ``encoder``), 'Year', 'Poverty Count' and
        the feature columns.

    Returns
    -------
    tuple
        ``(country_name, predictions)`` where ``predictions`` is a list of
        ``(year, predicted_value)`` pairs for 2023-2027, or ``None`` when the
        country has fewer than two rows.

    Notes
    -----
    The caller is responsible for printing the "Predictions for ..." header;
    this function only prints the "not enough data" message.

    'Year' is excluded from the features and the same (latest) feature row is
    fed to the model for every future year, so the five forecasts are always
    identical. NOTE(review): a real forecast needs time-dependent features.
    """
    # Map the integer label back to the human-readable country name.
    country_name = encoder.inverse_transform([country_data['Country Code'].iloc[0]])[0]

    # Features are every column except identifiers, the year and the target.
    columns_to_exclude = ['Country Name', 'Country Code', 'Year', 'Poverty Count']
    selected_columns = [col for col in country_data.columns if col not in columns_to_exclude]

    X = country_data[selected_columns].values
    y = country_data['Poverty Count'].values

    # train_test_split needs at least two rows to produce a non-empty train set.
    if len(X) < 2:
        print(f"Not enough data for {country_name}. Skipping training and prediction.")
        return country_name, None

    # Keep the same 80/20 split as before so the fitted model is unchanged;
    # the held-out part is not used for evaluation here.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Use the most recent year's feature values, chosen by 'Year' rather than by
    # row position so the result does not depend on the input ordering.
    latest_row = country_data.sort_values('Year', kind='stable').iloc[-1]
    latest_features = latest_row[selected_columns].to_numpy(dtype=float)

    future_years = np.arange(2023, 2028)
    # One identical feature row per future year (see Notes in the docstring).
    future_features = np.tile(latest_features, (len(future_years), 1))
    predictions = model.predict(future_features)

    return country_name, list(zip(future_years, predictions))

# Run the per-country model for every group and print its five-year forecast.
# Countries for which the function returns no predictions are skipped.
for country_code, country_data in grouped_data:
    country_name, predictions = train_and_predict_country(country_data)
    if predictions is None:
        continue
    print(f"Predictions for {country_name}:")
    for year, prediction in predictions:
        print(f"Year {year}: {prediction:.2f}")
    print("\n")



Predictions for Albania:
Predictions for Albania:
Year 2023: 0.17
Year 2024: 0.17
Year 2025: 0.17
Year 2026: 0.17
Year 2027: 0.17


Predictions for Algeria:
Predictions for Algeria:
Year 2023: 1.83
Year 2024: 1.83
Year 2025: 1.83
Year 2026: 1.83
Year 2027: 1.83


Predictions for American Samoa:
Predictions for American Samoa:
Year 2023: 0.63
Year 2024: 0.63
Year 2025: 0.63
Year 2026: 0.63
Year 2027: 0.63


Predictions for Andorra:
Predictions for Andorra:
Year 2023: 0.36
Year 2024: 0.36
Year 2025: 0.36
Year 2026: 0.36
Year 2027: 0.36


Predictions for Antigua and Barbuda:
Predictions for Antigua and Barbuda:
Year 2023: 0.61
Year 2024: 0.61
Year 2025: 0.61
Year 2026: 0.61
Year 2027: 0.61


Predictions for Arab World:
Predictions for Arab World:
Year 2023: 0.48
Year 2024: 0.48
Year 2025: 0.48
Year 2026: 0.48
Year 2027: 0.48


Predictions for Argentina:
Predictions for Argentina:
Year 2023: 1.01
Year 2024: 1.01
Year 2025: 1.01
Year 2026: 1.01
Year 2027: 1.01


Predictions for Armenia:
Pre

Predictions for Ecuador:
Year 2023: 3.92
Year 2024: 3.92
Year 2025: 3.92
Year 2026: 3.92
Year 2027: 3.92


Predictions for Egypt:
Predictions for Egypt:
Year 2023: 3.23
Year 2024: 3.23
Year 2025: 3.23
Year 2026: 3.23
Year 2027: 3.23


Predictions for El Salvador:
Predictions for El Salvador:
Year 2023: 3.10
Year 2024: 3.10
Year 2025: 3.10
Year 2026: 3.10
Year 2027: 3.10


Predictions for Equatorial Guinea:
Predictions for Equatorial Guinea:
Year 2023: 11.52
Year 2024: 11.52
Year 2025: 11.52
Year 2026: 11.52
Year 2027: 11.52


Predictions for Eritrea:
Predictions for Eritrea:
Year 2023: 3.01
Year 2024: 3.01
Year 2025: 3.01
Year 2026: 3.01
Year 2027: 3.01


Predictions for Estonia:
Predictions for Estonia:
Year 2023: 2.87
Year 2024: 2.87
Year 2025: 2.87
Year 2026: 2.87
Year 2027: 2.87


Predictions for Eswatini:
Predictions for Eswatini:
Year 2023: 2.53
Year 2024: 2.53
Year 2025: 2.53
Year 2026: 2.53
Year 2027: 2.53


Predictions for Ethiopia:
Predictions for Ethiopia:
Year 2023: 5.28
Ye

Predictions for Kyrgyz Republic:
Year 2023: 3.28
Year 2024: 3.28
Year 2025: 3.28
Year 2026: 3.28
Year 2027: 3.28


Predictions for Lao PDR:
Predictions for Lao PDR:
Year 2023: 4.75
Year 2024: 4.75
Year 2025: 4.75
Year 2026: 4.75
Year 2027: 4.75


Predictions for Late-demographic dividend:
Predictions for Late-demographic dividend:
Year 2023: 5.30
Year 2024: 5.30
Year 2025: 5.30
Year 2026: 5.30
Year 2027: 5.30


Predictions for Latin America & Caribbean:
Predictions for Latin America & Caribbean:
Year 2023: 4.60
Year 2024: 4.60
Year 2025: 4.60
Year 2026: 4.60
Year 2027: 4.60


Predictions for Latin America & Caribbean (excluding high income):
Predictions for Latin America & Caribbean (excluding high income):
Year 2023: 4.99
Year 2024: 4.99
Year 2025: 4.99
Year 2026: 4.99
Year 2027: 4.99


Predictions for Latin America & the Caribbean (IDA & IBRD countries):
Predictions for Latin America & the Caribbean (IDA & IBRD countries):
Year 2023: 0.14
Year 2024: 0.14
Year 2025: 0.14
Year 2026: 0.

Predictions for Pacific island small states:
Year 2023: 1.32
Year 2024: 1.32
Year 2025: 1.32
Year 2026: 1.32
Year 2027: 1.32


Predictions for Pakistan:
Predictions for Pakistan:
Year 2023: 2.11
Year 2024: 2.11
Year 2025: 2.11
Year 2026: 2.11
Year 2027: 2.11


Predictions for Palau:
Predictions for Palau:
Year 2023: 3.85
Year 2024: 3.85
Year 2025: 3.85
Year 2026: 3.85
Year 2027: 3.85


Predictions for Panama:
Predictions for Panama:
Year 2023: 1.71
Year 2024: 1.71
Year 2025: 1.71
Year 2026: 1.71
Year 2027: 1.71


Predictions for Papua New Guinea:
Predictions for Papua New Guinea:
Year 2023: 2.48
Year 2024: 2.48
Year 2025: 2.48
Year 2026: 2.48
Year 2027: 2.48


Predictions for Paraguay:
Predictions for Paraguay:
Year 2023: 1.05
Year 2024: 1.05
Year 2025: 1.05
Year 2026: 1.05
Year 2027: 1.05


Predictions for Peru:
Predictions for Peru:
Year 2023: 4.17
Year 2024: 4.17
Year 2025: 4.17
Year 2026: 4.17
Year 2027: 4.17


Predictions for Philippines:
Predictions for Philippines:
Year 2023: 3.

Predictions for Turkmenistan:
Year 2023: 1.22
Year 2024: 1.22
Year 2025: 1.22
Year 2026: 1.22
Year 2027: 1.22


Predictions for Turks and Caicos Islands:
Predictions for Turks and Caicos Islands:
Year 2023: 3.91
Year 2024: 3.91
Year 2025: 3.91
Year 2026: 3.91
Year 2027: 3.91


Predictions for Tuvalu:
Predictions for Tuvalu:
Year 2023: 1.65
Year 2024: 1.65
Year 2025: 1.65
Year 2026: 1.65
Year 2027: 1.65


Predictions for Uganda:
Predictions for Uganda:
Year 2023: 7.12
Year 2024: 7.12
Year 2025: 7.12
Year 2026: 7.12
Year 2027: 7.12


Predictions for Ukraine:
Predictions for Ukraine:
Year 2023: 0.06
Year 2024: 0.06
Year 2025: 0.06
Year 2026: 0.06
Year 2027: 0.06


Predictions for United Arab Emirates:
Predictions for United Arab Emirates:
Year 2023: 0.60
Year 2024: 0.60
Year 2025: 0.60
Year 2026: 0.60
Year 2027: 0.60


Predictions for United Kingdom:
Predictions for United Kingdom:
Year 2023: 3.53
Year 2024: 3.53
Year 2025: 3.53
Year 2026: 3.53
Year 2027: 3.53


Predictions for United Sta

In [102]:

# Step 1: Collect each country's rows (one DataFrame per group)
data_frames = [country_data for _, country_data in grouped_data]

# Step 2: Combine data for all countries into a single DataFrame
combined_data = pd.concat(data_frames)

# Step 3: Split the data into features and target variable
columns_to_exclude = ['Country Name', 'Country Code', 'Year', 'Poverty Count']
selected_columns = [col for col in combined_data.columns if col not in columns_to_exclude]

X = combined_data[selected_columns].values
y = combined_data['Poverty Count'].values

# Step 4: Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Choose a machine learning model (k-nearest-neighbours regression, k=5).
# KNeighborsRegressor is imported from sklearn.neighbors in the imports cell.
model = KNeighborsRegressor(n_neighbors=5)

# Step 6: Train the model
model.fit(X_train, y_train)

# Step 7: Evaluate the model's performance (R^2) on training and test sets
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training R^2 Score: {train_score:.4f}")
print(f"Test R^2 Score: {test_score:.4f}")

# Step 8: Build the inputs for the next 5 years.
# NOTE(review): `iloc[-1]` is the last row of the last group in `combined_data`,
# i.e. one country's row, not an aggregate over all countries — confirm this is
# what "overall" is meant to be.
last_year_data = combined_data.iloc[-1][selected_columns]
future_years = np.arange(2023, 2028)
future_data_list = []

for year in future_years:
    future_data_dict = last_year_data.to_dict()
    # 'Year' is stored for reference only: it is not in `selected_columns`, so
    # the model receives the same feature row for every future year.
    future_data_dict['Year'] = year
    future_data_list.append(future_data_dict)

future_data = pd.DataFrame(future_data_list)

# Step 9: Make predictions using the model (identical for all five years, see above)
predictions = model.predict(future_data[selected_columns].values)

# Step 10: Print the overall predictions for the next 5 years
print("Overall Predictions:")
for year, prediction in zip(future_years, predictions):
    print(f"Year {year}: {prediction:.2f}")



Training R^2 Score: 0.3687
Test R^2 Score: -0.0201
Overall Predictions:
Year 2023: 19.38
Year 2024: 19.38
Year 2025: 19.38
Year 2026: 19.38
Year 2027: 19.38
