In [1]:
# Import libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np

import warnings
# Silence every warning for the rest of the session. No category or module is
# given, so this also hides any warnings raised by pandas or scikit-learn.
warnings.filterwarnings('ignore')

In [2]:
# Step 1: Load the World Bank indicators from CSV
data = pd.read_csv("Resources/World_Bank.csv")
# `data_df` is a second name for the same DataFrame object (no copy is made),
# so any column assigned through `data_df` is also visible through `data`.
data_df = data
# Display the first rows as this cell's output
data_df.head()

Unnamed: 0,Country Name,Country Code,Year,ATM Usage,GDP,High Tech Export ($M),High Tech Import,Internet Subscription Per 100,Internet Usage Per 100,IP Income($M),Labour Force (M),Med_High Tech Manufacturing,Mobile Sub Per 100,Personal Remittance ($M),Poverty Count,Stock Traded $B,Tech Cooperation Grant($M),Unemployment Rate,Med High Tech Export
0,Afghanistan,AFG,2010,0.45,11.1,6.05,40.18,0.01,4.0,0.31,6.51,9.48,36.24,140.7,0.0,0.0,957.3,8.08,0.38
1,Africa Western and Central,AFW,2010,1.54,4.01,8.73,40.58,0.11,7.24,247.5,124.33,6.37,52.27,21884.17,0.0,1.35,510.68,4.22,0.0
2,Albania,ALB,2010,32.94,4.22,14.07,16.62,3.62,45.0,12.52,1.26,6.56,92.41,1717.7,0.2,0.0,95.93,13.67,15.42
3,Algeria,DZA,2010,5.33,1.69,5.38,59.43,2.51,12.5,10.16,10.55,9.66,91.42,150.34,0.05,0.0,191.31,10.16,0.46
4,American Samoa,ASM,2010,49.64,1.25,1521.29,22.73,0.0,36.0,511.83,4.8,12.28,163.6,730.23,0.0,0.0,21.76,8.45,0.0


In [3]:
# List the column labels available in the loaded frame
print(data_df.columns)

Index(['Country Name', 'Country Code', 'Year', 'ATM Usage', 'GDP',
       'High Tech Export ($M)', 'High Tech Import',
       'Internet Subscription Per 100', 'Internet Usage Per 100',
       'IP Income($M)', 'Labour Force (M)', 'Med_High Tech Manufacturing',
       'Mobile Sub Per 100', 'Personal Remittance ($M)', 'Poverty Count',
       'Stock Traded $B', 'Tech Cooperation Grant($M)', 'Unemployment Rate',
       'Med High Tech Export'],
      dtype='object')


In [4]:
# Label-encode the 'Country Name' column and store the resulting integer labels
# in 'Country Code', overwriting that column's original values.
# `encoder` is kept at module level: train_and_predict_country() calls
# encoder.inverse_transform() to turn a label back into the country name.
encoder = LabelEncoder()
data_df['Country Code'] = encoder.fit_transform(data_df['Country Name'])

In [5]:
# Group the rows by the encoded 'Country Code' so each country can be modelled
# separately. The grouping goes through `data_df`, the same name the encoded
# column was written through, rather than relying on `data` being an alias of it.
grouped_data = data_df.groupby('Country Code')

In [6]:
# Step 5: Define a function to train and predict for each country
def train_and_predict_country(country_data):
    """Fit a per-country linear regression on 'Poverty Count' and project 2023-2027.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Rows belonging to a single country. Must contain 'Country Code' (the
        integer label produced by the module-level ``encoder``), 'Poverty Count'
        and every column listed in ``selected_columns`` below. If a 'Year'
        column is present it is used to pick the most recent observation.

    Returns
    -------
    tuple
        ``(country_name, predictions_list)`` where ``predictions_list`` holds one
        ``(year, predicted_poverty_count)`` pair for each year from 2023 to 2027.

    Notes
    -----
    Prints a "Predictions for <country>:" header and the train/test R^2 scores.

    'Year' is not one of the model's features, and every future row reuses the
    feature values of the most recent observation, so the five predicted values
    are always identical.
    """
    country_name = encoder.inverse_transform([country_data['Country Code'].iloc[0]])[0]
    print(f"Predictions for {country_name}:")

    # Split the data into features and target variable for this country
    selected_columns = ['Tech Cooperation Grant($M)', 'High Tech Import', 'Med High Tech Export', 'IP Income($M)', 'Unemployment Rate']
    X = country_data[selected_columns].values
    y = country_data['Poverty Count'].values

    # Hold out 20% of this country's rows; the fixed seed keeps the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit an ordinary least-squares model for this country
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate the model's performance for this country
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print(f"Training R^2 Score: {train_score:.4f}")
    print(f"Test R^2 Score: {test_score:.4f}")

    # Take the feature values of the most recent observation. Ordering by
    # 'Year' (stable, so already-ordered input is unaffected) avoids silently
    # depending on the row order of the incoming frame.
    if 'Year' in country_data.columns:
        ordered = country_data.sort_values('Year', kind='stable')
    else:
        ordered = country_data
    last_year_data = ordered[selected_columns].iloc[-1]

    # Repeat that single feature row once per forecast year
    future_years = np.arange(2023, 2028)
    future_features = np.tile(last_year_data.to_numpy(dtype=float), (len(future_years), 1))

    # Make predictions using the model
    predictions = model.predict(future_features)

    # Pair each forecast year with its prediction
    predictions_list = list(zip(future_years, predictions))

    # Return the country name and the list of predictions for this country
    return country_name, predictions_list

# Iterate over each country, fit its model and print the five-year forecast.
# train_and_predict_country() already prints the "Predictions for <country>:"
# header together with the R^2 scores, so only the yearly values are printed
# here; repeating the header produced a duplicate line per country.
for country_code, country_data in grouped_data:
    country_name, predictions = train_and_predict_country(country_data)
    for year, prediction in predictions:
        print(f"Year {year}: {prediction:.2f}")
    print("\n")


Predictions for Afghanistan:
Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for Afghanistan:
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 0.00


Predictions for Africa Western and Central:
Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for Africa Western and Central:
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 0.00


Predictions for Albania:
Training R^2 Score: 0.8388
Test R^2 Score: -259.2282
Predictions for Albania:
Year 2023: 0.10
Year 2024: 0.10
Year 2025: 0.10
Year 2026: 0.10
Year 2027: 0.10


Predictions for Algeria:
Training R^2 Score: 0.8702
Test R^2 Score: -2604899965269937465791744929955840.0000
Predictions for Algeria:
Year 2023: 0.07
Year 2024: 0.07
Year 2025: 0.07
Year 2026: 0.07
Year 2027: 0.07


Predictions for American Samoa:
Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for American Samoa:
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 

Training R^2 Score: 0.9100
Test R^2 Score: 0.4760
Predictions for IDA total:
Year 2023: 26.15
Year 2024: 26.15
Year 2025: 26.15
Year 2026: 26.15
Year 2027: 26.15


Predictions for Iceland:
Training R^2 Score: 0.9694
Test R^2 Score: -0.5303
Predictions for Iceland:
Year 2023: 0.02
Year 2024: 0.02
Year 2025: 0.02
Year 2026: 0.02
Year 2027: 0.02


Predictions for India:
Training R^2 Score: 0.9259
Test R^2 Score: -32.8712
Predictions for India:
Year 2023: 8.35
Year 2024: 8.35
Year 2025: 8.35
Year 2026: 8.35
Year 2027: 8.35


Predictions for Indonesia:
Training R^2 Score: 0.9747
Test R^2 Score: 0.9324
Predictions for Indonesia:
Year 2023: 4.02
Year 2024: 4.02
Year 2025: 4.02
Year 2026: 4.02
Year 2027: 4.02


Predictions for Iran:
Training R^2 Score: 0.7652
Test R^2 Score: -1.8667
Predictions for Iran:
Year 2023: 0.95
Year 2024: 0.95
Year 2025: 0.95
Year 2026: 0.95
Year 2027: 0.95


Predictions for Iraq:
Training R^2 Score: 0.9324
Test R^2 Score: 0.0000
Predictions for Iraq:
Year 2023: 0.01


Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for St. Kitts and Nevis:
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 0.00


Predictions for St. Lucia:
Training R^2 Score: 0.9635
Test R^2 Score: 0.0000
Predictions for St. Lucia:
Year 2023: 0.72
Year 2024: 0.72
Year 2025: 0.72
Year 2026: 0.72
Year 2027: 0.72


Predictions for St. Martin (French part):
Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for St. Martin (French part):
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 0.00


Predictions for St. Vincent and the Grenadines:
Training R^2 Score: 1.0000
Test R^2 Score: 1.0000
Predictions for St. Vincent and the Grenadines:
Year 2023: 0.00
Year 2024: 0.00
Year 2025: 0.00
Year 2026: 0.00
Year 2027: 0.00


Predictions for Sub-Saharan Africa:
Training R^2 Score: 0.9229
Test R^2 Score: 0.6889
Predictions for Sub-Saharan Africa:
Year 2023: 35.10
Year 2024: 35.10
Year 2025: 35.10
Year 2026: 35.10
Year 2027: 

In [7]:
# Step 6: Split the data into features and target variable for one country.
# NOTE(review): `country_data` is not assigned in this cell. If the cells were
# run in order it is the value left behind by the module-level
# `for country_code, country_data in grouped_data` loop, i.e. the last group
# iterated - confirm that this is the intended country.
selected_columns = ['Tech Cooperation Grant($M)', 'High Tech Import', 'Med High Tech Export', 'IP Income($M)', 'Unemployment Rate']
X = country_data[selected_columns].values
y = country_data['Poverty Count'].values

# Hold out 20% of the rows as a test set; random_state fixes the split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Show the (rows, columns) shape of the training and test feature matrices
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)



Shape of X_train: (8, 5)
Shape of X_test: (3, 5)


In [9]:
# Choose a machine learning model for this country (Linear Regression),
# created with its default settings
model = LinearRegression()

In [10]:
# Train the model on the training split for this country.
# fit() is the cell's last expression, so its return value is displayed as output.
model.fit(X_train, y_train)

LinearRegression()

In [11]:
 # Evaluate the model's performance for this country on both splits.
# The values returned by model.score() are reported as R^2 with four decimals.
# NOTE(review): the test split printed above has only 3 rows, so the test
# score is computed from very few samples - treat it with caution.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training R^2 Score: {train_score:.4f}")
print(f"Test R^2 Score: {test_score:.4f}")

Training R^2 Score: 0.8601
Test R^2 Score: -3.5470


In [32]:
# Step 12: Build the input rows for the next 5 years for this country.
# Each row copies the feature values of the last row of `country_data` and is
# tagged with the year it stands for.
future_years = np.arange(2023, 2028)
last_year_data = country_data.iloc[-1][selected_columns]

future_data_list = [
    {**last_year_data.to_dict(), 'Year': year}
    for year in future_years
]
future_data = pd.DataFrame(future_data_list)

# Step 13: Make predictions using the model.
# Only the selected feature columns are handed to the model; 'Year' is not.
feature_matrix = future_data[selected_columns].values
predictions = model.predict(feature_matrix)

# Print the predictions for the next 5 years
print("Predicted Poverty Count for the next 5 years:")
for year, prediction in zip(future_years, predictions):
    print(f"Year {year}: {prediction:.2f}")



Predicted Poverty Count for the next 5 years:
Year 2023: 11.81
Year 2024: 11.81
Year 2025: 11.81
Year 2026: 11.81
Year 2027: 11.81


In [None]:

# Preview the first rows only instead of rendering the entire DataFrame
data_df.head()
 



   

