In [20]:
# Import libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [21]:
# Step 1: Load the World Bank indicators.
# `data` and `data_df` are two names for the SAME DataFrame object (no copy is made),
# so a change made through one name is visible through the other.
data_df = data = pd.read_csv("Resources/World_Bank.csv")
data_df.head()

Unnamed: 0,Country Name,Country Code,Year,ATM Usage,GDP,High Tech Export ($M),High Tech Import,Internet Subscription Per 100,Internet Usage Per 100,IP Income($M),Labour Force (M),Med_High Tech Manufacturing,Mobile Sub Per 100,Personal Remittance ($M),Poverty Count,Stock Traded $B,Tech Cooperation Grant($M),Unemployment Rate,Med High Tech Export
0,Argentina,ARG,2010,13.86,9.84,1686.22,33.65,9.8,45.0,1531.73,18.32,26.18,138.89,628.54,0.7,0.45,71.88,8.65,45.02
1,Australia,AUS,2010,154.32,0.63,4554.4,25.79,25.02,76.0,3012.86,11.47,27.82,102.18,1334.65,0.3,90.62,0.0,5.56,19.94
2,Austria,AUT,2010,112.34,1.59,15801.88,35.94,24.52,75.17,2092.66,4.29,43.78,146.37,3101.68,0.5,12.82,0.0,5.3,59.97
3,Bangladesh,BGD,2010,1.29,4.37,37.56,19.5,0.28,3.7,21.92,55.76,9.14,45.77,10520.65,18.2,2.28,196.45,5.0,2.14
4,Belgium,BEL,2010,87.37,1.93,18056.97,41.87,31.01,75.0,2023.02,4.8,34.39,111.73,11090.65,0.1,25.5,0.0,7.91,50.62


In [22]:
# Show the column labels of the loaded frame
print(data_df.columns)

Index(['Country Name', 'Country Code', 'Year', 'ATM Usage', 'GDP',
       'High Tech Export ($M)', 'High Tech Import',
       'Internet Subscription Per 100', 'Internet Usage Per 100',
       'IP Income($M)', 'Labour Force (M)', 'Med_High Tech Manufacturing',
       'Mobile Sub Per 100', 'Personal Remittance ($M)', 'Poverty Count',
       'Stock Traded $B', 'Tech Cooperation Grant($M)', 'Unemployment Rate',
       'Med High Tech Export'],
      dtype='object')


In [23]:
# Label-encode each 'Country Name' as an integer and store the result in
# 'Country Code', replacing that column's previous contents. The fitted encoder
# is kept so the integer labels can be mapped back to country names later.
encoder = LabelEncoder().fit(data_df['Country Name'])
data_df['Country Code'] = encoder.transform(data_df['Country Name'])

In [24]:
# Group by the label-encoded 'Country Code'. Use `data_df`, the name through which
# that column was written, so this cell does not silently depend on `data` being
# an alias of the same object.
grouped_data = data_df.groupby('Country Code')

In [33]:
# Train a per-country model and project the target forward
def train_and_predict_country(country_data, future_years=None):
    """Fit a linear model of 'Poverty Count' for one country and project it forward.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Rows for a single country. Must contain 'Country Code' (integer labels
        produced by the module-level ``encoder``), 'Year', 'Poverty Count' and
        the five feature columns listed in ``selected_columns`` below.
    future_years : sequence of int, optional
        Years to produce predictions for. Defaults to 2023 through 2027.

    Returns
    -------
    tuple
        ``(country_name, predictions_list)`` where ``predictions_list`` holds one
        ``(year, predicted_poverty_count)`` pair per requested year.

    Notes
    -----
    The feature values of the most recent year are reused unchanged for every
    requested year, and 'Year' is not a model input, so every requested year
    receives the same predicted value.
    """
    country_name = encoder.inverse_transform([country_data['Country Code'].iloc[0]])[0]
    # The lines printed here are fit diagnostics, so label them as an evaluation
    # rather than as predictions.
    print(f"Model evaluation for {country_name}:")

    # Features and target for this country
    selected_columns = ['Tech Cooperation Grant($M)', 'High Tech Import', 'Med High Tech Export', 'IP Income($M)', 'Unemployment Rate']
    X = country_data[selected_columns].values
    y = country_data['Poverty Count'].values

    # Hold out 20% of the rows; the fixed seed keeps the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit an ordinary least-squares model on the training rows
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Report R^2 on both partitions
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print(f"Training R^2 Score: {train_score:.4f}")
    print(f"Test R^2 Score: {test_score:.4f}")

    if future_years is None:
        future_years = np.arange(2023, 2028)
    if len(future_years) == 0:
        # Nothing to forecast; avoid calling predict on an empty matrix
        return country_name, []

    # Pick the row with the latest 'Year' explicitly instead of trusting row order.
    # A stable sort keeps the original relative order of rows that share a year.
    latest_features = (
        country_data.sort_values('Year', kind='mergesort')[selected_columns]
        .iloc[-1]
        .to_numpy(dtype=float)
    )

    # Repeat the latest feature row once per requested year and predict in one call
    future_features = np.tile(latest_features, (len(future_years), 1))
    predictions = model.predict(future_features)

    # Pair each requested year with its prediction
    predictions_list = list(zip(future_years, predictions))

    return country_name, predictions_list

# Run the per-country model for every group and print its forecast
for country_code, country_data in grouped_data:
    country_name, predictions = train_and_predict_country(country_data)
    report_lines = [f"Predictions for {country_name}:"]
    report_lines.extend(f"Year {year}: {prediction:.2f}" for year, prediction in predictions)
    print("\n".join(report_lines))
    # Blank separator between countries
    print("\n")


Predictions for Argentina:
Training R^2 Score: 0.9562
Test R^2 Score: -8.0911
Predictions for Argentina:
Year 2023: 1.09
Year 2024: 1.09
Year 2025: 1.09
Year 2026: 1.09
Year 2027: 1.09


Predictions for Australia:
Training R^2 Score: 0.5903
Test R^2 Score: -10.8115
Predictions for Australia:
Year 2023: 0.15
Year 2024: 0.15
Year 2025: 0.15
Year 2026: 0.15
Year 2027: 0.15


Predictions for Austria:
Training R^2 Score: 0.6551
Test R^2 Score: -13.4732
Predictions for Austria:
Year 2023: 0.79
Year 2024: 0.79
Year 2025: 0.79
Year 2026: 0.79
Year 2027: 0.79


Predictions for Bangladesh:
Training R^2 Score: 0.8270
Test R^2 Score: -0.3786
Predictions for Bangladesh:
Year 2023: 1.70
Year 2024: 1.70
Year 2025: 1.70
Year 2026: 1.70
Year 2027: 1.70


Predictions for Belgium:
Training R^2 Score: 0.9400
Test R^2 Score: -19815759427942124797945767264256.0000
Predictions for Belgium:
Year 2023: 0.10
Year 2024: 0.10
Year 2025: 0.10
Year 2026: 0.10
Year 2027: 0.10


Predictions for Brazil:
Training R^2 S

In [26]:
# Build the feature matrix and target vector from `country_data`.
# NOTE: `country_data` is not assigned in this cell; it must already be bound
# by an earlier cell.
selected_columns = [
    'Tech Cooperation Grant($M)',
    'High Tech Import',
    'Med High Tech Export',
    'IP Income($M)',
    'Unemployment Rate',
]
X = country_data[selected_columns].to_numpy()
y = country_data['Poverty Count'].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [27]:
# Report the dimensions of the training and test feature matrices
for label, features in (("Shape of X_train:", X_train), ("Shape of X_test:", X_test)):
    print(label, features.shape)



Shape of X_train: (8, 5)
Shape of X_test: (3, 5)


In [28]:
# Choose a machine learning model for this country: a linear regression
# estimator created with its default settings
model = LinearRegression()

In [29]:
# Train the model on the training rows for this country.
# As the last expression in the cell, the fitted estimator is shown as the cell output.
model.fit(X_train, y_train)

LinearRegression()

In [30]:
 # Evaluate the model's performance for this country
# Score the fitted model on the training rows first, then on the held-out rows
train_score, test_score = (
    model.score(features, targets)
    for features, targets in ((X_train, y_train), (X_test, y_test))
)
print(f"Training R^2 Score: {train_score:.4f}")
print(f"Test R^2 Score: {test_score:.4f}")

Training R^2 Score: 1.0000
Test R^2 Score: -1.9736


In [32]:
# Step 12: Make predictions for the next 5 years for this country
# Take the feature values from the row with the latest 'Year' explicitly rather
# than trusting row order; a stable sort keeps ties in their original order.
last_year_data = country_data.sort_values('Year', kind='mergesort').iloc[-1][selected_columns]

future_years = np.arange(2023, 2028)

# One record per future year: the latest feature values plus the year itself.
# NOTE: 'Year' is not among the model inputs, so every row yields the same prediction.
future_data_list = [{**last_year_data.to_dict(), 'Year': year} for year in future_years]
future_data = pd.DataFrame(future_data_list)

# Step 13: Make predictions using the model
predictions = model.predict(future_data[selected_columns].values)

# Print the predictions for the next 5 years
print("Predicted Poverty Count for the next 5 years:")
for year, prediction in zip(future_years, predictions):
    print(f"Year {year}: {prediction:.2f}")



Predicted Poverty Count for the next 5 years:
Year 2023: 11.81
Year 2024: 11.81
Year 2025: 11.81
Year 2026: 11.81
Year 2027: 11.81


In [None]:

# Preview the frame (its 'Country Code' column now holds the integer labels)
# instead of rendering every row
data_df.head()
 



   

