In [32]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import joblib

from sklearn.preprocessing import StandardScaler

In [40]:
# Columns of the single-row frame built below, in the order they will appear.
model_columns = [
    'const',
    'Year',
    'Under_five_deaths',
    'Adult_mortality',
    'BMI',
    'Incidents_HIV',
    'GDP_per_capita',
    'Schooling',
    'Economy_status_Developing',
    'Region_Central America and Caribbean',
    'Region_European Union',
    'Region_North America',
    'Region_Oceania',
    'Region_Rest of Europe',
    'Region_South America',
    'log_GDP',
]

# One-hot region indicators: exactly the model columns carrying the
# "Region_" prefix, kept in the same order as in model_columns.
region_columns = [name for name in model_columns if name.startswith('Region_')]

# Values requested from the user. 'Region' is entered as free text and mapped
# onto the one-hot columns; 'const' and 'log_GDP' are filled in by the code.
input_features = [
    'Region',
    'Year',
    'Under_five_deaths',
    'Adult_mortality',
    'BMI',
    'Incidents_HIV',
    'GDP_per_capita',
    'Schooling',
    'Economy_status_Developing',
]

# Record that collects the inputs: every model column starts at 0,
# except 'const', which is always 1.
row_data = dict.fromkeys(model_columns, 0)
row_data['const'] = 1

# Input loop: prompt once per entry in input_features and store the result
# in row_data.
print("Please enter the following values:\n")

# Case-insensitive lookup from a normalised "Region_<name>" key to the exact
# one-hot column name, so e.g. "european union" still selects
# 'Region_European Union' instead of silently falling through.
region_lookup = {col.casefold(): col for col in region_columns}

for feature in input_features:
    if feature == 'Region':
        # Collapse repeated/internal whitespace before matching.
        val = " ".join(input(f"{feature}: ").split())
        region_col = region_lookup.get(f"Region_{val}".casefold())
        if region_col is not None:
            row_data[region_col] = 1
        else:
            # Best-effort fallback kept from the original behaviour: a region
            # with no matching column leaves every region dummy at 0.
            print(f"⚠️ Region '{val}' not recognized, skipping region dummies.")
        continue

    # Numeric fields: keep asking until a finite number is supplied. Falling
    # back to 0 (e.g. Year = 0) would feed a meaningless value to the model,
    # and float() also accepts 'nan' / 'inf', which must not reach it either.
    while True:
        val = input(f"{feature}: ").strip()
        try:
            number = float(val)
        except ValueError:
            print(f"⚠️ Invalid number for {feature}, please enter a numeric value.")
            continue
        if not np.isfinite(number):
            print(f"⚠️ {feature} must be a finite number, please try again.")
            continue
        row_data[feature] = number
        break

# Derive log_GDP from the entered GDP_per_capita. The natural log is only
# taken for a strictly positive value; anything else (including the untouched
# default of 0) triggers the warning and stores 0 instead.
gdp_value = row_data['GDP_per_capita']
if not gdp_value > 0:
    print("⚠️ GDP_per_capita must be > 0 to compute log_GDP. Setting log_GDP = 0.")
    row_data['log_GDP'] = 0
else:
    row_data['log_GDP'] = np.log(gdp_value)

# Wrap the record in a list so it becomes a one-row DataFrame whose columns
# follow the key order of row_data.
user_df = pd.DataFrame([row_data])

Please enter the following values:



Region:  European Union
Year:  2015
Under_five_deaths:  3.3
Adult_mortality:  57.9025
BMI:  26
Incidents_HIV:  0.09
GDP_per_capita:  25742
Schooling:  9.7
Economy_status_Developing:  0


In [41]:
# Columns handed to the scaler's transform().
# NOTE(review): presumably this order has to match the column order the scaler
# was fitted with — TODO confirm against the training notebook.
scale_cols = [
    'Schooling',
    'Adult_mortality',
    'Under_five_deaths',
    'GDP_per_capita',
    'Year',
    'log_GDP',
    'BMI',
    'Incidents_HIV',
]

# Load the previously saved scaler from the working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
scaler = joblib.load('scaler.pkl')

def feature_eng(df, scaler, scale_cols):
    """Return a copy of ``df`` with the ``scale_cols`` columns scaled.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    scaler : object
        Any object exposing ``transform(frame)`` that returns one value per
        cell of the frame it is given.
    scale_cols : list of str
        Names of the columns in ``df`` to pass through ``scaler.transform``.

    Returns
    -------
    pandas.DataFrame
        New frame in which ``scale_cols`` hold the transformed values and all
        other columns are carried over unchanged.
    """
    transformed = scaler.transform(df[scale_cols])
    scaled_df = df.copy()
    scaled_df[scale_cols] = transformed
    return scaled_df

In [42]:
# Scale the user-supplied row; user_df itself is left untouched because
# feature_eng works on a copy.
test = feature_eng(df=user_df, scaler=scaler, scale_cols=scale_cols)

In [43]:
# Load the previously saved model from the working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
model = joblib.load('model.pkl')

In [44]:
# Predict for the single prepared row; as the last expression of the cell,
# the returned value is what the notebook displays as output.
# NOTE(review): presumably the model expects the columns in model_columns
# order, which is how user_df was built — confirm against the training code.
model.predict(test)

0    80.569886
dtype: float64