# Life Expectancy - Predictor Function

#### File Setup (import libraries)

In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

## Input Metadata

|Field|Description|
|---:|:---|
|Country|Country|
|Year|Year|
|Infant deaths|Number of Infant Deaths per 1000 population|
|Under-five deaths|Number of under-five deaths per 1000 population|
|Adult Mortality|Adult Mortality Rates of both sexes (probability of dying between 15 and 60 years per 1000 population)|
|Alcohol Consumption|Alcohol, recorded per capita (15+) consumption (in litres of pure alcohol)|
|Hepatitis B|Hepatitis B (HepB) immunization coverage among 1-year-olds (%)|
|Measles|Measles - number of reported cases per 1000 population|
|BMI|Average Body Mass Index of entire population|
|Polio|Polio (Pol3) immunization coverage among 1-year-olds (%)|
|Diphtheria|Diphtheria tetanus toxoid and pertussis (DTP3) immunization coverage among 1-year-olds (%)|
|Incidents HIV|HIV/AIDS deaths per 1,000 live births (ages 0-4 years)|
|GDP|Gross Domestic Product per capita (in USD)|
|Population|Population of the country (in millions)|
|Thinness 10-19 years|Prevalence of thinness among children and adolescents for Age 10 to 19 (%)|
|Thinness 5-9 years|Prevalence of thinness among children for Age 5 to 9 (%)|
|Schooling|Number of years of schooling|
|Economic Status|Developed or Developing status|

## Choice of Model

In [None]:
CONSENT_PROMPT = (
    "Do you consent to using advanced population data, which may include "
    "protected information, for better accuracy? (Y/N)"
)


def ask_consent(read=input, show=print):
    """Ask for consent repeatedly until the user answers Y or N.

    The reply is stripped and upper-cased before it is checked, so "y" or
    " N " are accepted. Any other reply shows the retry message and asks
    again, so the returned value is always a valid answer.

    Args:
        read: Callable that takes the prompt text and returns the reply
            (defaults to ``input``).
        show: Callable used to display messages (defaults to ``print``).

    Returns:
        ``"Y"`` when consent was given, ``"N"`` when it was refused.
    """
    while True:
        answer = read(CONSENT_PROMPT).strip().upper()
        if answer == "Y":
            show('Thank you, you have given consent for advanced population data.')
            return answer
        if answer == "N":
            show('Thank you, you have NOT given consent - a minimalist model will be used which may be less accurate and robust.')
            return answer
        show('Please try again - you must enter Y (for yes) or N (for no).')


# Notebook kernels execute cells with __name__ == "__main__", so the prompt
# still runs there; the guard only stops an import of this code from
# blocking on input().
if __name__ == "__main__":
    consent = ask_consent()

## Data Input

In [None]:
# Every field the user is asked for, in the order the prompts appear.
columns = [
    'Country', 'Region', 'Year',
    'Infant_deaths', 'Under_five_deaths', 'Adult_mortality',
    'Alcohol_consumption', 'Hepatitis_B', 'Measles', 'BMI',
    'Polio', 'Diphtheria', 'Incidents_HIV',
    'GDP_per_capita', 'Population_mln',
    'Thinness_ten_nineteen_years', 'Thinness_five_nine_years',
    'Schooling',
    'Economy_status_Developed', 'Economy_status_Developing',
    'Life_expectancy',
]

# One prompt per field; each answer is stored as the raw string that was typed.
features = [input(f"Please enter the {column_name}:") for column_name in columns]

In [None]:
# Wrap the answers in an outer list so they form a single row labelled by `columns`.
single_row = [features]
features_df = pd.DataFrame(data=single_row, columns=columns)

# Last expression of the cell, so the notebook renders the frame.
features_df

## Input Validation

In [None]:
# column_dtypes = {
#     'Country': 'object',
#     'Region': 'object',
#     'Year': 'int64',
#     'Infant_deaths': 'int64',
#     'Under_five_deaths': 'int64',
#     'Adult_mortality': 'float64',
#     'Alcohol_consumption': 'float64',
#     'Hepatitis_B': 'float64',
#     'Measles': 'int64',
#     'BMI': 'float64',
#     'Polio': 'float64',
#     'Diphtheria': 'float64',
#     'Incidents_HIV': 'float64',
#     'GDP_per_capita': 'float64',
#     'Population_mln': 'float64',
#     'Thinness_ten_nineteen_years': 'float64',
#     'Thinness_five_nine_years': 'float64',
#     'Schooling': 'float64',
#     'Economy_status_Developed': 'int64',     # dummy variable (0 or 1)
#     'Economy_status_Developing': 'int64',    # dummy variable (0 or 1)
#     'Life_expectancy': 'float64'
# }

In [None]:
# Feature names shown as this cell's output for reference; the list is not
# bound to a name. It is the same set of names as `model_columns` in the
# "Proposed Input Loop" cell, except that 'const' is commented out here.
[#'const', 
 'Schooling', 
 'Adult_mortality', 
 'Under_five_deaths', 
 'Economy_status_Developing', 
 'Region_Central America and Caribbean', 'Region_South America', 'Region_Oceania', 'Region_European Union', 'Region_Rest of Europe', 'Region_North America', 
 'GDP_per_capita', 'log_GDP', 
 'Year', 
 'BMI', 
 'Incidents_HIV']


## Inputting Regions

In [None]:
## Initial draft of the region-assignment code - long-winded, kept commented out for reference only


# region_list = ['Middle East', 'European Union', 'Asia', 'South America',
#        'Central America and Caribbean', 'Rest of Europe', 'Africa',
#        'Oceania', 'North America']

# region = input(f"Please enter your Region from the list {region_list}:")

# def region_assign(text):

#     stripped_region = region.strip().lower()

#     if stripped_region == "central america and caribbean":
#         df['Region_Central America and Caribbean'] = 1
#     elif stripped_region == "south america":
#         df['Region_South America'] = 1
#     elif stripped_region == "oceania":
#         df['Region_Oceania'] = 1
#     elif stripped_region == "european union":
#         df['Region_European Union'] = 1
#     elif stripped_region == "rest of europe":
#         df['Region_Rest of Europe'] = 1
#     elif stripped_region == "north america":
#         df['Region_North America'] = 1
#     elif stripped_region in ['middle east', 'asia', 'africa']:
#         continue
#     else:
#         print("Not a valid region: please try to enter again...")

#     return


## Proposed Input Loop

In [None]:
# Define required features
model_columns = [
    'const', 'Year', 'Under_five_deaths', 'Adult_mortality', 'BMI',
    'Incidents_HIV', 'GDP_per_capita', 'Schooling', 'Economy_status_Developing',
    'Region_Central America and Caribbean', 'Region_European Union',
    'Region_North America', 'Region_Oceania', 'Region_Rest of Europe',
    'Region_South America', 'log_GDP'
]

# One-hot region columns, derived from model_columns so the two lists
# cannot drift apart.
region_columns = [col for col in model_columns if col.startswith('Region_')]

# Valid regions that have no dummy column of their own: for these every
# region dummy simply stays 0.
baseline_regions = ['Africa', 'Asia', 'Middle East']

# Base input features (not including computed fields or one-hot columns)
input_features = [
    'Region', 'Year', 'Under_five_deaths', 'Adult_mortality', 'BMI',
    'Incidents_HIV', 'GDP_per_capita', 'Schooling', 'Economy_status_Developing'
]

# Case-insensitive lookup: normalised region name -> dummy column
# (None marks a baseline region).
_region_lookup = {name.casefold(): None for name in baseline_regions}
_region_lookup.update(
    {col[len('Region_'):].casefold(): col for col in region_columns}
)


def _ask_region(read, show):
    """Prompt until a known region is entered.

    Returns the matching ``Region_*`` column name, or ``None`` when the
    region is one of ``baseline_regions``.
    """
    valid_names = [col[len('Region_'):] for col in region_columns] + baseline_regions
    while True:
        raw = read("Region: ").strip()
        # Collapse inner whitespace and ignore case so "south  america" matches.
        key = ' '.join(raw.split()).casefold()
        if key in _region_lookup:
            return _region_lookup[key]
        show(f"⚠️ Region '{raw}' not recognized. Valid regions: {', '.join(valid_names)}.")


def _parse_number(feature, text):
    """Convert ``text`` to a float that is valid for ``feature``.

    Raises:
        ValueError: if the text is not a number, is NaN/infinite, or breaks
            the feature's constraint (GDP_per_capita > 0,
            Economy_status_Developing in {0, 1}).
    """
    try:
        number = float(text)
    except ValueError:
        raise ValueError("not a number") from None
    # float() accepts "nan" and "inf"; neither is a usable model input.
    if not np.isfinite(number):
        raise ValueError("must be a finite number")
    if feature == 'GDP_per_capita' and number <= 0:
        raise ValueError("must be > 0 so that log_GDP can be computed")
    if feature == 'Economy_status_Developing' and number not in (0, 1):
        raise ValueError("must be 0 or 1")
    return number


def _ask_number(feature, read, show):
    """Prompt for ``feature`` until a valid number is entered; return it."""
    while True:
        raw = read(f"{feature}: ").strip()
        try:
            return _parse_number(feature, raw)
        except ValueError as err:
            show(f"⚠️ Invalid value for {feature}: {err}. Please try again.")


def collect_row_data(read=input, show=print):
    """Collect one row of model inputs interactively.

    Invalid answers are rejected and asked again instead of being replaced
    by 0, so the returned row never contains a silently defaulted value.

    Args:
        read: Callable that takes a prompt and returns the reply
            (defaults to ``input``).
        show: Callable used to display messages (defaults to ``print``).

    Returns:
        dict keyed by ``model_columns`` (in that order): ``const`` is 1, at
        most one region dummy is 1, and ``log_GDP`` is the natural log of
        ``GDP_per_capita``.
    """
    row = {col: 0 for col in model_columns}
    row['const'] = 1  # always set const

    show("Please enter the following values:\n")

    for feature in input_features:
        if feature == 'Region':
            region_col = _ask_region(read, show)
            if region_col is not None:
                row[region_col] = 1
        else:
            row[feature] = _ask_number(feature, read, show)

    # GDP_per_capita was validated as > 0 above, so the log is always defined.
    row['log_GDP'] = np.log(row['GDP_per_capita'])
    return row


# Notebook kernels execute cells with __name__ == "__main__", so the prompts
# still run there; the guard only stops an import of this code from
# blocking on input().
if __name__ == "__main__":
    row_data = collect_row_data()

    # Convert to DataFrame
    user_df = pd.DataFrame([row_data])