In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
import warnings
# Route DataFrame.plot() through plotly for this session.
pd.set_option('plotting.backend', 'plotly')
# NOTE(review): this silences every warning, including pandas' chained-assignment
# and dtype-upcast warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the source table; the path is relative to the notebook's working directory.
file_path = '../data/data.csv'
data = pd.read_csv(file_path)

In [None]:
# Age bands used for aggregation: label -> inclusive (lowest age, highest age).
# The last band is open-ended, so its upper bound is infinity.
age_groups = {
    label: (lowest, highest)
    for label, lowest, highest in (
        ('0-4', 0, 4),
        ('5-14', 5, 14),
        ('15-34', 15, 34),
        ('35-49', 35, 49),
        ('50+', 50, float('inf')),
    )
}

def is_age_in_group(age_range, group_start, group_end):
    """Return True when an age label lies entirely inside an age group.

    Parameters
    ----------
    age_range : str or int
        Age label in one of three forms: a closed range ``'5-9'``, an
        open-ended range ``'100+'``, or a single age ``'7'`` / ``7``.
    group_start, group_end : number
        Inclusive bounds of the group; ``group_end`` may be ``float('inf')``.

    Returns
    -------
    bool
        True if the label's lower bound is >= ``group_start`` and its upper
        bound is <= ``group_end``.

    Raises
    ------
    ValueError
        If the label cannot be parsed into integer bounds.
    """
    label = str(age_range).strip()

    if label.endswith('+'):
        # 'N+' covers N and everything above, so its upper bound is infinity.
        # Only a group that is itself unbounded can contain it; checking the
        # lower bound alone would place e.g. '100+' in '0-4' as well.
        start, end = int(label[:-1]), float('inf')
    elif '-' in label:
        start, end = map(int, label.split('-'))
    else:
        # Single year of age.
        start = end = int(label)

    return start >= group_start and end <= group_end

def aggregate_data(data, age_groups):
    """Sum population and deaths per year and age group and derive the death rate.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Time', 'Age', 'Population' and 'Deaths'.
    age_groups : dict
        Maps a group label to an inclusive ``(start, end)`` pair of ages.

    Returns
    -------
    pandas.DataFrame
        One row per (year, age group), years ascending and groups in the
        order of ``age_groups``, with columns 'Year', 'AgeGroup',
        'Population', 'Deaths' and 'DeathRate'. 'DeathRate' is the plain
        ratio deaths / population (not scaled per 1000) and is 0 when the
        summed population is zero.
    """
    columns = ['Year', 'AgeGroup', 'Population', 'Deaths', 'DeathRate']

    # Group membership depends only on the 'Age' label, never on the year,
    # so each mask is built once instead of once per (year, group) pair.
    group_masks = {
        label: data['Age'].apply(
            lambda age, lo=start, hi=end: is_age_in_group(age, lo, hi)
        )
        for label, (start, end) in age_groups.items()
    }

    records = []
    for year in sorted(data['Time'].unique()):
        in_year = data['Time'] == year
        for label, in_group in group_masks.items():
            subset = data[in_year & in_group]

            total_population = subset['Population'].sum()
            total_deaths = subset['Deaths'].sum()

            # Guard against division by zero for empty groups.
            death_rate = (total_deaths / total_population) if total_population else 0

            records.append({
                'Year': year,
                'AgeGroup': label,
                'Population': total_population,
                'Deaths': total_deaths,
                'DeathRate': death_rate
            })

    # Passing the columns keeps the schema stable even when there are no rows.
    return pd.DataFrame(records, columns=columns)

# Build the per-year, per-age-group summary and preview its first rows.
df = aggregate_data(data=data, age_groups=age_groups)
df.head(n=10)

In [None]:
# Line chart of the aggregated death rate over time, one trace per age group.
line_options = dict(
    x='Year',
    y='DeathRate',
    color='AgeGroup',
    title='Death Rate by Age Group and Year',
    labels={'DeathRate': 'Death Rate'},
)
fig = px.line(df, **line_options)

fig.show()

In [None]:
# Death rates for the year 1950, keyed by the starting age of each band.
death_rates = pd.Series({
    0: 0.041511,
    5: 0.004729,
    15: 0.008426,
    35: 0.011559,
    50: 0.034005,
})

In [None]:
# Single years of age covered by the life table: 0 up to, but excluding, upper_age.
upper_age = 100
ages = range(0, upper_age)

In [None]:
# Look up the death rate that applies to each single year of age.
# `death_rates` is indexed by the first age of each band, so forward-filling it
# over `ages` gives every age the rate of the band it falls into. This yields a
# float64 column directly instead of filling an object-dtype frame row by row.
# An age below the first band start gets NaN rather than silently wrapping
# around to the last band's rate.
life_table = (
    death_rates
    .sort_index()                              # method='ffill' needs a monotonic index
    .reindex(pd.Index(ages), method='ffill')
    .astype(float)
    .to_frame('death_rate')
)

In [None]:
def _build_life_table(rates, radix=100000):
    """Derive the life-table columns from one-year death probabilities.

    The columns are computed on NumPy arrays and assigned whole, instead of
    element by element through chained indexing (``df["col"][x] = ...``).
    Chained assignment writes to a temporary object and is silently lost
    under pandas Copy-on-Write; it also forced floats into int-initialised
    columns.

    Parameters
    ----------
    rates : pandas.DataFrame
        Indexed by age (unnamed index) with a 'death_rate' column holding qx.
        Must contain at least one row.
    radix : number, default 100000
        Size of the starting cohort, i.e. lx at the first age.

    Returns
    -------
    pandas.DataFrame
        One row per age with columns 'age', 'qx', 'lx', 'dx', 'μx', 'Tx',
        'e0x', 'Lx' and 'ex', in that order.
    """
    table = rates.reset_index().rename(columns={'index': 'age', 'death_rate': 'qx'})

    # qx: probability that a person aged exactly x dies before exact age x+1 (qx = dx / lx).
    table['qx'] = table['qx'].astype(float)
    qx = table['qx'].to_numpy()
    n_ages = len(table)

    # lx: number of persons alive at exact age x, starting from the radix.
    lx = np.empty(n_ages)
    lx[0] = radix
    for i in range(1, n_ages):
        lx[i] = lx[i - 1] - qx[i - 1] * lx[i - 1]
    table['lx'] = lx

    # dx: number of persons who die aged x last birthday. Everyone still alive
    # at the final tabulated age is assumed to die in that year.
    dx = lx - np.append(lx[1:], 0.0)
    table['dx'] = dx

    # μx: force of mortality, approximated from the deaths on either side of age x.
    # NOTE(review): the 2.0692602739726 factor applied at the first age is kept
    # from the original; its derivation is not documented here — confirm.
    mux = np.empty(n_ages)
    mux[0] = 2.0692602739726 * dx[0] / (2 * lx[0])
    mux[1:] = (dx[:-1] + dx[1:]) / (2 * lx[1:])
    table['μx'] = mux

    # Tx: as computed here, the sum of lx over age x and all older ages
    # (a reverse cumulative sum replaces the former nested loop).
    table['Tx'] = lx[::-1].cumsum()[::-1]

    # e0x: complete expectation of life, i.e. the curtate expectation plus half a year.
    table['e0x'] = table['Tx'] / table['lx'] - 0.5

    # Lx: as computed here, Tx minus lx, i.e. the sum of lx over ages strictly
    # above x. NOTE(review): the original comment described this column as the
    # years lived between x and x+1, which is not what is calculated; the
    # column name is kept for compatibility.
    table['Lx'] = table['Tx'] - table['lx']

    # ex: curtate expectation of life (only complete years of life count).
    table['ex'] = table['Lx'] / table['lx']
    return table


df = _build_life_table(life_table)
df