In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
import warnings
from plotly.graph_objects import Figure
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Route DataFrame.plot()/Series.plot() through plotly instead of pandas' default backend.
pd.options.plotting.backend = 'plotly'
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/plotly - consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Location of the input CSV
file_path = './data/data.csv' #from UN Population

# Load only the columns that are needed and index the rows by (Time, Age)
data = pd.read_csv(
    file_path,
    usecols=['Age', 'Time', 'Population', 'Deaths'],
).set_index(['Time', 'Age'])

# Distinct values found in each index level
years = set(data.index.get_level_values('Time'))
age_groups = set(data.index.get_level_values('Age'))

In [None]:
def get_age_groups_in_range(age_groups, lower_limit, upper_limit):
    """Select the age labels whose starting age lies within a range.

    Args:
        age_groups: Iterable of age labels; the part before the first '-' must parse as an integer
        lower_limit: Smallest starting age to accept (inclusive)
        upper_limit: Largest starting age to accept (inclusive)

    Returns:
        The matching labels as a list, in iteration order. Labels containing '+' are never selected.

    """
    selected = []
    for label in age_groups:
        # Open-ended labels are skipped before any integer parsing is attempted
        if '+' in label:
            continue
        start_age = int(label.split('-')[0])
        if lower_limit <= start_age <= upper_limit:
            selected.append(label)
    return selected

# Inclusive [lowest, highest] starting age of each requested band. The bands must not
# overlap: a label whose starting age sits in two ranges would be counted in both bands.
agegroup_request = [[0, 4], [5, 14], [15, 34], [35, 49], [50, 69], [70, 200]]
# Keyed by the lower bound of each band; the values are the data's age labels in that band.
agegroup_map = {low: get_age_groups_in_range(age_groups, low, up) for low, up in agegroup_request}
# get_age_groups_in_range skips labels containing '+', so the open-ended '100+' label
# is added by hand to the oldest (70 and over) band.
agegroup_map[70].append('100+')

In [None]:
# Death rate per (year, age band): total deaths divided by total population over the
# age labels that make up each band.
age_labels = data.index.get_level_values(1)
band_rates = {}
for agegroup, labels in agegroup_map.items():
    # The membership mask depends only on the band, so it is built once per band
    # instead of once per (year, band) pair.
    in_band = age_labels.isin(labels)
    band_totals = data.loc[in_band, ['Deaths', 'Population']].groupby(level=0).sum()
    band_rates[agegroup] = band_totals['Deaths'] / band_totals['Population']

# Rows are years, sorted explicitly so plots and lookups do not depend on set iteration
# order; columns are the band lower bounds in agegroup_map order.
mapped_rates = pd.DataFrame(band_rates).sort_index().rename_axis(index=None)

In [None]:
# Plot the banded death rates with a title and axis labels so the figure stands on its own.
# The y-axis is log-scaled so bands with very different rates stay readable together.
mapped_rates.plot(
    labels={'index': 'year', 'value': 'death rate', 'variable': 'age band (lower bound)'},
    title='death rates by age band',
).update_yaxes(type='log')

In [None]:
def adapt_death_rates_for_lifetable(
    upper_age: int, 
    rates: pd.Series,
) -> pd.Series:
    """Get the death rates applicable to each year of age.

    Each age takes the rate of the band with the greatest lower bound that does not
    exceed it. Ages younger than the lowest band have no applicable rate and are
    returned as NaN.

    Args:
        upper_age: The top year of age to consider (exclusive): ages 0..upper_age - 1 are returned
        rates: The raw data for the death rates, indexed by the numeric lower bound
            of each age band (in any order)

    Returns:
        The death rates by year of age, as a float Series indexed by 'age'

    """
    ages = pd.RangeIndex(upper_age, name='age')
    # Sort so the lookup does not rely on the caller passing bands in ascending order
    banded = rates.sort_index()
    if banded.empty:
        return pd.Series(np.nan, index=ages, dtype=float)

    lower_bounds = np.asarray(banded.index, dtype=float)
    band_rates = banded.to_numpy(dtype=float)
    # side='right' makes an age equal to a lower bound fall into that band
    band_pos = np.searchsorted(lower_bounds, np.arange(upper_age), side='right') - 1
    # A position of -1 means the age is below every band; mask it explicitly because
    # plain indexing with -1 would wrap around to the last band
    values = np.where(band_pos >= 0, band_rates[band_pos], np.nan)
    return pd.Series(values, index=ages, dtype=float)


def get_lifetable_from_rates(
    rates: pd.Series,
    cohort_size: float = 100000,
) -> pd.Series:
    """Calculate cohort sizes - note this only works for increments of one year
    (intended to be used with adapt_death_rates_for_lifetable above).

    The first entry is the starting cohort; each later entry is the previous one
    reduced by the previous age's death rate. A NaN rate makes every later entry NaN.

    Args:
        rates: Annual death rates from adapt_death_rates_for_lifetable
        cohort_size: Size of the cohort at the first age in rates

    Returns:
        The life table: survivors at the start of each age, as a float Series
        sharing the index of rates

    """
    # Cumulative probability of surviving to the END of each age
    survival_to_end = np.cumprod(1.0 - rates.to_numpy(dtype=float))
    # Shift by one age so each entry is survival to the START of that age;
    # slicing to len(rates) also covers the empty-input case
    survival_to_start = np.concatenate(([1.0], survival_to_end))[:len(rates)]
    return pd.Series(cohort_size * survival_to_start, index=rates.index, dtype=float)

In [None]:
LIFETABLE_UPPER_AGE = 100  # exclusive upper bound passed to adapt_death_rates_for_lifetable

# `years` is a set: it has no defined order, and recent pandas versions refuse a set as
# `columns=`. Iterate a sorted list so the column order is deterministic.
ordered_years = sorted(years)

# One column per year, one row per single year of age; each frame is built in one step
# rather than grown column by column.
all_rates = pd.DataFrame({
    year: adapt_death_rates_for_lifetable(LIFETABLE_UPPER_AGE, mapped_rates.loc[year])
    for year in ordered_years
})
lifetables = pd.DataFrame({
    year: get_lifetable_from_rates(all_rates[year])
    for year in ordered_years
})

In [None]:
# Plot the life tables, one trace per year column
lifetable_plot_kwargs = {
    'labels': {'value': 'surviving'},
    'height': 600,
    'title': 'life tables by year',
}
lifetables.plot(**lifetable_plot_kwargs)