<h1 style="text-align: center;"> Prelims Demography Tutorial 1 - Population Measures </h1>
<h2 style="text-align: center;"> Python Solution </h2>

# Some preamble
This Python solution file is entirely extra-curricular. You will **not** be assessed on using Python to calculate demographic rates in this course, so do not feel pressured if you find the material below difficult. Please feel free to ask me if you have any questions or want further clarification.

Load the required packages and set up the paths

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from pathlib import Path

# the data folder sits next to the folder the notebook is run from
working_dir = Path('').resolve()
data_path = working_dir.parents[0] / 'data'

# Question 1
Load the data

In [None]:
# read the Question 1 mortality table from the data folder
data_1 = pd.read_csv(data_path.joinpath('mort.csv'))

## a & b

In [None]:
# append a 'total' row holding the sum of every numeric column
column_totals = data_1.drop(columns=['age']).sum()
data_1.loc[data_1.shape[0], :] = ['total', *column_totals.tolist()]

# death rate = deaths / person-years, computed for every row (age groups and total)
populations = ('coal', 'engine', 'ew')
for prefix in populations:
    person_years = data_1[f'{prefix}_pop'] * 5
    data_1[f'{prefix}_cdr'] = data_1[f'{prefix}_deaths'] / person_years

# keep the three columns of each population side by side for readability
ordered_columns = ['age']
for prefix in populations:
    ordered_columns += [f'{prefix}_pop', f'{prefix}_deaths', f'{prefix}_cdr']
data_1 = data_1[ordered_columns]
data_1

Note here we should use **person-years** as the denominator

### Visualisation - seaborn

In [None]:
# bar chart showing the CDR
# the 'total' row carries the rate computed from the column sums
is_total = data_1['age'] == 'total'
data_1_cdr = data_1.loc[is_total, ['coal_cdr', 'engine_cdr', 'ew_cdr']].T.reset_index()
data_1_cdr.columns = ['Population', 'CDR']

# draw the bars and replace the column names on the x-axis with readable names
bar_ax = sns.barplot(data=data_1_cdr, x='Population', y='CDR')
bar_ax.set_xticks(range(3))
bar_ax.set_xticklabels(['Coal Miners', 'Engine Drivers', 'England & Wales'])
plt.show()

In [None]:
# line chart showing the ASDR by population
# firstly, transform the data to long format (the 'total' row is not an age group)
rate_columns = ['coal_cdr', 'engine_cdr', 'ew_cdr']
data_1_long = data_1.loc[data_1['age'] != 'total', ['age'] + rate_columns].melt(
    id_vars='age',
    value_vars=rate_columns,
    var_name='population',
    value_name='asdr',
)

# plot the data
line_ax = sns.lineplot(data=data_1_long, x='age', y='asdr', hue='population', errorbar=None)
line_ax.set_ylabel('ASDR')
line_ax.set_xlabel('Age')

# rename the legend entries by key: passing only `labels` to legend() pairs
# them with the artists by drawing order, which silently mislabels the lines
# if that order ever changes
legend_names = {'coal_cdr': 'Coal Miners', 'engine_cdr': 'Engine Drivers', 'ew_cdr': 'England & Wales'}
handles, keys = line_ax.get_legend_handles_labels()
line_ax.legend(handles=handles, labels=[legend_names.get(key, key) for key in keys], title='Population')
plt.show()

If you want to combine the two plots into one plot:

In [None]:
# combine the two plots: CDR bars on the left, ASDR lines on the right
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# bar chart
sns.barplot(data=data_1_cdr, x='Population', y='CDR', ax=ax[0])
ax[0].set_xticks(range(3))
ax[0].set_xticklabels(['Coal Miners', 'Engine Drivers', 'England & Wales'])

# line chart
sns.lineplot(data=data_1_long, x='age', y='asdr', hue='population', ax=ax[1], errorbar=None)
ax[1].set_ylabel('ASDR')
ax[1].set_xlabel('Age')

# rename the legend entries by key: passing only `labels` to legend() pairs
# them with the artists by drawing order, which silently mislabels the lines
# if that order ever changes
legend_names = {'coal_cdr': 'Coal Miners', 'engine_cdr': 'Engine Drivers', 'ew_cdr': 'England & Wales'}
handles, keys = ax[1].get_legend_handles_labels()
ax[1].legend(handles=handles, labels=[legend_names.get(key, key) for key in keys], title='Population')

plt.show()

# # if you want to save the plot
# fig.savefig(Path('').resolve().parents[0] / 'output' / 'q1.png')

### Visualisation - plotly
I personally prefer ``plotly`` for making graphs in Python because it is more consistent with object-oriented programming and can produce interactive graphs. This part is an optional extra on top of the extra material, and you should be very proud if you can already produce the plots with ``seaborn``!

In [None]:
# bar chart showing the CDR
# the 'total' row carries the rate computed from the column sums
cdr_columns = ['coal_cdr', 'engine_cdr', 'ew_cdr']
data_1_cdr = data_1.loc[data_1['age'] == 'total', cdr_columns].T.reset_index()
data_1_cdr.columns = ['Population', 'CDR']

# draw the bars and show readable names instead of the column names
fig = px.bar(data_1_cdr, x='Population', y='CDR')
fig.update_xaxes(
    tickvals=data_1_cdr['Population'],
    ticktext=['Coal Miners', 'Engine Drivers', 'England & Wales'],
)
fig.show()

In [None]:
# line chart showing the ASDR by population
# firstly, reshape the age-group rows (everything except 'total') to long format
rate_columns = ['coal_cdr', 'engine_cdr', 'ew_cdr']
age_rows = data_1['age'] != 'total'
data_1_long = data_1.loc[age_rows, ['age'] + rate_columns].melt(
    id_vars='age',
    value_vars=rate_columns,
    var_name='population',
    value_name='asdr',
)

# plot the data
fig = px.line(data_1_long, x='age', y='asdr', color='population')
fig.update_yaxes(title='ASDR')
fig.update_xaxes(title='Age')
fig.update_legends(title='Population')

# swap the column names for readable names in the legend and the hover text
legend_text = {'coal_cdr':'Coal Miners', 'engine_cdr': 'Engine Drivers', 'ew_cdr': 'England & Wales'}
for trace in fig.data:
    display_name = legend_text[trace.name]
    trace.update(
        name=display_name,
        legendgroup=display_name,
        hovertemplate=trace.hovertemplate.replace(trace.name, display_name),
    )
fig.show()

In [None]:
# combine the two plots: CDR bars in column 1, ASDR lines in column 2
fig = make_subplots(rows=1, cols=2)

# bar chart: copy every trace of the stand-alone figure into the first panel
fig1 = px.bar(data_1_cdr, x='Population', y='CDR')
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)

# update the axes of the first panel on the main fig
population_ticks = data_1_cdr['Population'].unique().tolist()
fig.update_xaxes(
    title='Population',
    tickvals=population_ticks,
    ticktext=['Coal Miners', 'Engine Drivers', 'England & Wales'],
    row=1,
    col=1,
)
fig.update_yaxes(title='CDR', row=1, col=1)

# line chart: rename each trace, then copy it into the second panel
fig2 = px.line(data_1_long, x='age', y='asdr', color='population')
legend_text = {'coal_cdr':'Coal Miners', 'engine_cdr': 'Engine Drivers', 'ew_cdr': 'England & Wales'}
for trace in fig2.data:
    display_name = legend_text[trace.name]
    trace.update(
        name=display_name,
        legendgroup=display_name,
        hovertemplate=trace.hovertemplate.replace(trace.name, display_name),
    )
for trace in fig2.data:
    fig.add_trace(trace, row=1, col=2)

# update the axes of the second panel on the main fig
fig.update_yaxes(title='ASDR', row=1, col=2)
fig.update_xaxes(title='Age', row=1, col=2)

fig.show()

# # if you want to save the plot
# fig.write_image(Path('').resolve().parents[0] / 'output' / 'q1_px.png')

# Question 2
Load the data

In [None]:
# read the two Question 2 fertility tables (UK first, then TW)
data_2_uk, data_2_tw = (
    pd.read_csv(data_path / file_name) for file_name in ('fert_uk.csv', 'fert_tw.csv')
)

## a

In [None]:
# crude birth rate: births / person-years
# denominators used for each year, keyed by the year in the birth column names
denominators_uk = {'1977': 46639000, '1991': 47875000, '2011': 53107000}
denominators_tw = {'1977': 17043000, '1991': 20679000, '2011': 23269000}

# total births over all age groups divided by the matching denominator
cbr_uk_1977, cbr_uk_1991, cbr_uk_2011 = (
    data_2_uk[f'birth_{year}'].sum() / denominator
    for year, denominator in denominators_uk.items()
)
cbr_tw_1977, cbr_tw_1991, cbr_tw_2011 = (
    data_2_tw[f'birth_{year}'].sum() / denominator
    for year, denominator in denominators_tw.items()
)

cbr_uk_1977, cbr_uk_1991, cbr_uk_2011, cbr_tw_1977, cbr_tw_1991, cbr_tw_2011

## b

In [None]:
# age-specific fertility rate: births / person-years (by age group)
years = ('1977', '1991', '2011')
for frame in (data_2_uk, data_2_tw):
    for year in years:
        frame[f'asfr_{year}'] = frame[f'birth_{year}'] / frame[f'fe_{year}']

# group the three columns of each year together for better readability
ordered_columns = ['age']
for year in years:
    ordered_columns += [f'birth_{year}', f'fe_{year}', f'asfr_{year}']
data_2_uk = data_2_uk[ordered_columns]
data_2_tw = data_2_tw[ordered_columns]

In [None]:
# display the UK table, now including the ASFR columns
data_2_uk

In [None]:
# display the TW table, now including the ASFR columns
data_2_tw

### Visualisation - seaborn

In [None]:
# line chart showing the UK ASFR
# firstly, transform the data to long format
asfr_columns = ['asfr_1977', 'asfr_1991', 'asfr_2011']
data_2_uk_long = data_2_uk.loc[:, ['age'] + asfr_columns].melt(
    id_vars='age',
    value_vars=asfr_columns,
    var_name='year',
    value_name='asfr',
)

# plot the data
uk_ax = sns.lineplot(data=data_2_uk_long, x='age', y='asfr', hue='year', errorbar=None)
uk_ax.set_ylabel('ASFR')
uk_ax.set_xlabel('Age')

# rename the legend entries by key: passing only `labels` to legend() pairs
# them with the artists by drawing order, which silently mislabels the lines
# if that order ever changes
year_names = {'asfr_1977': '1977', 'asfr_1991': '1991', 'asfr_2011': '2011'}
handles, keys = uk_ax.get_legend_handles_labels()
uk_ax.legend(handles=handles, labels=[year_names.get(key, key) for key in keys], title='Year')
plt.show()

In [None]:
# line chart showing the TW ASFR
# firstly, transform the data to long format
asfr_columns = ['asfr_1977', 'asfr_1991', 'asfr_2011']
data_2_tw_long = data_2_tw.loc[:, ['age'] + asfr_columns].melt(
    id_vars='age',
    value_vars=asfr_columns,
    var_name='year',
    value_name='asfr',
)

# plot the data
tw_ax = sns.lineplot(data=data_2_tw_long, x='age', y='asfr', hue='year', errorbar=None)
tw_ax.set_ylabel('ASFR')
tw_ax.set_xlabel('Age')

# rename the legend entries by key: passing only `labels` to legend() pairs
# them with the artists by drawing order, which silently mislabels the lines
# if that order ever changes
year_names = {'asfr_1977': '1977', 'asfr_1991': '1991', 'asfr_2011': '2011'}
handles, keys = tw_ax.get_legend_handles_labels()
tw_ax.legend(handles=handles, labels=[year_names.get(key, key) for key in keys], title='Year')
plt.show()

If you want to combine the two plots into one plot:

In [None]:
# combine the two plots: UK ASFR in the first panel, TW ASFR in the second
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

for panel, long_data in zip(ax, (data_2_uk_long, data_2_tw_long)):
    sns.lineplot(data=long_data, x='age', y='asfr', hue='year', ax=panel)
    panel.set_ylabel('ASFR')
    panel.set_xlabel('Age')

# Add a single legend to the figure, built from the handles of the second panel
handles, labels = ax[1].get_legend_handles_labels()
fig.legend(handles=list(handles), labels=['1977', '1991', '2011'], loc='right', title='Year')

# Remove the legends that seaborn drew inside each subplot
for panel in ax:
    panel.get_legend().remove()

plt.show()

# # if you want to save the plot
# fig.savefig(Path('').resolve().parents[0] / 'output' / 'q2.png')

### Visualisation - plotly
Again, this part is an optional extra on top of the extra material.

In [None]:
# line chart showing the UK ASFR by year
# firstly, transform the data to long format
asfr_columns = ['asfr_1977', 'asfr_1991', 'asfr_2011']
data_2_uk_long = data_2_uk.loc[:, ['age'] + asfr_columns].melt(
    id_vars='age',
    value_vars=asfr_columns,
    var_name='year',
    value_name='asfr',
)

# plot the data
fig = px.line(data_2_uk_long, x='age', y='asfr', color='year')
fig.update_yaxes(title='ASFR')
fig.update_xaxes(title='Age')
fig.update_legends(title='Year')

# show the bare year instead of the column name in the legend and hover text
legend_text = {'asfr_1977':'1977', 'asfr_1991': '1991', 'asfr_2011': '2011'}
for trace in fig.data:
    year_label = legend_text[trace.name]
    trace.update(
        name=year_label,
        legendgroup=year_label,
        hovertemplate=trace.hovertemplate.replace(trace.name, year_label),
    )
fig.show()

In [None]:
# line chart showing the TW ASFR by year
# firstly, transform the data to long format
asfr_columns = ['asfr_1977', 'asfr_1991', 'asfr_2011']
data_2_tw_long = data_2_tw.loc[:, ['age'] + asfr_columns].melt(
    id_vars='age',
    value_vars=asfr_columns,
    var_name='year',
    value_name='asfr',
)

# plot the data
fig = px.line(data_2_tw_long, x='age', y='asfr', color='year')
fig.update_yaxes(title='ASFR')
fig.update_xaxes(title='Age')
fig.update_legends(title='Year')

# show the bare year instead of the column name in the legend and hover text
legend_text = {'asfr_1977':'1977', 'asfr_1991': '1991', 'asfr_2011': '2011'}
for trace in fig.data:
    year_label = legend_text[trace.name]
    trace.update(
        name=year_label,
        legendgroup=year_label,
        hovertemplate=trace.hovertemplate.replace(trace.name, year_label),
    )
fig.show()

In [None]:
# combine the two plots: UK ASFR in column 1, TW ASFR in column 2
fig = make_subplots(rows=1, cols=2)
legend_text = {'asfr_1977':'1977', 'asfr_1991': '1991', 'asfr_2011': '2011'}

# UK ASFR: rename each trace to its year, then copy it into the first panel
fig1 = px.line(data_2_uk_long, x='age', y='asfr', color='year')
for trace in fig1.data:
    year_label = legend_text[trace.name]
    trace.update(
        name=year_label,
        legendgroup=year_label,
        hovertemplate=trace.hovertemplate.replace(trace.name, year_label),
    )
    fig.add_trace(trace, row=1, col=1)

# TW ASFR: same renaming, but hidden from the legend so each year is listed once
fig2 = px.line(data_2_tw_long, x='age', y='asfr', color='year')
for trace in fig2.data:
    year_label = legend_text[trace.name]
    trace.update(
        name=year_label,
        legendgroup=year_label,
        hovertemplate=trace.hovertemplate.replace(trace.name, year_label),
    )
    trace.showlegend = False
    fig.add_trace(trace, row=1, col=2)

# update y-axis and x-axis titles of both panels on the main fig
for panel_col in (1, 2):
    fig.update_yaxes(title='ASFR', row=1, col=panel_col)
    fig.update_xaxes(title='Age', row=1, col=panel_col)

fig.show()

# # if you want to save the plot
# fig.write_image(Path('').resolve().parents[0] / 'output' / 'q2_px.png')

## c

In [None]:
# total fertility rate: sum of ASFR (be aware of the age interval)
def calculate_tfr(data, first_interval=3, interval=5):
    """Return the total fertility rate implied by a series of ASFRs.

    Every age-specific rate is multiplied by the width (in years) of its age
    group and the products are summed.

    Parameters
    ----------
    data : pandas.Series
        Age-specific fertility rates, one value per age group; the first
        element is treated as the first age group.
    first_interval : int or float, default 3
        Width in years applied to the first age group.
    interval : int or float, default 5
        Width in years applied to every remaining age group.

    Returns
    -------
    float
        ``first ASFR * first_interval + sum(other ASFRs) * interval``.
    """
    # select by position: ``data[0]`` is a label lookup and raises KeyError as
    # soon as the index is not the default 0..n-1 (e.g. after filtering rows)
    first_rate = data.iloc[0]
    remaining_rates = data.iloc[1:]
    tfr = first_rate * first_interval + remaining_rates.sum() * interval
    return tfr

# apply the TFR formula to each ASFR column and label the results by year
asfr_columns = ['asfr_1977', 'asfr_1991', 'asfr_2011']
year_labels = ['1977', '1991', '2011']

tfr_uk = data_2_uk[asfr_columns].apply(calculate_tfr, axis=0).set_axis(year_labels)
tfr_tw = data_2_tw[asfr_columns].apply(calculate_tfr, axis=0).set_axis(year_labels)

In [None]:
# display the UK TFR for each year
tfr_uk

In [None]:
# display the TW TFR for each year
tfr_tw

### Visualisation - seaborn

In [None]:
# line chart showing the TFR by country
# one row per country, one column per year; the 'index' column names the country
tfr = pd.concat([tfr_uk, tfr_tw], axis=1).T.reset_index()
tfr['index'] = ['UK', 'TW']

# transform the data to long format
year_columns = ['1977', '1991', '2011']
tfr_long = tfr.melt(id_vars='index', value_vars=year_columns, var_name='year', value_name='tfr')

# plot the data
tfr_ax = sns.lineplot(data=tfr_long, x='year', y='tfr', hue='index', errorbar=None)
tfr_ax.set_ylabel('TFR')
tfr_ax.set_xlabel('Year')
tfr_ax.legend(title='Country')

# # if you want to save the plot
# plt.savefig(Path('').resolve().parents[0] / 'output' / 'q2_tfr.png')

plt.show()

### Visualisation - plotly
Again, this part is an optional extra on top of the extra material.

In [None]:
# line chart showing the TFR by country
# one row per country, one column per year; the 'index' column names the country
tfr = pd.concat([tfr_uk, tfr_tw], axis=1).T.reset_index()
tfr['index'] = ['UK', 'TW']

# transform the data to long format
year_columns = ['1977', '1991', '2011']
tfr_long = tfr.melt(id_vars='index', value_vars=year_columns, var_name='year', value_name='tfr')

# plot the data (each update returns the figure, so the calls can be chained)
fig = (
    px.line(tfr_long, x='year', y='tfr', color='index')
    .update_yaxes(title='TFR')
    .update_xaxes(title='Year')
    .update_legends(title='Country')
)
fig.show()

# # if you want to save the plot
# fig.write_image(Path('').resolve().parents[0] / 'output' / 'q2_tfr_px.png')