# Modelling Population Dynamics

This workbook outlines a simple simulation technique to model potentia socioeconomic changes over time that may be induced by large transit infrastructure projects. The goal of these models and scenarios is not realism. Instead, they are intended as a sensitivity analysis.

In [47]:
import pandas as pd
import altair as alt
import numpy as np
import os

# Replace with your data folder paths
counts_folder = r"data/counts/input"
interim_folder = r"data/counts/interim"
output_folder = r"data/counts/output"

To run this notebook, we need baseline access scores calcualted in the `Access to Opportunities` Jupyter notebook. If you have not generated those scores yet, please work through that notebook first.

We start by loading these scores as well as the link data between traffic analysis zones and dissemination areas.

In [11]:
acc = pd.read_csv(os.path.join(output_folder, "access_change_by_taz.csv"))

taz_da = pd.read_csv(os.path.join(interim_folder, 'taz_da_link.csv'), dtype={'DAUID': int, 'taz_id': int, 'frac_da_in_taz': float})
da_demo = pd.read_csv(os.path.join(counts_folder, 'da_census_profile.csv'))
taz_da = pd.merge(taz_da, da_demo, on='DAUID')
taz_da.head()

Unnamed: 0,DAUID,taz_id,frac_da_in_taz,pop_2016,labour_unemployed,labour_total,income_lico,income_lim,income_total,vm_total,vm_minority,im_total,im_2011_2016
0,35180487,1035,0.9999997,277.0,10.0,265.0,5.0,10.0,280.0,305.0,130.0,305.0,0.0
1,35180488,1035,0.9999745,863.0,40.0,760.0,25.0,45.0,860.0,880.0,340.0,880.0,0.0
2,35180488,1036,2.53405e-05,863.0,40.0,760.0,25.0,45.0,860.0,880.0,340.0,880.0,0.0
3,35180489,1035,0.9999996,336.0,10.0,255.0,10.0,15.0,340.0,310.0,165.0,310.0,0.0
4,35180489,1036,3.828e-07,336.0,10.0,255.0,10.0,15.0,340.0,310.0,165.0,310.0,0.0


Next, we bring our TAZ and demographic data together, and join that resulting data to the access scores we loaded. Finally, we use a baseline population (in this case the 2016 population counts from the Census) to calculate the number of low-income individuals in a given zone. This `lim` column is the column that we will be adjusting the dynamics about.

In [48]:
# Let's start by grouping to TAZ and getting the low-income population that way
taz_da_inc = taz_da[['taz_id', 'DAUID', 'frac_da_in_taz', 'pop_2016', 'income_total', 'income_lim']].copy()
taz_da_inc['f_inc_total'] = taz_da_inc['income_total'] * taz_da_inc['frac_da_in_taz']
taz_da_inc['f_inc_lim'] = taz_da_inc['income_lim'] * taz_da_inc['frac_da_in_taz']
taz_inc = taz_da_inc[['taz_id', 'pop_2016', 'f_inc_total', 'f_inc_lim']].groupby("taz_id", as_index=False).sum()
taz_inc['frac_inc'] = taz_inc['f_inc_lim']/taz_inc['f_inc_total']
income = pd.merge(taz_inc, acc, left_on='taz_id', right_on='i')
income['lim'] = (income['frac_inc']*income['pop_2016']).astype(float).round()
income = income.dropna()

# Create an unadjusted copy for different scenario use
hold = income.copy()

income.head()

Unnamed: 0,taz_id,pop_2016,f_inc_total,f_inc_lim,frac_inc,i,emp_imp_BAU,emp_imp_A,emp_imp_B,emp_imp_C,delta_A,delta_B,delta_C,lim
0,1,6488.0,2174.479854,122.057804,0.056132,1,124540.0,133495.6,126273.5,124855.3,7.190995,1.391926,0.253174,364.0
1,2,2969.0,109.946478,14.944222,0.135923,2,30586.97,33008.28,30979.45,30665.24,7.916143,1.283144,0.255873,404.0
2,3,2562.0,215.812379,15.434231,0.071517,3,470720.2,518011.6,480292.3,471983.4,10.046602,2.033492,0.268347,183.0
3,4,4117.0,23.180068,3.163563,0.136478,4,2161346.0,2233066.0,2189504.0,2175641.0,3.318265,1.302786,0.661364,562.0
4,5,1830.0,26.898273,3.609354,0.134185,5,8546705.0,10857130.0,9271639.0,8683302.0,27.03294,8.482039,1.598241,246.0


## Polarization by Income

In this scenario, move low-income individuals from high-income areas to low-income areas iteratively, creating an income polarization effect. For each iteration we move 1,000 people, and then recalculate the access score ratio for these groups.

In [38]:
turns = 200
ratios = []

income = hold.copy()
baseline = np.average(income['frac_inc'], weights=income['pop_2016'])
income['classification'] = np.where(income.frac_inc < baseline, 'below', 'above')
print("Baseline:", baseline)
for turn in range(turns):
    # Choose someone from the upper zone and decrement
    uppers = income[income.classification == 'above'].sample(1000, weights='pop_2016', replace=True).index.values
    for u in uppers:
        income.loc[u, 'lim'] -= 1
    # Choose someone from the lower zone and increment
    lowers = income[income.classification == 'below'].sample(1000, weights='pop_2016', replace=True).index.values
    for l in lowers:
        income.loc[l, 'lim'] += 1

    # Recalculate the compliment
    income['lim_c'] = income['pop_2016'] - income['lim']

    # Calculate the weighted averages for access that we need, for three scenarios
    lim_acc = np.average(income['emp_imp_A'], weights=income['lim'])
    lim_c_acc = np.average(income['emp_imp_A'], weights=income['lim_c'])
    ratio = lim_acc/lim_c_acc
    ratios.append([turn+1, ratio])
polarized = pd.DataFrame(ratios, columns=['turn', 'ratio'])
polarized['scenario'] = 'Polarization'
polarized.head()

Baseline: 0.19465826282179277


Unnamed: 0,turn,ratio,scenario
0,1,1.118341,Polarization
1,2,1.118199,Polarization
2,3,1.118113,Polarization
3,4,1.118034,Polarization
4,5,1.117963,Polarization


## Gentrification Around Transit

In this scenario, areas with better *access* (rather than income) become more wealthy. In other words, we move low-income individuals from high-access areas to low-access areas.

In [39]:
turns = 200
ratios = []

income = hold.copy()
baseline = np.average(income['emp_imp_A'], weights=income['pop_2016'])
income['classification'] = np.where(income.emp_imp_A < baseline, 'below', 'above')
print("Baseline:", baseline)
for turn in range(turns):
    # Choose someone from the upper zone and decrement
    uppers = income[income.classification == 'above'].sample(1000, weights='pop_2016', replace=True).index.values
    for u in uppers:
        income.loc[u, 'lim'] -= 1
    # Choose someone from the lower zone and increment
    lowers = income[income.classification == 'below'].sample(1000, weights='pop_2016', replace=True).index.values
    for l in lowers:
        income.loc[l, 'lim'] += 1

    # Recalculate the compliment
    income['lim_c'] = income['pop_2016'] - income['lim']

    # Calculate the weighted averages for access that we need, for three scenarios
    lim_acc = np.average(income['emp_imp_A'], weights=income['lim'])
    lim_c_acc = np.average(income['emp_imp_A'], weights=income['lim_c'])
    ratio = lim_acc/lim_c_acc
    ratios.append([turn+1, ratio])

gentrified = pd.DataFrame(ratios, columns=['turn', 'ratio'])
gentrified['scenario'] = 'Gentrification'
gentrified.head()

Baseline: 645551.6005813347


Unnamed: 0,turn,ratio,scenario
0,1,1.116092,Gentrification
1,2,1.113583,Gentrification
2,3,1.111115,Gentrification
3,4,1.108749,Gentrification
4,5,1.106351,Gentrification


## Degentrification Around Transit

In this scenario, areas with better *access* (rather than income) become **less** wealthy. In other words, we move low-income individuals from low-access areas to high-access areas.

In [40]:
turns = 200
ratios = []

income = hold.copy()
baseline = np.average(income['emp_imp_A'], weights=income['pop_2016'])
income['classification'] = np.where(income.emp_imp_A < baseline, 'below', 'above')
print("Baseline:", baseline)
for turn in range(turns):
    # Choose someone from the upper zone and decrement
    uppers = income[income.classification == 'above'].sample(1000, weights='pop_2016', replace=True).index.values
    for u in uppers:
        income.loc[u, 'lim'] += 1
    # Choose someone from the lower zone and increment
    lowers = income[income.classification == 'below'].sample(1000, weights='pop_2016', replace=True).index.values
    for l in lowers:
        income.loc[l, 'lim'] -= 1

    # Recalculate the compliment
    income['lim_c'] = income['pop_2016'] - income['lim']

    # Calculate the weighted averages for access that we need, for three scenarios
    lim_acc = np.average(income['emp_imp_A'], weights=income['lim'])
    lim_c_acc = np.average(income['emp_imp_A'], weights=income['lim_c'])
    ratio = lim_acc/lim_c_acc
    ratios.append([turn+1, ratio])

degentrified = pd.DataFrame(ratios, columns=['turn', 'ratio'])
degentrified['scenario'] = 'Degentrification'
degentrified.head()

Baseline: 645551.6005813347


Unnamed: 0,turn,ratio,scenario
0,1,1.12071,Degentrification
1,2,1.123193,Degentrification
2,3,1.125702,Degentrification
3,4,1.128075,Degentrification
4,5,1.130484,Degentrification


## Visualization

Finally, we can plot our three scenarios over the iterations we've chosen to see how the ratio of access for low-income individuals compares to those of non-low-income.

In [46]:
together = pd.concat([polarized, gentrified, degentrified], axis=0)
together.head()

alt.Chart(together).mark_line(size=3).encode(
    alt.X('turn:Q', title='Iteration'),
    alt.Y('ratio:Q', title="Ratio of access for low-income and non-low-income households"),
    alt.Color('scenario:N', title="Dynamics"),
    alt.StrokeDash('scenario:N', title="")
).properties(
    width=600,
    height=500,
    title='Population Dynamics Simulation for the SmartTrack Scenario A'
).configure(font='Roboto').configure_axis(
    grid=False,
    labelFontSize=12,
    titleFontSize=16
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=18,
    anchor='start'
)