# A Look at income over time


Questions: 

1. How has the wealth distribution changed over time?

    a. Home Ownership

    b. Student Debt

    c. Wealth concentration on racial lines

    d. Mobility

2. How do racial factors differ?

    a. Is there a difference in income mobility between immigrants and natives for POC?

    b. What does mobility look like along racial lines?

3. How does geography play into the role of mobility and income? 
    
    a. Which locations have seen the biggest wealth growth? 

    b. Which locations have seen the most even wealth growth?

4. How does the US compare to other countries? 



Datasets: 

[Tidy Data](https://github.com/rfordatascience/tidytuesday/blob/master/data/2021/2021-02-09/readme.md)

[Opportunity Insights](https://opportunityinsights.org/data/)
    


## Import libraries and Data

In [72]:
import csv
import requests
import pandas as pd
import io
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from urllib.request import urlopen
import json
import plotly.io as pio

In [96]:
# All tables come from the same TidyTuesday 2021-02-09 release; only the
# file stem differs between them.
TIDYTUESDAY_BASE_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-02-09/'


def _read_tidytuesday(stem):
    """Download `<stem>.csv` from the TidyTuesday release and return it as a DataFrame."""
    return pd.read_csv(f'{TIDYTUESDAY_BASE_URL}{stem}.csv')


lifetime_earn = _read_tidytuesday('lifetime_earn')
student_debt = _read_tidytuesday('student_debt')
retirement = _read_tidytuesday('retirement')
home_owner = _read_tidytuesday('home_owner')
race_wealth = _read_tidytuesday('race_wealth')
income_time = _read_tidytuesday('income_time')
income_limits = _read_tidytuesday('income_limits')
income_aggregate = _read_tidytuesday('income_aggregate')
income_distribution = _read_tidytuesday('income_distribution')
income_mean = _read_tidytuesday('income_mean')

In [3]:
# Preview the first rows of the lifetime-earnings table (gender, race, lifetime_earn).
lifetime_earn.head()

Unnamed: 0,gender,race,lifetime_earn
0,Men,White,2706000
1,Men,Black,1780000
2,Men,Hispanic any race,2011000
3,Women,White,1518000
4,Women,Black,1260000


## How has wealth distribution changed over time?


### Home ownership

In [52]:
# Split the home-ownership table by race. `home_white` gets a fresh 0..n-1
# index (the old index is kept as an 'index' column by reset_index()).
home_white = home_owner[home_owner['race'] == 'White'].reset_index()
home_black = home_owner[home_owner['race'] == 'Black']
home_hispanic = home_owner[home_owner['race'] == 'Hispanic']
home_poc = home_owner[home_owner['race'] != 'White']

# Yearly means. `race` is a string column, so restrict the mean to numeric
# columns explicitly: recent pandas versions no longer drop non-numeric
# columns silently and fail on them instead.
home_poc = home_poc.groupby(by=['year']).mean(numeric_only=True).reset_index()
home_owner_agg = home_owner.groupby(by=['year']).mean(numeric_only=True).reset_index()

home_poc.head()

Unnamed: 0,year,home_owner_pct
0,1976,0.434726
1,1977,0.43191
2,1978,0.435778
3,1979,0.470971
4,1980,0.480933


In [29]:

# One line per group, drawn in a fixed order: white, black, hispanic, all-race mean.
home_ownership_series = (
    ("white", home_white),
    ("black", home_black),
    ("hispanic", home_hispanic),
    ("Agg", home_owner_agg),
)

fig = go.Figure()
for trace_name, frame in home_ownership_series:
    fig.add_trace(go.Scatter(x=frame['year'], y=frame['home_owner_pct'], name=trace_name))

fig.show()

Difference in home ownership between white people and POC

In [53]:
# Subtract the POC rate for the *same year*. A plain Series subtraction aligns
# on the positional index, which is only correct if both frames happen to be
# in identical year order; looking the POC value up by year removes that
# dependency.
poc_pct_by_year = home_poc.set_index('year')['home_owner_pct']
home_white['diff'] = home_white['home_owner_pct'] - home_white['year'].map(poc_pct_by_year)

fig = go.Figure()
fig.add_trace(go.Scatter(x=home_white['year'], y=home_white['diff'], name="Diff"))
fig.show()

(Look at scale of y-axis)

Home ownership had the biggest disparity in the early 90s; however, there has really been very little variation in the difference in home ownership between white people and POC, which has varied by only about 5% over the last 40 years.

More data: 
Get data that shows geographic relationship between home ownership


### Student Debt

In [54]:
# Preview the first rows of the student-debt table (year, race, loan_debt, loan_debt_pct).
student_debt.head()

Unnamed: 0,year,race,loan_debt,loan_debt_pct
0,2016,White,11108.41,0.336751
1,2016,Black,14224.77,0.418359
2,2016,Hispanic,7493.999,0.218969
3,2013,White,8363.605,0.284555
4,2013,Black,10302.66,0.412277


In [55]:
# Order the student-debt rows chronologically so line plots run left to right.
student_debt = student_debt.sort_values('year')
student_debt.head()

Unnamed: 0,year,race,loan_debt,loan_debt_pct
29,1989,Hispanic,897.5826,0.127252
27,1989,White,1100.407,0.104712
28,1989,Black,1160.568,0.17882
26,1992,Hispanic,793.061,0.091457
25,1992,Black,927.4824,0.138277


In [60]:
# Per-race views of the student-debt table.
student_debt_white = student_debt[student_debt['race'] == 'White']
student_debt_black = student_debt[student_debt['race'] == 'Black']
student_debt_hispanic = student_debt[student_debt['race'] == 'Hispanic']

# Yearly mean across races. `race` is a string column, so limit the mean to
# numeric columns explicitly: recent pandas versions fail on non-numeric
# columns instead of silently dropping them.
student_debt_agg = student_debt.groupby(by=['year']).mean(numeric_only=True).reset_index()


In [118]:
# Left panel: share of families with student debt; right panel: debt amount.
debt_series = (
    ("white", student_debt_white),
    ("black", student_debt_black),
    ("hispanic", student_debt_hispanic),
    ("Agg", student_debt_agg),
)

fig = make_subplots(rows=1, cols=2)
fig.update_layout(autosize=False, width=1200, height=500)

for panel_col, measure in enumerate(('loan_debt_pct', 'loan_debt'), start=1):
    for trace_name, frame in debt_series:
        fig.add_trace(
            go.Scatter(x=frame['year'], y=frame[measure], name=trace_name),
            row=1, col=panel_col,
        )

fig.show()

No surprises here, but debt has exploded over the past 25 years. Oh, to have finished college with less than 1k of debt...

Black people have the largest debt burden, then white, then Hispanic. This was a little surprising. I figured white people would either have the most debt (assuming a larger portion of them went to school) or the least (because of having more means to pay off the debt). It seems neither of those assumptions was true.

In [111]:
# Drop incomplete rows and order chronologically in one pass.
race_wealth = race_wealth.dropna().sort_values(by=['year'])

# One view per race label present in the table.
race_wealth_white, race_wealth_black, race_wealth_hispanic, race_wealth_non_white = (
    race_wealth[race_wealth['race'] == race_label]
    for race_label in ('White', 'Black', 'Hispanic', 'Non-White')
)

# Which wealth measures are available (shown as the cell output).
race_wealth['type'].unique()

array(['Average', 'Median'], dtype=object)

In [117]:
# Family wealth by race: average in the left panel, median in the right.
fig = make_subplots(rows=1, cols=2)

fig.update_layout(
    autosize=False,
    width=1200,
    height=500,
)

wealth_by_race = (
    ("White", race_wealth_white),
    ("Black", race_wealth_black),
    ("Hispanic", race_wealth_hispanic),
    ("Non-White", race_wealth_non_white),
)

for panel_col, wealth_type in enumerate(('Average', 'Median'), start=1):
    for trace_name, frame in wealth_by_race:
        # Take x and y from the SAME filtered rows. Mixing the years of one
        # measure with the values of the other can misalign points once
        # dropna() has removed different rows for each measure, and it
        # previously put Median values for Non-White into the Average panel.
        rows = frame[frame['type'] == wealth_type]
        fig.add_trace(
            go.Scatter(x=rows['year'], y=rows['wealth_family'], name=trace_name),
            row=1, col=panel_col,
        )

fig.show()

White families on avg have almost 8-10x the amount of wealth as families of color do. 

This wealth gap has INCREASED over the years, which is a very concerning trend. I suspect that when we dig into the income percentile data we will see this is largely caused by the explosion of wealth in the top 10%, who are mostly white.

Similar story with the median: about 8-10x more wealth is held by white families than by families of color.

In [121]:
# Chronological order first, then one view per race.
retirement = retirement.sort_values('year')

retirement_white = retirement.loc[retirement['race'] == 'White']
retirement_black = retirement.loc[retirement['race'] == 'Black']
retirement_hispanic = retirement.loc[retirement['race'] == 'Hispanic']


In [126]:
# Retirement savings over time, one line per race.
fig = go.Figure(data=[
    go.Scatter(x=frame['year'], y=frame['retirement'], name=trace_name)
    for trace_name, frame in (
        ("white", retirement_white),
        ("black", retirement_black),
        ("hispanic", retirement_hispanic),
    )
])
fig.show()

Again, no real surprises here. POC have had little increase in retirement funds, going from 5k in the 90s to 25k in 2015. White people, on the other hand, have gone from 32k all the way to 150k with nice linear growth.

## Income aggregate

In [102]:
# Display order for the quintile labels, plus the rank used to sort them.
income_quintiles = ["Lowest", "Second", "Third", "Fourth", "Highest"]
custom_sort = {'Lowest': 0, 'Second': 1, 'Third': 2, "Fourth":3, "Highest":4, "Top 5%": 5}

# First order rows by quintile rank, then by year and race.
income_aggregate = income_aggregate.sort_values(
    by=["income_quintile"],
    key=lambda quintile_col: quintile_col.map(custom_sort),
)
income_aggregate = income_aggregate.sort_values(by=["year", "race"])

# The all-races view keeps its "Top 5%" rows; the per-race views below drop them.
income_aggregate_all = income_aggregate[income_aggregate['race']=='All Races']

races = ["Asian Alone", "Black Alone", "Hispanic", "White, Not Hispanic"]
income_races = [
    income_aggregate[
        (income_aggregate['race'] == race_label)
        & (income_aggregate['income_quintile'] != "Top 5%")
    ]
    for race_label in races
]

income_aggregate_all.head()

Unnamed: 0,year,race,number,income_quintile,income_share
312,1967,All Races,60813000,Lowest,4.0
313,1967,All Races,60813000,Second,10.8
314,1967,All Races,60813000,Third,17.3
315,1967,All Races,60813000,Fourth,24.2
316,1967,All Races,60813000,Highest,43.6


In [98]:

# One filled line per quintile; each trace fills down to the trace before it.
fig = go.Figure()
for income_quintile in income_quintiles:
    quintile_rows = income_aggregate_all[income_aggregate_all['income_quintile'] == income_quintile]
    fig.add_trace(go.Scatter(
        x=quintile_rows["year"],
        y=quintile_rows["income_share"],
        fill="tonexty",
        name=income_quintile,
    ))

fig.show()

In [15]:
# Treemap of the 2018 income share held by each quintile (all races).
#
# `income_aggregate_all` still contains "Top 5%" rows, so slicing by year
# yields more values than there are quintile labels. Look each share up by
# its quintile label instead of relying on row position, and take the first
# row per label in case a year is reported more than once.
rows_2018 = income_aggregate_all[income_aggregate_all['year'] == 2018]
share_by_quintile = rows_2018.groupby('income_quintile')['income_share'].first()

# A later cell appends "Top 5%" to `income_quintiles`; exclude it here so the
# chart is the same when this cell is re-run afterwards.
quintile_labels = [label for label in income_quintiles if label != "Top 5%"]

fig = go.Figure()
fig.add_trace(go.Treemap(
    labels = quintile_labels,
    parents = [""] * len(quintile_labels),
    values = list(share_by_quintile.reindex(quintile_labels))
))

In [51]:
# 2x2 grid of treemaps: 2018 income share per quintile, one panel per race.
# Panel order follows `income_races` (Asian, Black, Hispanic, White).
N_GRID_COLS = 2

fig = make_subplots(
    cols = N_GRID_COLS, rows = 2,
    column_widths = [0.4, 0.4],
    subplot_titles = ('Asian', 'Black', 'Hispanic', 'White'),  # fixed 'Asain' typo
    specs = [[{'type': 'treemap'}, {'type': 'treemap'}], [{'type': 'treemap'}, {'type': 'treemap'}]]
)

fig.update_layout(
    autosize=False,
    width=1200,
    height=1000,
)

for position, income_race in enumerate(income_races):
    # Fill the grid row by row: 0 -> (1, 1), 1 -> (1, 2), 2 -> (2, 1), 3 -> (2, 2).
    grid_row, grid_col = divmod(position, N_GRID_COLS)
    fig.add_trace(go.Treemap(
        labels = income_quintiles,
        textinfo="label+value",
        parents = ["", "", "", "", ""],
        values = list(income_race[income_race['year']==2018]['income_share'])
    ), row=grid_row + 1, col=grid_col + 1)

fig.show()

In [104]:
# Animated treemap of income share per quintile (all races), one frame per year.
years = list(income_aggregate_all['year'].unique())

# Guard the append so re-running this cell does not keep growing the list.
if "Top 5%" not in income_quintiles:
    income_quintiles.append("Top 5%")


def _income_share_treemap(year):
    """Build the income-share treemap trace for a single year.

    "Top 5%" is nested under "Highest". Its share is already part of the
    highest quintile's share, so `branchvalues="total"` is used: the parent
    value is treated as the total including its child rather than as an
    extra amount added on top of it.
    """
    year_rows = income_aggregate_all[income_aggregate_all['year'] == year]
    return go.Treemap(
        labels = income_quintiles,
        textinfo = "label+value",
        parents = ["Highest" if label == "Top 5%" else "" for label in income_quintiles],
        branchvalues = "total",
        values = list(year_rows['income_share']),
    )


fig = go.Figure(
    data=[_income_share_treemap(years[0])],
    layout=go.Layout(
        title_text="Income Share Over Time " + str(years[0]),
        updatemenus=[dict(type="buttons",
                          buttons=[dict(label="Play",
                                        method="animate",
                                        args=[None])])]
    ),
    frames=[
        go.Frame(
            data=[_income_share_treemap(year)],
            layout=go.Layout(
                uniformtext=dict(minsize=10, mode='show'),
                title_text="Income Share Over Time " + str(year)),
        )
        for year in years
    ],
)

fig.show()
# Also export a standalone HTML copy; auto_open launches it in the browser.
pio.write_html(fig, file='./IncomeShare.html', auto_open=True)

In [111]:
# Preview the last rows of the family-income table (year, percentile, income_family).
income_time.tail()



Unnamed: 0,year,percentile,income_family
151,2015,50th,62396.59202
152,2015,90th,175716.077391
153,2016,10th,14458.717669
154,2016,50th,64959.116023
155,2016,90th,182826.035465


In [122]:
# Treemap of 2016 family income at each reported percentile.
percentile_labels = list(income_time['percentile'].unique())
family_income_2016 = income_time.loc[income_time['year'] == 2016, 'income_family']

fig = go.Figure()
fig.add_trace(go.Treemap(
    labels=percentile_labels,
    parents=[""] * len(percentile_labels),  # every percentile is a top-level tile
    values=list(family_income_2016),
))

fig.show()

In [132]:
# Animated treemap of family income per percentile, one frame per year.
years = list(income_time['year'].unique())
percentile_labels = list(income_time['percentile'].unique())


def _family_income_treemap(year):
    """Build the family-income treemap trace for a single year.

    Every percentile is a top-level tile, so `parents` has one empty entry
    per label. The frames previously reused a six-entry list with a
    "Highest" parent copied from the quintile chart, which matched neither
    the number of labels nor any existing label.
    """
    return go.Treemap(
        labels=percentile_labels,
        textinfo="label+value",
        parents=[""] * len(percentile_labels),
        values=list(income_time[income_time['year'] == year]['income_family']),
    )


fig = go.Figure(
    data=[_family_income_treemap(years[0])],
    layout=go.Layout(
        title_text="Income Share Over Time " + str(years[0]),
        updatemenus=[dict(type="buttons",
                          buttons=[dict(label="Play",
                                        method="animate",
                                        args=[None])])]
    ),
    frames=[
        go.Frame(
            data=[_family_income_treemap(year)],
            layout=go.Layout(
                uniformtext=dict(minsize=10, mode='show'),
                title_text="Income Share Over Time " + str(year)),
        )
        for year in years
    ],
)

fig.show()
# Also export a standalone HTML copy; auto_open launches it in the browser.
pio.write_html(fig, file='./IncomeTime.html', auto_open=True)