In [None]:
import numpy as np
# imports
import matplotlib.pyplot as plt
import pandas as pd
import pyreadstat as prs
import seaborn as sns
from helper import *
import os, sys

# Paths of the yearly MORG extracts, one per two-digit year suffix 03..23.
# The order matters: later cells pair the j-th file with the j-th row of the
# inflation table by position.
files = [f'data/morg{yy:02d}.dta' for yy in range(3, 24)]

In [None]:
# Load every yearly extract into memory.
#
# Planned per-year statistics (built in later cells): log hourly wage,
# % < high school, % with college, years of education, potential experience
# and its square, and sample size, weighted by
# hrearnwt = earnwt * usual weekly hours.

# year key (two-character string such as '03') -> DataFrame for that year
dfs = {}

for path in files:
    print(path)
    # read_dta also returns a metadata object; only the data frame is kept
    frame, _meta = prs.read_dta(path)
    # the key is the last two characters before the extension; it stays a
    # string because later cells index with keys like '03' and '23'
    stem = path.split('.')[0]
    dfs[stem[-2:]] = frame

print(dfs.keys())

In [None]:
# Price-index levels used to express wages in base-year terms, four values
# per year in chronological order.
# NOTE(review): the source series is not recorded in this notebook; the values
# are presumably quarterly and start in the first year listed in `files` — confirm.
inflation = [77.484,77.562,78.073,78.457,79.062,79.594,79.985,80.669,81.138,81.65,82.532,83.189,83.621,84.355,84.962,84.822,85.597,86.324,86.812,87.694,88.408,89.269,90.221,88.781,88.181,88.532,89.142,89.83,90.177,90.317,90.49,91.07,91.835,92.738,93.167,93.475,94.094,94.321,94.595,95.126,95.459,95.508,95.901,96.254,96.696,97.129,97.394,97.265,96.829,97.312,97.565,97.49,97.538,98.157,98.496,98.947,99.524,99.724,100.076,100.676,101.38,101.912,102.254,102.642,102.877,103.422,103.674,104.08,104.416,103.962,104.819,105.343,106.578,108.208,109.705,111.514,113.59,115.577,116.905,118.098,119.309,120.044,120.814,121.351]

# A partial year would shift every later row by a quarter without raising,
# so refuse to regroup a series that is not made of whole years.
if len(inflation) % 4 != 0:
    raise ValueError(f"inflation series has {len(inflation)} values; expected a multiple of 4")

# one row of four values per year; row j is matched to the j-th yearly file
inflation = [inflation[start:start + 4] for start in range(0, len(inflation), 4)]

# Base period for real wages: the final year of the series. Derived from the
# table (as a separate list) so it cannot drift from a hand-typed copy.
inflation_2023 = list(inflation[-1])

In [None]:
# Replace missing values with 0 in ethnic, weight, earnwt and the usual-hours
# column, in place on every yearly frame. The hours column is called `uhours`
# when that column exists and `uhourse` otherwise.
for year in dfs.keys():
    frame = dfs[year]
    hours_col = 'uhours' if 'uhours' in frame.columns else 'uhourse'
    for col in ('ethnic', 'weight', 'earnwt', hours_col):
        frame[col] = frame[col].fillna(0.)

In [None]:
# Derive the analysis variables on every yearly frame. The frames stored in
# `dfs` are modified in place, so this cell has to run before any cell that
# reads hrwage, hrearnwt, realhrwage, educ, logwage, the education dummies,
# exp or exp2.

# `grade92` code -> years of schooling; codes not listed here are left as-is
GRADE92_TO_YEARS = {
    31: 0, 32: 2.5, 33: 5.5, 34: 7.5, 35: 9, 36: 10, 37: 11, 38: 12,
    39: 12, 40: 13, 41: 14, 42: 14, 43: 16, 44: 18, 45: 18, 46: 18,
}
# Real hourly wages outside this range are set to 0, which removes them from
# every later `realhrwage > 0` sample filter.
WAGE_FLOOR, WAGE_CEILING = 2, 250
# usual weekly hours at or below this count as "no usable hours"
eps = 1e-6
# upper month bound of each quarter, in the same order as an inflation row
QUARTER_END_MONTHS = (3, 6, 9, 12)

for j, year in enumerate(dfs.keys()):
    df = dfs[year]

    ## hourly wage and hours-scaled weight ##
    # the usual-hours column is `uhours` in some extracts and `uhourse` in others
    hours_col = 'uhours' if 'uhours' in df.columns else 'uhourse'
    df[hours_col] = pd.to_numeric(df[hours_col], errors='coerce').fillna(0)
    df['earnwke'] = pd.to_numeric(df['earnwke'], errors='coerce').fillna(0)
    uhours = df[hours_col].to_numpy(dtype=float)
    earnwke = df['earnwke'].to_numpy(dtype=float)

    # Divide only where hours are usable; all other rows keep an hourly wage
    # of 0. Masking the division avoids divide-by-zero warnings.
    hrwage = np.zeros(len(df), dtype=float)
    np.divide(earnwke, uhours, out=hrwage, where=uhours > eps)
    df['hrwage'] = hrwage
    df['hrearnwt'] = df['earnwt'] * uhours

    ## first adjust for inflation for each quarter ##
    # row j of the inflation table belongs to the j-th year in `dfs`
    inflation_row = inflation[j]
    month = df['intmonth'].to_numpy(dtype=float)
    realhrwage = hrwage.copy()
    lower = -np.inf
    for q, upper in enumerate(QUARTER_END_MONTHS):
        # rows with a missing month match no quarter and stay unadjusted
        in_quarter = (month > lower) & (month <= upper)
        realhrwage[in_quarter] = hrwage[in_quarter] * inflation_2023[q] / inflation_row[q]
        lower = upper

    # trim implausible wages
    realhrwage[(realhrwage < WAGE_FLOOR) | (realhrwage > WAGE_CEILING)] = 0
    df['realhrwage'] = realhrwage

    ## get educ; treat gradeat vs grade92 separately ##
    if 'grade92' in df.columns:
        # map() yields NaN for unlisted codes; fillna restores the original value there
        df['educ'] = df['grade92'].map(GRADE92_TO_YEARS).fillna(df['grade92'])
    else:
        df['educ'] = df['gradeat']
        # if gradecp is 0, then subtract 1
        df.loc[df['gradecp'] == 0, 'educ'] = df['educ'] - 1

    ## log wage ##
    # Take the log only of positive wages. Zero (trimmed or missing) and NaN
    # wages get logwage 0, and the cleaned values are what is stored.
    logwage = np.zeros(len(df), dtype=float)
    np.log(realhrwage, out=logwage, where=realhrwage > 0)
    df['logwage'] = logwage

    ## education dummies and potential experience ##
    # hs / college / master are cumulative (>=) indicators, not exclusive categories
    df['lths'] = df['educ'] < 12
    df['hs'] = df['educ'] >= 12
    df['college'] = df['educ'] >= 16
    df['master'] = df['educ'] >= 18
    df['exp'] = df['age'] - df['educ'] - 6
    df['exp2'] = df['exp'] ** 2

In [None]:
# Unweighted summary of the positive real hourly wages in the '03' frame
year = '03'
frame_03 = dfs[year]
cond = frame_03['realhrwage'] > 0
summarize(frame_03.loc[cond, 'realhrwage'])


In [None]:
# Summary of the `race` codes in the '03' frame among rows with a positive
# real wage and ethnic < 1 (missing `ethnic` was filled with 0 earlier)
year = '03'
frame_03 = dfs[year]
cond = (frame_03['realhrwage'] > 0) & (frame_03['ethnic'] < 1)
summarize(frame_03.loc[cond, 'race'])


In [None]:
# Earnings-weighted summary of positive real hourly wages in the '23' frame,
# restricted to rows with lfsr94 <= 2
year = '23'
frame_23 = dfs[year]
cond = (frame_23['realhrwage'] > 0) & (frame_23['lfsr94'] <= 2)
summarize(frame_23.loc[cond, 'realhrwage'], weight=frame_23.loc[cond, 'earnwt'])


In [None]:
# Real-wage time series: earnwt-weighted mean and plain (unweighted) median of
# `realhrwage` per year, for the age 24-65 sample and three subgroups.
weighted_means = []
medians = []

# not read anywhere in the visible cells; left defined in case a later cell expects it
white_means = []

group_names = ['all', 'white', 'asian', 'asian foreign']

for year in dfs:
    frame = dfs[year]

    # common sample: positive trimmed wage, lfsr94 <= 2, age 24 to 65 inclusive
    base = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    is_asian = frame['race'] == 4
    group_masks = [
        base,
        base & (frame['race'] == 1) & (frame['ethnic'] < 1),
        base & is_asian & (frame['prcitshp'] < 4),
        base & is_asian & (frame['prcitshp'] >= 4),
    ]

    # append in `group_names` order so the label columns built below line up
    for mask in group_masks:
        wages = frame.loc[mask, 'realhrwage']
        weighted_means.append(np.average(wages, weights=frame.loc[mask, 'earnwt']))
        medians.append(np.median(wages))

# long-format table: all weighted means first (year-major, group-minor), then all medians
n_years = len(dfs.keys())
rows_per_stat = len(group_names) * n_years
year_labels = ['20' + y for y in dfs.keys() for _ in group_names]

ts_data = pd.DataFrame({'real wage': np.concatenate([np.asarray(weighted_means), np.asarray(medians)], -1)})
ts_data['type'] = ['weighted_mean'] * rows_per_stat + ['median'] * rows_per_stat
ts_data['race'] = group_names * (2 * n_years)
ts_data['year'] = np.array(year_labels * 2)

sns.relplot(
    data=ts_data,
    x="year",
    y="real wage",
    hue="race",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)

In [None]:
# Real-wage time series split by marital status: earnwt-weighted mean and plain
# (unweighted) median of `realhrwage` per year. The plot labels call
# marital < 4 "married" and marital >= 4 "single".
weighted_means = []
medians = []

# not read anywhere in the visible cells; left defined in case a later cell expects it
white_means = []

group_names = ['all', 'white married', 'asian married', 'asian single']

for year in dfs:
    frame = dfs[year]

    # common sample: positive trimmed wage, lfsr94 <= 2, age 24 to 65 inclusive
    base = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    is_asian = frame['race'] == 4
    group_masks = [
        base,
        base & (frame['race'] == 1) & (frame['ethnic'] < 1) & (frame['marital'] < 4),
        base & is_asian & (frame['marital'] < 4),
        base & is_asian & (frame['marital'] >= 4),
    ]

    # append in `group_names` order so the label columns built below line up
    for mask in group_masks:
        wages = frame.loc[mask, 'realhrwage']
        weighted_means.append(np.average(wages, weights=frame.loc[mask, 'earnwt']))
        medians.append(np.median(wages))

# long-format table: all weighted means first (year-major, group-minor), then all medians
n_years = len(dfs.keys())
rows_per_stat = len(group_names) * n_years
year_labels = ['20' + y for y in dfs.keys() for _ in group_names]

ts_data = pd.DataFrame({'real wage': np.concatenate([np.asarray(weighted_means), np.asarray(medians)], -1)})
ts_data['type'] = ['weighted_mean'] * rows_per_stat + ['median'] * rows_per_stat
ts_data['race'] = group_names * (2 * n_years)
ts_data['year'] = np.array(year_labels * 2)

sns.relplot(
    data=ts_data,
    x="year",
    y="real wage",
    hue="race",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)

In [None]:
# '03' cross-section: one hrearnwt-weighted regression of logwage on the
# education dummies and experience terms per group. The white and US-Asian
# samples are kept under their own names because the decomposition cell that
# follows reads them.
year = '03'
df = dfs[year]
regressors = ['hs', 'college', 'master', 'exp', 'exp2']

cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)

# white: race == 1 and ethnic < 1
white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
white_X = df.loc[white_cond, regressors]
white_y = df.loc[white_cond, 'logwage']
white_w = df.loc[white_cond, 'hrearnwt']
white_model_03 = run_WLS(white_X.astype(float), white_y.astype(float), white_w.astype(float))

# "US" Asian: race == 4 and prcitshp < 4
us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)
us_asian_X = df.loc[us_asian_cond, regressors]
us_asian_y = df.loc[us_asian_cond, 'logwage']
us_asian_w = df.loc[us_asian_cond, 'hrearnwt']
us_asian_model_03 = run_WLS(us_asian_X.astype(float), us_asian_y.astype(float), us_asian_w.astype(float))

# "non-US" Asian: race == 4 and prcitshp >= 4
non_us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] >= 4)
nua_X = df.loc[non_us_asian_cond, regressors]
nua_y = df.loc[non_us_asian_cond, 'logwage']
nua_w = df.loc[non_us_asian_cond, 'hrearnwt']
non_us_asian_model_03 = run_WLS(nua_X.astype(float), nua_y.astype(float), nua_w.astype(float))

In [None]:
## compare us asian vs white ('03' models)
# Prints four decomposition terms: experience endowment, experience
# coefficients, education endowment, education coefficients.
exp_cols = ['exp', 'exp2']
edu_cols = ['hs', 'college', 'master']

# hrearnwt-weighted sample means of the regressors
white_X_bar = np.average(white_X[exp_cols], weights=white_w, axis=0)
white_Z_bar = np.average(white_X[edu_cols], weights=white_w, axis=0)
us_asian_X_bar = np.average(us_asian_X[exp_cols], weights=us_asian_w, axis=0)
us_asian_Z_bar = np.average(us_asian_X[edu_cols], weights=us_asian_w, axis=0)

# Coefficient slices: positions 1-3 are used as the education dummies and 4-5
# as the experience terms.
# NOTE(review): this assumes run_WLS puts an intercept at position 0 — confirm in helper.
white_gamma = white_model_03.params[1:4]
white_beta = white_model_03.params[4:6]
us_asian_gamma = us_asian_model_03.params[1:4]
us_asian_beta = us_asian_model_03.params[4:6]

for term in (
    us_asian_beta @ (us_asian_X_bar - white_X_bar),
    white_X_bar @ (us_asian_beta - white_beta),
    us_asian_gamma @ (us_asian_Z_bar - white_Z_bar),
    white_Z_bar @ (us_asian_gamma - white_gamma),
):
    print(term)


In [None]:
# '13' cross-section: the same three group regressions as for '03'. Each
# sample is held only in the scratch names X / y / w, which are overwritten by
# the next group.
year = '13'
df = dfs[year]
regressors = ['hs', 'college', 'master', 'exp', 'exp2']

cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)

# white: race == 1 and ethnic < 1
white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
X = df.loc[white_cond, regressors]
y = df.loc[white_cond, 'logwage']
w = df.loc[white_cond, 'hrearnwt']
white_model_13 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

# "US" Asian: race == 4 and prcitshp < 4
us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)
X = df.loc[us_asian_cond, regressors]
y = df.loc[us_asian_cond, 'logwage']
w = df.loc[us_asian_cond, 'hrearnwt']
us_asian_model_13 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

# "non-US" Asian: race == 4 and prcitshp >= 4
non_us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] >= 4)
X = df.loc[non_us_asian_cond, regressors]
y = df.loc[non_us_asian_cond, 'logwage']
w = df.loc[non_us_asian_cond, 'hrearnwt']
non_us_asian_model_13 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

In [None]:
## compare us asian vs white ('13' models)
# Prints four decomposition terms: experience endowment, experience
# coefficients, education endowment, education coefficients.
#
# The '13' fitting cell never rebinds `white_X` / `us_asian_X` or their
# weights, so reusing them here would combine '13' coefficients with another
# year's sample means. Rebuild the '13' samples so that means and coefficients
# refer to the same year.
year = '13'
df = dfs[year]
cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)
white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)

covariates = ['hs', 'college', 'master', 'exp', 'exp2']
white_X, white_w = df.loc[white_cond, covariates], df.loc[white_cond, 'hrearnwt']
us_asian_X, us_asian_w = df.loc[us_asian_cond, covariates], df.loc[us_asian_cond, 'hrearnwt']

# hrearnwt-weighted sample means; params[1:4] are used as the education
# coefficients and params[4:6] as the experience coefficients
# NOTE(review): this assumes run_WLS puts an intercept at position 0 — confirm in helper.
white_X_bar = np.average(white_X[['exp', 'exp2']], axis=0, weights=white_w)
white_Z_bar = np.average(white_X[['hs', 'college', 'master']], axis=0, weights=white_w)
white_beta = white_model_13.params[4:6]
white_gamma = white_model_13.params[1:4]
us_asian_X_bar = np.average(us_asian_X[['exp', 'exp2']], axis=0, weights=us_asian_w)
us_asian_Z_bar = np.average(us_asian_X[['hs', 'college', 'master']], axis=0, weights=us_asian_w)
us_asian_beta = us_asian_model_13.params[4:6]
us_asian_gamma = us_asian_model_13.params[1:4]

print(us_asian_beta @ (us_asian_X_bar - white_X_bar))
print(white_X_bar @ (us_asian_beta - white_beta))
print(us_asian_gamma @ (us_asian_Z_bar - white_Z_bar))
print(white_Z_bar @ (us_asian_gamma - white_gamma))

In [None]:
# '23' cross-section: the same three group regressions as for '03'. Each
# sample is held only in the scratch names X / y / w, which are overwritten by
# the next group.
year = '23'
df = dfs[year]
regressors = ['hs', 'college', 'master', 'exp', 'exp2']

cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)

# white: race == 1 and ethnic < 1
white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
X = df.loc[white_cond, regressors]
y = df.loc[white_cond, 'logwage']
w = df.loc[white_cond, 'hrearnwt']
white_model_23 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

# "US" Asian: race == 4 and prcitshp < 4
us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)
X = df.loc[us_asian_cond, regressors]
y = df.loc[us_asian_cond, 'logwage']
w = df.loc[us_asian_cond, 'hrearnwt']
us_asian_model_23 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

# "non-US" Asian: race == 4 and prcitshp >= 4
non_us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] >= 4)
X = df.loc[non_us_asian_cond, regressors]
y = df.loc[non_us_asian_cond, 'logwage']
w = df.loc[non_us_asian_cond, 'hrearnwt']
non_us_asian_model_23 = run_WLS(X.astype(float), y.astype(float), w.astype(float))

In [None]:
## compare us asian vs white ('23' models)
# Prints four decomposition terms: experience endowment, experience
# coefficients, education endowment, education coefficients.
#
# The '23' fitting cell never rebinds `white_X` / `us_asian_X` or their
# weights, so reusing them here would combine '23' coefficients with another
# year's sample means. Rebuild the '23' samples so that means and coefficients
# refer to the same year.
year = '23'
df = dfs[year]
cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)
white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)

covariates = ['hs', 'college', 'master', 'exp', 'exp2']
white_X, white_w = df.loc[white_cond, covariates], df.loc[white_cond, 'hrearnwt']
us_asian_X, us_asian_w = df.loc[us_asian_cond, covariates], df.loc[us_asian_cond, 'hrearnwt']

# hrearnwt-weighted sample means; params[1:4] are used as the education
# coefficients and params[4:6] as the experience coefficients
# NOTE(review): this assumes run_WLS puts an intercept at position 0 — confirm in helper.
white_X_bar = np.average(white_X[['exp', 'exp2']], axis=0, weights=white_w)
white_Z_bar = np.average(white_X[['hs', 'college', 'master']], axis=0, weights=white_w)
white_beta = white_model_23.params[4:6]
white_gamma = white_model_23.params[1:4]
us_asian_X_bar = np.average(us_asian_X[['exp', 'exp2']], axis=0, weights=us_asian_w)
us_asian_Z_bar = np.average(us_asian_X[['hs', 'college', 'master']], axis=0, weights=us_asian_w)
us_asian_beta = us_asian_model_23.params[4:6]
us_asian_gamma = us_asian_model_23.params[1:4]

print(us_asian_beta @ (us_asian_X_bar - white_X_bar))
print(white_X_bar @ (us_asian_beta - white_beta))
print(us_asian_gamma @ (us_asian_Z_bar - white_Z_bar))
print(white_Z_bar @ (us_asian_gamma - white_gamma))

In [None]:
# US-Asian vs white decomposition for every year. Each row of `obd_results`
# holds [experience endowment, experience coefficients, education endowment,
# education coefficients] for one year, in `dfs` order.
obd_results = []

covariates = ['hs', 'college', 'master', 'exp', 'exp2']

for j, year in enumerate(dfs.keys()):
    df = dfs[year]

    cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)

    # The white and US-Asian samples are bound per year under their own names.
    # Reading `white_X` / `us_asian_X` left over from an earlier cell would
    # pair every year's coefficients with the same stale sample means.
    white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
    white_X, white_y, white_w = df.loc[white_cond, covariates], df.loc[white_cond, 'logwage'], df.loc[white_cond, 'hrearnwt']
    white_model = run_WLS(white_X.astype(float), white_y.astype(float), white_w.astype(float))

    us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)
    us_asian_X, us_asian_y, us_asian_w = df.loc[us_asian_cond, covariates], df.loc[us_asian_cond, 'logwage'], df.loc[us_asian_cond, 'hrearnwt']
    us_asian_model = run_WLS(us_asian_X.astype(float), us_asian_y.astype(float), us_asian_w.astype(float))

    # fitted as in the original cell, but not used in the decomposition below
    non_us_asian_cond = cond & (df['race'] == 4) & (df['prcitshp'] >= 4)
    X, y, w = df.loc[non_us_asian_cond, covariates], df.loc[non_us_asian_cond, 'logwage'], df.loc[non_us_asian_cond, 'hrearnwt']
    non_us_asian_model = run_WLS(X.astype(float), y.astype(float), w.astype(float))

    # hrearnwt-weighted sample means; params[1:4] are used as the education
    # coefficients and params[4:6] as the experience coefficients
    # NOTE(review): this assumes run_WLS puts an intercept at position 0 — confirm in helper.
    white_X_bar = np.average(white_X[['exp', 'exp2']], axis=0, weights=white_w)
    white_Z_bar = np.average(white_X[['hs', 'college', 'master']], axis=0, weights=white_w)
    white_beta = white_model.params[4:6]
    white_gamma = white_model.params[1:4]
    us_asian_X_bar = np.average(us_asian_X[['exp', 'exp2']], axis=0, weights=us_asian_w)
    us_asian_Z_bar = np.average(us_asian_X[['hs', 'college', 'master']], axis=0, weights=us_asian_w)
    us_asian_beta = us_asian_model.params[4:6]
    us_asian_gamma = us_asian_model.params[1:4]

    obd = [us_asian_beta @ (us_asian_X_bar - white_X_bar), white_X_bar @ (us_asian_beta - white_beta), us_asian_gamma @ (us_asian_Z_bar - white_Z_bar), white_Z_bar @ (us_asian_gamma - white_gamma)]
    obd_results.append(obd)

obd_results = np.asarray(obd_results).round(4)

In [None]:
# Yearly decompositions against the white sample:
#   obd1_results - Asian with prcitshp < 4  vs white
#   obd2_results - Asian with prcitshp >= 4 vs white
# Each row is [experience endowment, experience coefficients,
# education endowment, education coefficients].
obd1_results = []
obd2_results = []


def _origin_profile(frame, mask):
    """Fit the weighted wage regression on frame[mask]; return weighted means and coefficient slices."""
    X = frame.loc[mask, ['hs', 'college', 'master', 'exp', 'exp2']]
    y = frame.loc[mask, 'logwage']
    w = frame.loc[mask, 'hrearnwt']
    model = run_WLS(X.astype(float), y.astype(float), w.astype(float), print_summary=False)
    return {
        'X_bar': np.average(X[['exp', 'exp2']], axis=0, weights=w),
        'Z_bar': np.average(X[['hs', 'college', 'master']], axis=0, weights=w),
        # positions 4-5 are used as the experience terms, 1-3 as the education dummies
        'beta': model.params[4:6],
        'gamma': model.params[1:4],
    }


def _origin_terms(group, ref):
    """Return the four decomposition terms of `group` relative to `ref`."""
    return [
        group['beta'] @ (group['X_bar'] - ref['X_bar']),
        ref['X_bar'] @ (group['beta'] - ref['beta']),
        group['gamma'] @ (group['Z_bar'] - ref['Z_bar']),
        ref['Z_bar'] @ (group['gamma'] - ref['gamma']),
    ]


for year in dfs.keys():
    df = dfs[year]

    cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)
    asian = cond & (df['race'] == 4)

    # models are fitted in the order white, prcitshp < 4, prcitshp >= 4
    white = _origin_profile(df, cond & (df['race'] == 1) & (df['ethnic'] < 1))
    ua = _origin_profile(df, asian & (df['prcitshp'] < 4))
    nua = _origin_profile(df, asian & (df['prcitshp'] >= 4))

    obd1_results.append(_origin_terms(ua, white))
    obd2_results.append(_origin_terms(nua, white))

obd1_results = np.asarray(obd1_results).round(4)
obd2_results = np.asarray(obd2_results).round(4)

In [None]:
# Plot the two coefficient terms (columns 1 and 3 of each decomposition row)
# over time for both comparisons. Reads only `obd1_results` / `obd2_results`.
# The former first line, `np.asarray(weighted_means)`, was overwritten
# immediately and made this cell depend on an unrelated wage-series cell.
n_years = len(dfs.keys())

ts_data = np.concatenate([obd1_results[:, 1], obd1_results[:, 3], obd2_results[:, 1], obd2_results[:, 3]])
ts_data = pd.DataFrame(ts_data, columns=['OBD Value'])
# label rows in the same block order as the concatenation above
ts_data['type'] = (['2nd term'] * n_years + ['4th term'] * n_years) * 2
ts_data['comparision'] = ['US Asian vs White'] * 2 * n_years + ['Non-US Asian vs White'] * 2 * n_years
ts_data['year'] = ['20'+y  for y in dfs.keys()] * 4

sns.set_style("whitegrid")
g = sns.relplot(
    data=ts_data,
    x="year",
    y="OBD Value",
    hue="comparision",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)
# zero reference line drawn behind the series
g.map(plt.axhline, y=0, color="0", zorder=0)

In [None]:
# Yearly decompositions within marital status, followed by a plot of the two
# coefficient terms:
#   obd1_results - Asian vs white among marital < 4  (labelled "Married")
#   obd2_results - Asian vs white among marital >= 4 (labelled "Single")
obd1_results = []
obd2_results = []

features = ['hs', 'college', 'master', 'exp', 'exp2']
edu_cols, exp_cols = features[:3], features[3:]

for year in dfs.keys():
    df = dfs[year]

    cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)
    white = cond & (df['race'] == 1) & (df['ethnic'] < 1)
    asian = cond & (df['race'] == 4)
    married = df['marital'] < 4
    single = df['marital'] >= 4

    # dict order fixes the fitting order: wm, ws, am, as
    masks = {
        'wm': white & married,
        'ws': white & single,
        'am': asian & married,
        'as': asian & single,
    }

    stats = {}
    for key, mask in masks.items():
        X = df.loc[mask, features]
        y = df.loc[mask, 'logwage']
        w = df.loc[mask, 'hrearnwt']
        model = run_WLS(X.astype(float), y.astype(float), w.astype(float), print_summary=False)
        stats[key] = {
            'X_bar': np.average(X[exp_cols], axis=0, weights=w),
            'Z_bar': np.average(X[edu_cols], axis=0, weights=w),
            # positions 4-5 are used as the experience terms, 1-3 as the education dummies
            'beta': model.params[4:6],
            'gamma': model.params[1:4],
        }

    # (results list, group, reference) for each comparison
    for results, grp, ref in ((obd1_results, 'am', 'wm'), (obd2_results, 'as', 'ws')):
        a, b = stats[grp], stats[ref]
        results.append([
            a['beta'] @ (a['X_bar'] - b['X_bar']),
            b['X_bar'] @ (a['beta'] - b['beta']),
            a['gamma'] @ (a['Z_bar'] - b['Z_bar']),
            b['Z_bar'] @ (a['gamma'] - b['gamma']),
        ])

obd1_results = np.asarray(obd1_results).round(4)
obd2_results = np.asarray(obd2_results).round(4)

# long-format table of the coefficient terms (columns 1 and 3)
n_years = len(dfs.keys())
ts_data = pd.DataFrame(
    np.concatenate([obd1_results[:, 1], obd1_results[:, 3], obd2_results[:, 1], obd2_results[:, 3]]),
    columns=['OBD Value'],
)
ts_data['type'] = (['2nd term'] * n_years + ['4th term'] * n_years) * 2
ts_data['comparision'] = ['Asian Married vs White Married'] * 2 * n_years + ['Asian Single vs White Single'] * 2 * n_years
ts_data['year'] = ['20'+y  for y in dfs.keys()] * 4

sns.set_style("whitegrid")
g = sns.relplot(
    data=ts_data,
    x="year",
    y="OBD Value",
    hue="comparision",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)
# zero reference line drawn behind the series
g.map(plt.axhline, y=0, color="0", zorder=0)

In [None]:
# Pair plot of educ / exp / logwage for the white sample (race == 1,
# ethnic < 1), comparing the '03' and '23' frames.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 1) & (frame['ethnic'] < 1)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

white_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(white_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()


In [None]:
# Pair plot of educ / exp / logwage for the white sample (race == 1,
# ethnic < 1), comparing the '03' and '23' frames.
# NOTE(review): this cell repeats the preceding white-sample pair plot with
# the same filters — confirm whether it is still needed.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 1) & (frame['ethnic'] < 1)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

white_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(white_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()

In [None]:
# Pair plot of educ / exp / logwage for the white sample with prcitshp < 4,
# comparing the '03' and '23' frames.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 1) & (frame['ethnic'] < 1) & (frame['prcitshp'] < 4)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

uw_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(uw_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()

In [None]:
# Pair plot of educ / exp / logwage for the white sample with prcitshp >= 4,
# comparing the '03' and '23' frames.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 1) & (frame['ethnic'] < 1) & (frame['prcitshp'] >= 4)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

nuw_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(nuw_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()

In [None]:
# Pair plot of educ / exp / logwage for the Asian sample (race == 4) with
# prcitshp < 4, comparing the '03' and '23' frames.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 4) & (frame['prcitshp'] < 4)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

ua_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(ua_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()

In [None]:
# Pair plot of educ / exp / logwage for the Asian sample (race == 4) with
# prcitshp >= 4, comparing the '03' and '23' frames.
year1 = '03'
year2 = '23'

yearly_slices = []
for yr in (year1, year2):
    frame = dfs[yr]
    in_sample = (frame['realhrwage'] > 0) & (frame['lfsr94'] <= 2) & (frame['age'] >= 24) & (frame['age'] <= 65)
    keep = in_sample & (frame['race'] == 4) & (frame['prcitshp'] >= 4)
    # tag each slice with its four-digit year for the hue
    yearly_slices.append(frame.loc[keep, ['educ', 'exp', 'logwage']].assign(year='20' + yr))

nua_data = pd.concat(yearly_slices, axis=0)
sns.set_style('ticks')
g = sns.PairGrid(nua_data, hue="year", diag_sharey=False)
# scatter above the diagonal, filled densities on and below it
g.map_upper(sns.scatterplot, s=3, alpha=0.2)
g.map_lower(sns.kdeplot, fill=True, alpha=0.6)
g.map_diag(sns.kdeplot, fill=True)
g.add_legend()

In [None]:
year1 = '03'
df1 = dfs[year1]
df1['y1'] = 1
obd1_results = []
obd2_results = []
years = []
weighted_means = []
weighted_means_dfl = []

for i, year2 in enumerate(dfs.keys()):
    if i >= 1:
        df2 = dfs[year2]
        df2['y1'] = 0
        
        df = pd.concat([df1, df2], axis=0)
        df['dfl_w'] = 0.

        cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)
        
        # compute dfl weights
        white_cond = cond & (df['race'] == 1) & (df['ethnic'] < 1)
        dfl_w, _ = run_DFL(df[['hs', 'college', 'master', 'exp', 'exp2', 'logwage']][white_cond].astype(float), df['y1'][white_cond].astype(float), df['hrearnwt'][white_cond].astype(float), print_summary=False)
        df.loc[white_cond, 'dfl_w'] = dfl_w
        
        # run regression for year 2 with dfl weights
        white_cond = white_cond & (df['y1'] == 0)
        white_X, white_y, white_w = df[['hs', 'college', 'master', 'exp', 'exp2']][white_cond], df['logwage'][white_cond], df['dfl_w'][white_cond]
        white_model = run_WLS(white_X.astype(float), white_y.astype(float), white_w.astype(float), print_summary=False)
                
        # compute dfl weights
        ua_cond = cond & (df['race'] == 4) & (df['prcitshp'] < 4)
        dfl_w, _ = run_DFL(df[['hs', 'college', 'master', 'exp', 'exp2', 'logwage']][ua_cond].astype(float), df['y1'][ua_cond].astype(float), df['hrearnwt'][ua_cond].astype(float), print_summary=False)
        df.loc[ua_cond, 'dfl_w'] = dfl_w
        
        # run regression for year 2 with dfl weights
        ua_cond = ua_cond & (df['y1'] == 0)
        ua_X, ua_y, ua_w = df[['hs', 'college', 'master', 'exp', 'exp2']][ua_cond], df['logwage'][ua_cond], df['dfl_w'][ua_cond]
        ua_model = run_WLS(ua_X.astype(float), ua_y.astype(float), ua_w.astype(float), print_summary=False)
        
        # compute dfl weights
        nua_cond = cond & (df['race'] == 4) & (df['prcitshp'] >= 4)
        dfl_w, _ = run_DFL(df[['hs', 'college', 'master', 'exp', 'exp2', 'logwage']][nua_cond].astype(float), df['y1'][nua_cond].astype(float), df['hrearnwt'][nua_cond].astype(float), print_summary=False)
        df.loc[nua_cond, 'dfl_w'] = dfl_w
        
        # run regression for year 2 with dfl weights
        nua_cond = nua_cond & (df['y1'] == 0)
        nua_X, nua_y, nua_w = df[['hs', 'college', 'master', 'exp', 'exp2']][nua_cond], df['logwage'][nua_cond], df['dfl_w'][nua_cond]
        nua_model = run_WLS(nua_X.astype(float), nua_y.astype(float), nua_w.astype(float), print_summary=False)
        
        white_X_bar = np.average(white_X[['exp', 'exp2']], axis=0, weights=white_w)
        white_Z_bar = np.average(white_X[['hs', 'college', 'master']], axis=0, weights=white_w)
        white_beta = white_model.params[4:6]
        white_gamma = white_model.params[1:4]
        ua_X_bar = np.average(ua_X[['exp', 'exp2']], axis=0, weights=ua_w)
        ua_Z_bar = np.average(ua_X[['hs', 'college', 'master']], axis=0, weights=ua_w)
        ua_beta = ua_model.params[4:6]
        ua_gamma = ua_model.params[1:4]
        nua_X_bar = np.average(nua_X[['exp', 'exp2']], axis=0, weights=nua_w)
        nua_Z_bar = np.average(nua_X[['hs', 'college', 'master']], axis=0, weights=nua_w)
        nua_beta = nua_model.params[4:6]
        nua_gamma = nua_model.params[1:4]
        
        obd1 = [ua_beta @ (ua_X_bar - white_X_bar), white_X_bar @ (ua_beta - white_beta), ua_gamma @ (ua_Z_bar - white_Z_bar), white_Z_bar @ (ua_gamma - white_gamma)]
        obd1_results.append(obd1)
        
        obd2 = [nua_beta @ (nua_X_bar - white_X_bar), white_X_bar @ (nua_beta - white_beta), nua_gamma @ (nua_Z_bar - white_Z_bar), white_Z_bar @ (nua_gamma - white_gamma)]
        obd2_results.append(obd2)
        
        years.append(year2)
        
        weighted_means.append(np.average(df['logwage'][white_cond], weights=df['hrearnwt'][white_cond]))
        weighted_means_dfl.append(np.average(df['logwage'][white_cond], weights=df['dfl_w'][white_cond]))
    
        weighted_means.append(np.average(df['logwage'][ua_cond], weights=df['hrearnwt'][ua_cond]))
        weighted_means_dfl.append(np.average(df['logwage'][ua_cond], weights=df['dfl_w'][ua_cond]))
        
        weighted_means.append(np.average(df['logwage'][nua_cond], weights=df['hrearnwt'][nua_cond]))
        weighted_means_dfl.append(np.average(df['logwage'][nua_cond], weights=df['dfl_w'][nua_cond]))
    
# Stack the per-year decomposition rows collected by the loop above into
# (n_years, 4) arrays, rounded to 4 decimals.
obd1_results = np.asarray(obd1_results).round(4)
obd2_results = np.asarray(obd2_results).round(4)

# Long-format table for plotting. Only columns 1 and 3 of each row are kept:
# as built in the loop, these are the coefficient-difference terms evaluated at
# the White group's means (experience terms and education terms respectively).
ts_data = np.concatenate([obd1_results[:, 1], obd1_results[:, 3], obd2_results[:, 1], obd2_results[:, 3]])
ts_data = pd.DataFrame(ts_data, columns=['OBD Value'])
ts_data['type'] = ['2nd term'] * len(obd1_results) + ['4th term'] * len(obd1_results) + ['2nd term'] * len(obd2_results) + ['4th term'] * len(obd2_results)
# The column name doubles as the legend title, so it is spelled "comparison"
# (the previous "comparision" showed up verbatim in the figure).
ts_data['comparison'] = ['US Asian vs White'] * 2 * len(obd1_results) + ['Non-US Asian vs White'] * 2 * len(obd2_results)
# `years` holds two-digit strings; the same year sequence repeats for each of
# the four stacked series.
ts_data['year'] = ['20'+y  for y in years] * 4

sns.set_style("whitegrid")
g = sns.relplot(
    data=ts_data,
    x="year",
    y="OBD Value",
    hue="comparison",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)
# Horizontal reference line at zero, drawn behind the data.
g.map(plt.axhline, y=0, color="0", zorder=0)


In [None]:
# Long-format table of the mean log wages gathered by the loop in the previous
# cell. Each list holds, for every comparison year, one value per group in the
# order White, US Asian, Non-US Asian; the survey-weighted means come first,
# followed by the DFL-weighted means.
group_cycle = ['White', 'US Asian', 'Non-US Asian']
year_blocks = [['20' + y] * 3 for y in years]

ts_data = pd.DataFrame({
    'log real wage': np.concatenate([np.asarray(weighted_means), np.asarray(weighted_means_dfl)], -1),
    'type': ['hrearnwt'] * (3 * len(years)) + ['DFL weights'] * (3 * len(years)),
    'group': group_cycle * (2 * len(years)),
    'year': np.concatenate(year_blocks + year_blocks),
})

# One line per (group, weighting scheme); colour encodes the group and the
# line style encodes which weights were used.
sns.relplot(
    data=ts_data,
    kind='line',
    x="year",
    y="log real wage",
    hue="group",
    style='type',
    estimator=None,
    aspect=2
)

In [None]:
year1 = '03'
df1 = dfs[year1]
# Flag base-year rows. NOTE: this writes the column into the frame cached in `dfs`.
df1['y1'] = 1
obd1_results = []
obd2_results = []
years = []
weighted_means = []
weighted_means_dfl = []


def _analyse_group(df, group_cond):
    """Reweight one demographic group and fit its comparison-year wage regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Base year and comparison year stacked together, with a ``y1`` column
        (1 = base year, 0 = comparison year) and a ``dfl_w`` column.
        The group's rows of ``dfl_w`` are overwritten in place.
    group_cond : boolean pandas.Series
        Row mask selecting the group in both years.

    Returns
    -------
    dict
        ``X_bar`` / ``Z_bar``: DFL-weighted comparison-year means of the
        experience columns and of the education columns;
        ``beta`` / ``gamma``: the matching slices of the fitted parameters;
        ``mean_hrearnwt`` / ``mean_dfl``: comparison-year mean log wage under
        the survey weights and under the DFL weights.
    """
    regressors = ['hs', 'college', 'master', 'exp', 'exp2']

    # DFL weights are computed on both years of the group at once.
    dfl_w, _ = run_DFL(
        df[regressors + ['logwage']][group_cond].astype(float),
        df['y1'][group_cond].astype(float),
        df['hrearnwt'][group_cond].astype(float),
        print_summary=False,
    )
    df.loc[group_cond, 'dfl_w'] = dfl_w

    # Everything below uses the comparison year only.
    year2_cond = group_cond & (df['y1'] == 0)
    X = df[regressors][year2_cond]
    y = df['logwage'][year2_cond]
    w = df['dfl_w'][year2_cond]
    model = run_WLS(X.astype(float), y.astype(float), w.astype(float), print_summary=False)

    return {
        'X_bar': np.average(X[['exp', 'exp2']], axis=0, weights=w),
        'Z_bar': np.average(X[['hs', 'college', 'master']], axis=0, weights=w),
        # Positional slices: assumes run_WLS puts an intercept first, so that
        # positions 1-3 are hs/college/master and 4-5 are exp/exp2 -- TODO confirm
        # against helper.run_WLS.
        'beta': model.params[4:6],
        'gamma': model.params[1:4],
        'mean_hrearnwt': np.average(y, weights=df['hrearnwt'][year2_cond]),
        'mean_dfl': np.average(y, weights=w),
    }


def _decomposition_terms(group, reference):
    """Return the four decomposition terms of `group` against `reference`.

    Order: experience means gap, experience coefficient gap, education means
    gap, education coefficient gap. Means gaps are priced at the group's
    coefficients; coefficient gaps are evaluated at the reference's means.
    """
    return [
        group['beta'] @ (group['X_bar'] - reference['X_bar']),
        reference['X_bar'] @ (group['beta'] - reference['beta']),
        group['gamma'] @ (group['Z_bar'] - reference['Z_bar']),
        reference['Z_bar'] @ (group['gamma'] - reference['gamma']),
    ]


for i, year2 in enumerate(dfs.keys()):
    if i < 1:
        # The first entry of `dfs` is skipped (it is expected to be the base year).
        continue

    df2 = dfs[year2]
    df2['y1'] = 0

    # ignore_index gives the stacked frame unique row labels; the two yearly
    # frames may otherwise share labels, which makes label-aligned assignment
    # through .loc fragile.
    df = pd.concat([df1, df2], axis=0, ignore_index=True)
    df['dfl_w'] = 0.

    cond = (df['realhrwage'] > 0) & (df['lfsr94'] <= 2) & (df['age'] >= 24) & (df['age'] <= 65)

    # Group building blocks (labels follow the plot legends used below).
    white = (df['race'] == 1) & (df['ethnic'] < 1)
    asian = df['race'] == 4
    married = df['marital'] < 4
    single = df['marital'] >= 4

    white_married = _analyse_group(df, cond & white & married)
    white_single = _analyse_group(df, cond & white & single)
    asian_married = _analyse_group(df, cond & asian & married)
    asian_single = _analyse_group(df, cond & asian & single)

    obd1_results.append(_decomposition_terms(asian_married, white_married))
    obd2_results.append(_decomposition_terms(asian_single, white_single))

    years.append(year2)

    # Four entries per year, always in this order; the next cell relies on it
    # when labelling the groups.
    for stats in (white_married, white_single, asian_married, asian_single):
        weighted_means.append(stats['mean_hrearnwt'])
        weighted_means_dfl.append(stats['mean_dfl'])

obd1_results = np.asarray(obd1_results).round(4)
obd2_results = np.asarray(obd2_results).round(4)

# Long-format table with only the coefficient-gap terms (columns 1 and 3).
ts_data = np.concatenate([obd1_results[:, 1], obd1_results[:, 3], obd2_results[:, 1], obd2_results[:, 3]])
ts_data = pd.DataFrame(ts_data, columns=['OBD Value'])
ts_data['type'] = ['2nd term'] * len(years) + ['4th term'] * len(years) + ['2nd term'] * len(years) + ['4th term'] * len(years)
# The column name doubles as the legend title, so it is spelled "comparison"
# (the previous "comparision" showed up verbatim in the figure).
ts_data['comparison'] = ['Asian Married vs White Married'] * 2 * len(years) + ['Asian Single vs White Single'] * 2 * len(years)
ts_data['year'] = ['20'+y  for y in years] * 4

sns.set_style("whitegrid")
g = sns.relplot(
    data=ts_data,
    x="year",
    y="OBD Value",
    hue="comparison",
    style='type',
    estimator=None,
    kind='line',
    aspect=2
)
# Horizontal reference line at zero, drawn behind the data.
g.map(plt.axhline, y=0, color="0", zorder=0)


In [None]:
# Long-format table of the mean log wages gathered by the loop in the previous
# cell. Each list holds, for every comparison year, one value per group in the
# order White Married, White Single, Asian Married, Asian Single; the
# survey-weighted means come first, followed by the DFL-weighted means.
group_cycle = ['White Married', 'White Single', 'Asian Married', 'Asian Single']
year_blocks = [['20' + y] * 4 for y in years]

ts_data = pd.DataFrame({
    'log real wage': np.concatenate([np.asarray(weighted_means), np.asarray(weighted_means_dfl)], -1),
    'type': ['hrearnwt'] * (4 * len(years)) + ['DFL weights'] * (4 * len(years)),
    'group': group_cycle * (2 * len(years)),
    'year': np.concatenate(year_blocks + year_blocks),
})

# One line per (group, weighting scheme); colour encodes the group and the
# line style encodes which weights were used.
sns.relplot(
    data=ts_data,
    kind='line',
    x="year",
    y="log real wage",
    hue="group",
    style='type',
    estimator=None,
    aspect=2
)