# Compensation

## Imports

In [1]:
import analyze
import prep
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import altair as alt
from altair_saver import save
%load_ext rpy2.ipython

%load_ext autoreload
%autoreload 2

  from pandas.core.index import Index as PandasIndex


## Load and prepare data

In [2]:
outcome = "Wage"

# (survey year, raw CSV path) pairs, processed in this order: 2020 first, 2019 last.
# The order matters because `keep`, `groups`, `categorical`, `numeric` and `base`
# are overwritten on every pass, so the values left in the namespace (and
# `DATA_PATH` / `data_raw`) are the ones from the final, 2019, pass.
survey_files = (
    (2020, "../data/2020/survey_results_public.csv"),
    (2019, "../data/2019/survey_results_public.csv"),
)

prepared = {}
for survey_year, DATA_PATH in survey_files:
    data_raw = pd.read_csv(DATA_PATH)
    # prep.prep returns a 6-tuple; only the first element (the cleaned frame) is kept per year.
    prepared[survey_year], keep, groups, categorical, numeric, base = prep.prep(
        data_raw, outcome, survey_year
    )

data_2020 = prepared[2020]
data_2019 = prepared[2019]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Removing 0 respondents with missing Wage
6284 developers left in the sample after cleaning
151 or 2.4% black developers in the sample with multiracial replacement
Removing 0 respondents with missing Wage
12502 developers left in the sample after cleaning
291 or 2.3% black developers in the sample with multiracial replacement


In [3]:
# Stack the two survey years row-wise, 2020 rows first. No `ignore_index` is
# passed, so each frame keeps its own row labels in the combined frame.
data = pd.concat(objs=[data_2020, data_2019], axis="index")

In [4]:
# Build the model inputs from the pooled frame. Downstream cells pass `Y` as the
# regression `endog` and columns of `X` as `exog`.
# NOTE(review): `categorical`, `numeric` and `base` are the values returned by the
# last prep.prep call (the 2019 one) — presumably identical across years; confirm.
X, Y = prep.design_matrix(data, categorical, numeric, base, outcome)

Design matrix complete with 184 variables/columns


## Analysis

In [5]:
# Variable of interest handed to analyze.analyze as its third argument.
D = "Ethnicity"
# The returned table has one row per matched variable with `*_raw` and `*_ds`
# column families (coef, se, t, p, lower, upper). Later cells label the `_raw`
# estimate "Raw" and the `_ds` estimate "Adj." in the charts.
# NOTE(review): the exact estimation procedure lives in analyze.py — not visible here.
results = analyze.analyze(Y, X, D)
results

Unnamed: 0,coef_ds,se_ds,t_ds,p_ds,lower_ds,upper_ds,coef_raw,se_raw,t_raw,p_raw,lower_raw,upper_raw
Ethnicity_Black_or_of_African_descent,-0.013597,0.018611,-0.730614,0.465015,-0.050074,0.022879,-0.07164,0.023054,-3.107465,0.00189,-0.116828,-0.026452


In [7]:
# Three-row plotting frame for the headline bar chart: a fixed 1.0 reference bar
# for "Non-Black" plus the exponentiated raw and adjusted coefficients with
# their exponentiated interval bounds (the reference bar has no interval, hence NaN).
# NOTE(review): exponentiating assumes the outcome was modelled in logs — confirm in prep.
#
# `results` is indexed by variable name, so the first row is selected explicitly
# by position with `.iloc[0]`. Integer keys through `Series[...]` on a
# string-labelled index rely on a deprecated positional fallback in pandas.
data_plot = pd.DataFrame({
    "data": [
        1.0,
        np.exp(results["coef_raw"].iloc[0]),
        np.exp(results["coef_ds"].iloc[0]),
    ],
    "lower": [
        np.nan,
        np.exp(results["lower_raw"].iloc[0]),
        np.exp(results["lower_ds"].iloc[0]),
    ],
    "upper": [
        np.nan,
        np.exp(results["upper_raw"].iloc[0]),
        np.exp(results["upper_ds"].iloc[0]),
    ],
    "labels": ["Non-Black", "Black (Raw)", "Black (Adj.)"],
    "Legend": ["95% Confidence Interval"] * 3,
})

In [8]:
# Display the plotting frame to sanity-check the ratios before charting.
data_plot

Unnamed: 0,data,lower,upper,labels,Legend
0,1.0,,,Non-Black,95% Confidence Interval
1,0.930866,0.889738,0.973895,Black (Raw),95% Confidence Interval
2,0.986495,0.951159,1.023143,Black (Adj.),95% Confidence Interval


In [10]:
# Parallel accumulators consumed by the next cell to build `results_q_plot`.
# NOTE: `data` rebinds the name that previously held the pooled survey frame,
# so cells that need that frame must be re-run from the concat cell.
data = []
lower = []
upper = []
labels = []
index = []

# Number of quantile bins; regressions are fitted at the interior cut points
# 1/quantiles, ..., (quantiles - 1)/quantiles.
quantiles = 10

# Regressors are the same for every quantile, so build them once: all
# Ethnicity columns, the 2020 year dummy and an intercept.
exog = X[[col for col in X.columns if "Ethnicity" in col] + ["Year_2020"]].assign(const=1)

for q in range(1, quantiles):
    model = sm.QuantReg(endog=Y, exog=exog).fit(q=q/quantiles)
    # First row of the interval table = first regressor (same position as params.iloc[0]).
    ci_lower, ci_upper = model.conf_int().values[0]
    # `.iloc[0]` instead of `[0]`: params is labelled by column name, and integer
    # keys on a labelled Series use a deprecated positional fallback.
    data.append(model.params.iloc[0])
    lower.append(ci_lower)
    upper.append(ci_upper)
    labels.append("Black (Raw)")
    index.append("{:.0%}".format(q/quantiles))

# One "Non-Black" reference row per fitted quantile. The loop produces
# quantiles - 1 fits, so pad with that count (not `quantiles`) to keep all five
# lists the same length instead of relying on zip() truncation downstream.
n_fits = len(index)
data = data + [0.0] * n_fits
lower = lower + [np.nan] * n_fits
upper = upper + [np.nan] * n_fits
labels = labels + ["Non-Black"] * n_fits
index = index + index

In [11]:
# Assemble the per-quantile plotting frame. zip() stops at the shortest of the
# five lists, so the row count equals the shortest list's length.
# Estimates and interval bounds are exponentiated; `label_height` is the constant
# y-position (1.0) at which the value labels are drawn in the faceted chart.
results_q_plot = (
    pd.DataFrame(
        list(zip(index, labels, data, lower, upper)),
        columns=["index", "labels", "data", "lower", "upper"],
    )
    .assign(
        data=lambda frame: np.exp(frame["data"]),
        lower=lambda frame: np.exp(frame["lower"]),
        upper=lambda frame: np.exp(frame["upper"]),
        label_height=1.0,
    )
)

In [12]:
# Display the per-quantile plotting frame (Black (Raw) rows first, then Non-Black reference rows).
results_q_plot

Unnamed: 0,index,labels,data,lower,upper,label_height
0,10%,Black (Raw),0.929771,0.859733,1.005514,1.0
1,20%,Black (Raw),0.942858,0.889332,0.999605,1.0
2,30%,Black (Raw),0.9375,0.888746,0.988929,1.0
3,40%,Black (Raw),0.944449,0.895236,0.996367,1.0
4,50%,Black (Raw),0.933326,0.887688,0.981311,1.0
5,60%,Black (Raw),0.922853,0.876944,0.971166,1.0
6,70%,Black (Raw),0.944882,0.897253,0.995038,1.0
7,80%,Black (Raw),0.944061,0.891549,0.999667,1.0
8,90%,Black (Raw),0.885,0.817749,0.957783,1.0
9,10%,Non-Black,1.0,,,1.0


## Results

In [6]:
# Bar order for the headline chart and the matching colour for each bar
# (position i in `colors` belongs to position i in `labels`).
labels = [
    "Non-Black",
    "Black (Raw)",
    "Black (Adj.)",
]
colors = [
    "#fa7026",
    "#14125b",
    "#612b8a",
]

In [9]:
# Shared colour mapping: labels[i] is drawn in colors[i].
color_scale = alt.Scale(domain=labels, range=colors)

# Layer 1: horizontal bars, one per group, clipped to the 0.8-1.05 x-range.
points = (
    alt.Chart(data_plot, title="Relative Compensation")
    .mark_bar(
        size=70,
        cornerRadiusBottomRight=8,
        cornerRadiusTopRight=8,
        clip=True,
    )
    .encode(
        x=alt.X(
            "data",
            axis=alt.Axis(
                format="$.2f",
                title=None,
                tickCount=5,
                tickSize=0,
                labelFlush=False,
                labelFontSize=15,
            ),
            scale=alt.Scale(domain=(0.8, 1.05)),
        ),
        y=alt.Y(
            "labels",
            axis=alt.Axis(title=None, labelFontSize=15),
            sort=labels,
        ),
        color=alt.Color("labels", legend=None, scale=color_scale),
    )
)

# Layer 2: the bar value printed just past the end of each bar.
text = (
    alt.Chart(data_plot)
    .mark_text(
        align="left",
        baseline="middle",
        dx=10,
        dy=-25,
        fontSize=20,
    )
    .encode(
        text=alt.Text("data", format="$.2f"),
        x="data",
        y=alt.Y("labels", sort=labels),
        color=alt.Color("labels", scale=color_scale),
    )
)

# Layers 3-5: confidence interval drawn as a thin grey bar from `lower` to
# `upper`, capped by a tick at each end.
error_bars = (
    alt.Chart(data_plot)
    .mark_bar(color="#BAC1B8", size=3)
    .encode(
        x="lower",
        x2="upper",
        y=alt.Y("labels", sort=labels),
    )
)

lower = (
    alt.Chart(data_plot)
    .mark_tick(color="#BAC1B8", thickness=3, opacity=1)
    .encode(
        x="lower",
        y=alt.Y("labels", sort=labels),
    )
)

upper = (
    alt.Chart(data_plot)
    .mark_tick(color="#BAC1B8", thickness=3, opacity=1)
    .encode(
        x="upper",
        y=alt.Y("labels", sort=labels),
    )
)

# Stack the layers in drawing order, then apply size, title and global styling.
chart = alt.layer(points, text, error_bars, lower, upper)
chart = chart.properties(
    width=500,
    height=300,
    title=dict(
        text="Relative Compensation",
        subtitle="Raw: No controls, Adj: With controls, 95% Confidence Interval",
        subtitleColor="gray",
    ),
)
chart = chart.configure_view(strokeWidth=0)
chart = chart.configure_title(anchor="start", offset=5, fontSize=20)
chart = chart.configure_axis(domain=False)

# PNG export goes through a Selenium-driven Chrome at 3x scale.
chart.save(
    "../images/compensation_relative.png",
    method="selenium",
    webdriver="chrome",
    scale_factor=3.0,
)

chart

In [None]:
# Bar order and colours for the per-percentile chart, which shows only the
# reference group and the raw estimate (colors[i] belongs to labels[i]).
labels = [
    "Non-Black",
    "Black (Raw)",
]
colors = [
    "#fa7026",
    "#612b8a",
]

In [13]:
# Shared colour mapping: labels[i] is drawn in colors[i].
color_scale = alt.Scale(
    domain=labels,
    range=colors
)

# All layers below are created without data; the data frame is attached once
# in alt.layer(..., data=results_q_plot) so the layered chart can be faceted.

# Layer 1: vertical bars, one per group, clipped to the 0.8-1.1 y-range.
points = alt.Chart().mark_bar(size=25, cornerRadiusTopLeft=8, cornerRadiusTopRight=8, clip=True).encode(
    x=alt.X("labels", 
        axis=alt.Axis(title=None, labels=False, tickSize=0), 
        sort=labels
       ),
    y=alt.Y("data", 
            axis=alt.Axis(format="$.2f", title=None, tickCount=4, tickSize=0, labelFlush=False, labelFontSize=15),
            scale=alt.Scale(
                domain=(0.8, 1.1)
            )
    ),
    color=alt.Color("labels", 
                    scale=color_scale,
                    legend=alt.Legend(title=None, orient="bottom", labelFontSize=15))
).properties(
    width=100,
    height=200
)

# Layer 2: value labels, all drawn at the constant `label_height` rather than
# at the bar top, so they line up across groups and facets.
text = alt.Chart().mark_text(
    align='center',
    angle=0,
    baseline='middle',
    dx=-0,
    dy=-20,
    fontSize=12
).encode(
    text=alt.Text("data", format="$.2f"),
    x=alt.X("labels", 
            sort=labels),
    y="label_height",
    color=alt.Color("labels", 
                    scale=color_scale)
)

# Layers 3-5: confidence interval drawn as a thin grey bar from `lower` to
# `upper`, capped by a tick at each end.
error_bars = alt.Chart().mark_bar(color="#BAC1B8", size=2).encode(
    x=alt.X("labels",
            sort=labels),
    y="lower",
    y2="upper",
)

lower = alt.Chart().mark_tick(color="#BAC1B8", thickness=2, opacity=1).encode(
    x=alt.X("labels",
            sort=labels),
    y="lower",
)

# The x channel is declared with alt.X (it was alt.Y), matching the sibling
# layers so the channel class agrees with the channel it is assigned to.
upper = alt.Chart().mark_tick(color="#BAC1B8", thickness=2, opacity=1).encode(
    x=alt.X("labels",
            sort=labels),
    y="upper",
)

# One facet column per income percentile (the `index` column), header below the panel.
chart = alt.layer(points, text, error_bars, lower, upper, data=results_q_plot).facet(
    column=alt.Column("index", title=None, header=alt.Header(labelOrient='bottom', labelFontSize=15)),
    title={
        "text": "Relative Compensation by Income Percentile", 
        "subtitle": "Raw: No controls, 95% Confidence Interval",
        "subtitleColor": "gray",
        "dy": 25
    },
    spacing=10
).configure_view(
    strokeWidth=0
).configure_title(
    anchor='start',
    offset=5,
    fontSize=20
).configure_axis(
    domain=False
)

# PNG export goes through a Selenium-driven Chrome at 3x scale.
chart.save("../images/compensation_relative_percentile.png", method="selenium", webdriver="chrome", scale_factor=3.0)

chart