# 01 - Introduction to Causal Inference



## What is Causal Inference



## Why We Do Causal Inference
 


## Machine Learning and Causal Inference
 


## Association and Causation
 

In [None]:
import pandas as pd
import numpy as np
from scipy.special import expit
import seaborn as sns
from matplotlib import pyplot as plt
from cycler import cycler



# Grayscale plotting style used by the figures in this notebook.
# The three lists are index-aligned: series k is drawn with color[k],
# linestyle[k] and marker[k]. They are also kept as plain module-level lists
# so later cells can pick individual entries (e.g. color[0], marker[1]).
color = ['0.3', '0.5', '0.7', '0.5']
linestyle = ['-', '--', ':', '-.']
marker = ['o', 'v', 'd', 'p']

# Adding cyclers of equal length zips them into a single property cycle.
default_cycler = (
    cycler(color=color)
    + cycler(linestyle=linestyle)
    + cycler(marker=marker)
)

# Apply the global matplotlib defaults: base font size and the property
# cycle defined above.
plt.rc('font', size=20)
plt.rc('axes', prop_cycle=default_cycler)

In [None]:
# Read the sales dataset from the path relative to the notebook's working
# directory, then preview its first six rows as the cell output.
data = pd.read_csv(filepath_or_buffer="./data/xmas_sales.csv")
data.head(n=6)

### The Treatment and the Outcome


### The Fundamental Problem of Causal Inference
 


In [None]:
# Box plot of weekly_amount_sold, one box per value of is_on_sale.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
sns.boxplot(data=data, x="is_on_sale", y="weekly_amount_sold", ax=ax)

# Explicit label and tick sizes for this figure.
ax.set_xlabel("is_on_sale", fontsize=20)
ax.set_ylabel("weekly_amount_sold", fontsize=20)
# Kept as the last statement: tick_params returns None, so the cell emits no
# text repr alongside the figure.
ax.tick_params(axis='both', which='major', labelsize=18)

### Causal Models
 

### Interventions


### Individual Treatment Effect



### Potential Outcomes
 
### Consistency and Stable Unit Treatment Values


### Causal Quantities of Interest



### Causal Quantities: An Example
 

In [None]:
# Toy table of six units in which both y0 and y1 are filled in for every row.
#   y  : y1 where t == 1 and y0 where t == 0 (t is strictly 0/1 here, so the
#        arithmetic below simply selects one of the two columns)
#   te : row-wise difference y1 - y0
(
    pd.DataFrame({
        "i":  [1, 2, 3, 4, 5, 6],
        "y0": [200, 120, 300, 450, 600, 600],
        "y1": [220, 140, 400, 500, 600, 800],
        "t":  [0, 0, 0, 1, 1, 1],
        "x":  [0, 0, 1, 0, 0, 1],
    })
    .assign(
        y=lambda df: (df["y1"] * df["t"] + df["y0"] * (1 - df["t"])).astype(int),
        te=lambda df: df["y1"] - df["y0"],
    )
)

In [None]:
# Same six units, but each row now carries only one of y0 / y1; the other is
# NaN (y1 is missing where t == 0, y0 is missing where t == 1).
#   y  : chosen with np.where rather than arithmetic, because multiplying a
#        NaN by 0 would still give NaN; the selected values are never NaN, so
#        the cast to int is safe
#   te : y1 - y0, which is NaN on every row since one operand is always missing
(
    pd.DataFrame({
        "i":  [1, 2, 3, 4, 5, 6],
        "y0": [200, 120, 300, np.nan, np.nan, np.nan],
        "y1": [np.nan, np.nan, np.nan, 500, 600, 800],
        "t":  [0, 0, 0, 1, 1, 1],
        "x":  [0, 0, 1, 0, 0, 1],
    })
    .assign(
        y=lambda df: np.where(df["t"].eq(1), df["y1"], df["y0"]).astype(int),
        te=lambda df: df["y1"] - df["y0"],
    )
)

## Bias
 
### The Bias Equation 


 
### A Visual Guide to Bias
 

In [None]:
# Scatter of weekly_amount_sold against avg_week_sales, split by is_on_sale,
# drawn on top of a single regression line fitted to all rows.

# Same value as the notebook-wide font setting; repeated so the cell renders
# identically when run on its own.
plt.rc('font', size=20)

# sns.lmplot is a figure-level function: it creates its own figure. No
# plt.figure() call is made beforehand, since that would only leave an empty
# "Figure with 0 Axes" in the cell output.
sns.lmplot(data=data,
           ci=None,
           x="avg_week_sales",
           y="weekly_amount_sold",
           scatter=False,  # line only; the points are added per group below
           height=4, aspect=2)

# Bind `fig` to the figure lmplot just made current, so the name refers to
# the figure that is actually displayed.
fig = plt.gcf()

# Split once instead of re-running each query for x and again for y.
on_sale = data.query("is_on_sale==1")
not_on_sale = data.query("is_on_sale==0")

# plt.scatter draws on the current axes, i.e. the lmplot axes created above.
plt.scatter(x=on_sale["avg_week_sales"],
            y=on_sale["weekly_amount_sold"],
            label="on sale",
            color=color[0], alpha=.8, marker=marker[0])

plt.scatter(x=not_on_sale["avg_week_sales"],
            y=not_on_sale["weekly_amount_sold"],
            label="not on sale",
            color=color[2], alpha=.6, marker=marker[1])

# Numeric font size (in points); the trailing semicolon keeps the Legend
# object's repr out of the cell output.
plt.legend(fontsize=14);
    


## Identifying the Treatment Effect
 
### The Independence Assumption
 

### Identification with Randomization 
 

## Key Ideas
 