In [1]:
import pandas as pd
import numpy as np

## Matching for Treatment Effect

In [2]:
data = {
    'treatment' : ['NJ', 'NJ', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA'],
    'restChain' : ['BK', 'KFC', 'KFC', 'BK', 'BK', 'BK', 'BK', 'KFC'],
    'initEmpl' : [22.5, 14.0, 13.8, 26.5, 20.0, 13.5, 32.5, 21.0],
    'finalEmpl' : [30.0, 12.5, 17.0, 18.5, 19.5, 21.0, 26.5, 23.0]
}

restaurants = pd.DataFrame(data)
restaurants.head()

Unnamed: 0,treatment,restChain,initEmpl,finalEmpl
0,NJ,BK,22.5,30.0
1,NJ,KFC,14.0,12.5
2,PA,KFC,13.8,17.0
3,PA,BK,26.5,18.5
4,PA,BK,20.0,19.5


In [28]:
# divide treatment group and control group
treatment = restaurants.iloc[0:2, :]
control = restaurants.iloc[2:8, :]

### Calculate the distance between treatment group and control group

In [4]:
def distance(treatment, control):
    match = []
    for i in range(len(control)):
        if treatment['restChain'] != control['restChain'].iloc[i]:
            distance = round(100 + abs(treatment['initEmpl'] - control['initEmpl'].iloc[i]), 1)
            match.append(distance)
        else:
            distance = round(abs(treatment['initEmpl'] - control['initEmpl'].iloc[i]), 1)
            match.append(distance)  
    return match

In [23]:
all_match = []
for i in range(len(treatment)):
    match = distance(treatment.iloc[i, :], control)
    all_match.append(match)
for i in range(len(all_match)):
    print(f'Distances measure for treatment{i+1}:')
    print('------------------------------------------------------')
    for j in range(len(all_match[i])):
        print(f'The distance between treatment{i+1} and control{j+1} is {all_match[i][j]}.')
    print('\n')
    print(f'The minimum distance for treatment{i+1} is control{all_match[i].index(np.min([all_match[i]])) + 1} : {np.min([all_match[i]])}.')
    print('\n')

Distances measure for treatment1:
------------------------------------------------------
The distance between treatment1 and control1 is 108.7.
The distance between treatment1 and control2 is 4.0.
The distance between treatment1 and control3 is 2.5.
The distance between treatment1 and control4 is 9.0.
The distance between treatment1 and control5 is 10.0.
The distance between treatment1 and control6 is 101.5.


The minimum distance for treatment1 is control3 : 2.5.


Distances measure for treatment2:
------------------------------------------------------
The distance between treatment2 and control1 is 0.2.
The distance between treatment2 and control2 is 112.5.
The distance between treatment2 and control3 is 106.0.
The distance between treatment2 and control4 is 100.5.
The distance between treatment2 and control5 is 118.5.
The distance between treatment2 and control6 is 7.0.


The minimum distance for treatment2 is control1 : 0.2.




### Calculate Average Treatment Effect of the Treated (ATT)

In [27]:
ATT = np.mean((treatment1['finalEmpl'] - control['finalEmpl'].iloc[2]) + 
              (treatment2['finalEmpl'] - control['finalEmpl'].iloc[0]))
print(f'The Average Treatment Effect of the Treated with matching is {ATT}.')

The Average Treatment Effect of the Treated with matching is 6.0.


### Calculate Average Treatment Effect (ATE)

In [29]:
ATE = round(np.mean(treatment['finalEmpl'] - treatment['initEmpl']) - 
            np.mean(control['finalEmpl'] - control['initEmpl']), 1)
print(f'The Average Treatment Effect with matching is {ATE}.')

The Average Treatment Effect with matching is 3.3.


## Summary
    1. Without matching: 
        ATT = ATE = Average(yi-xi2) for treatment group - Average(yi-xi2) for control group


\begin{equation}
ATT = ATE = Average(y_{ti} - x_{ti2}) - Average(y_{ci} - x_{ci2})
\end{equation}

    2. With matching: 
        ATT = Average(yi - xi2) for each treatment observation and its matching control observation
        ATE = Average(yi-xi2) for treatment group - Average(yti-xi2) for control group

<center>
$ATT$ $=$ $Average(y_{ti} - x_{c_{match}i2})$ <br>
$ATE$ $=$ $Average(y_{ti} - x_{ti2}) - Average(y_{ci} - x_{ci2})$
</center>