In [None]:
from u_cmab import Train, qini
import pandas as pd
import torch.nn as nn
import torch

# Data

In [None]:
# Fetch the Hillstrom e-mail campaign CSV and convert the four columns below
# to the pandas "category" dtype in a single pass.
data = pd.read_csv(
    "http://www.minethatdata.com/Kevin_Hillstrom_MineThatData_E-MailAnalytics_DataMiningChallenge_2008.03.20.csv"
).astype(
    {
        "segment": "category",
        "history_segment": "category",
        "zip_code": "category",
        "channel": "category",
    }
)

# Number of rows in the loaded data set.
data_size = len(data)

In [None]:
def parse_data_hillstrom(data):
    """Split a Hillstrom data frame into context, treatment and reward arrays.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "recency", "history_segment", "history",
        "mens", "womens", "zip_code", "newbie", "channel", "segment" and
        "visit".

    Returns
    -------
    tuple
        ``(context, treatment, reward)`` where

        * ``context`` is a 2-D ``float64`` NumPy array: the five pass-through
          columns followed by the one-hot encodings of "history_segment"
          (prefix ``hs``), "zip_code" (prefix ``zc``) and "channel"
          (prefix ``c``), in that order;
        * ``treatment`` is ``data["segment"].values``;
        * ``reward`` is ``data["visit"].values``.
    """
    passthrough_cols = ["recency", "history", "mens", "womens", "newbie"]
    one_hot_specs = [("history_segment", "hs"), ("zip_code", "zc"), ("channel", "c")]

    treatment = data["segment"]
    reward = data["visit"]

    one_hot_frames = [
        pd.get_dummies(data[column], prefix=prefix) for column, prefix in one_hot_specs
    ]
    context = pd.concat([data[passthrough_cols], *one_hot_frames], axis=1)

    # Newer pandas versions return boolean dummy columns; mixed with the
    # numeric columns that would make `.values` an object array, which cannot
    # be turned into a float tensor. Casting explicitly keeps the result a
    # plain float64 matrix on every pandas version.
    return (context.astype("float64").values, treatment.values, reward.values)

# Experiment
The experiment runs `10` times. Each run trains one ANN for 400 epochs and 2 URFs, so the full experiment takes considerable time on a PC.

In [None]:
# Network dimensions.
# NOTE(review): `hidden` is not referenced by any cell visible in this notebook.
hidden = 35
context_n = 18    # input width of the first Linear layer of the net
treatment_n = 3   # output width of the last Linear layer of the net

# Per-run result accumulators; the `_m` / `_w` suffixes match the "m" and "w"
# segments used in the experiment loop. Each name gets its own empty list.
forests_m, forests_w = [], []   # outputs of t.rf(...)
nets_m, nets_w = [], []         # uplift estimates from the network
treats_m, treats_w = [], []     # 0/1-recoded treatment indicators
resps_m, resps_w = [], []       # observed rewards

In [None]:
runs = 10

# Start from empty accumulators so that re-running this cell replaces the
# previous results instead of appending another batch of runs to them.
forests_m, forests_w = [], []
nets_m, nets_w = [], []
treats_m, treats_w = [], []
resps_m, resps_w = [], []

for i in range(runs):
    t = Train(data, parse_data_hillstrom)

    # Two random forests, one per segment ("w" and "m").
    u_w = t.rf(segment="w")
    u_m = t.rf(segment="m")

    # One network that produces `treatment_n` outputs per context row.
    net = nn.Sequential(
        nn.Linear(context_n, 36),
        nn.ReLU(),
        nn.Linear(36, 36),
        nn.ReLU(),
        nn.Linear(36, 36),
        nn.ReLU(),
        nn.Linear(36, 18),
        nn.ReLU(),
        nn.Linear(18, treatment_n)
    )
    t.nn(net, epochs=400, weight_decay=0, loss_f=nn.SmoothL1Loss(), batch_size=32, learning_rate=1e-5)

    # Evaluate the single network on the two test groups (w and m).
    c_w, tr_w, r_w = parse_data_hillstrom(t.test_data_w)
    c_m, tr_m, r_m = parse_data_hillstrom(t.test_data_m)

    # Recode the category codes into a 0/1 indicator: code 1 maps to 0 in both
    # groups, code 2 maps to 1 for "w" and code 0 maps to 1 for "m".
    # NOTE(review): presumably code 1 is the no-treatment segment - confirm
    # against the category order of data["segment"].
    tr_w = tr_w.codes - 1
    tr_m = abs(tr_m.codes - 1)

    # Inference only: disable autograd so no graph is built over the test set.
    with torch.no_grad():
        res_w = net(torch.tensor(c_w, dtype=torch.float, requires_grad=False))
        res_m = net(torch.tensor(c_m, dtype=torch.float, requires_grad=False))
        # Estimated uplift = output of the group's treatment minus output 1.
        u_hat_w = (res_w[:, 2] - res_w[:, 1]).numpy()
        u_hat_m = (res_m[:, 0] - res_m[:, 1]).numpy()

    # Record the results of this run.
    forests_w.append(u_w)
    forests_m.append(u_m)
    nets_w.append(u_hat_w)
    nets_m.append(u_hat_m)
    treats_w.append(tr_w)
    treats_m.append(tr_m)
    resps_w.append(r_w)
    resps_m.append(r_m)

    print(f"END OF RUN {i+1}")

In [None]:
# Plot the Qini curves from the accumulated per-run results. Each positional
# argument is a two-element list ordered [w, m]; the keyword lists each have
# two entries as well.
ax1 = qini(
    [treats_w, treats_m],      # treatment indicators
    [resps_w, resps_m],        # observed rewards
    [nets_w, nets_m],          # network uplift estimates
    [forests_w, forests_m],    # random-forest outputs
    urf_label=["URF (T=1)", "URF (T=2)"],
    urf_colors=["firebrick", "goldenrod"],
    urf_colors_bands=["deeppink", "orange"],
    treatment_names=["T=1", "T=2"],
    yticks=[0, 0.04, 0.08],
)