In [1]:
%%time

import tensorflow as tf
print(tf.__version__)
import sys
sys.path.append("..")
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
from catenets.models.jax import TNet, SNet, OFFSET_NAME, FlexTENet, OffsetNet, SNet1, SNet2
from catenets.experiment_utils.simulation_utils import simulate_treatment_setup

# catenets models: SNet, FlexTENet, OffsetNet, TNet, SNet1 (TARNet), SNet2 (DragonNet)
def plot_loss(history, *losses):
    """Draw one curve per requested metric and show the figure.

    Args:
        history: Object exposing a ``history`` mapping from metric name to a
            sequence of values (one per epoch, judging by the x-axis label).
        *losses: Names of the metrics to plot; each must be a key of
            ``history.history``.
    """
    for metric_name in losses:
        metric_values = history.history[metric_name]
        plt.plot(metric_values, label=metric_name)

    # Decorate the current pyplot figure, then render it.
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

def scaling(x, min, max):
    """Min-max scale ``x`` into [0, 1], clamping values outside ``[min, max]``.

    The parameter names shadow the ``min``/``max`` builtins; they are kept
    because callers pass them by keyword.

    Args:
        x: NumPy array of numeric values.
        min: Lower bound; values below it map to 0.0.
        max: Upper bound; values above it map to 1.0.

    Returns:
        ``numpy.ndarray`` shaped like ``x`` with values in [0, 1]. NaN entries
        in ``x`` stay NaN.

    Note:
        When ``max == min`` (e.g. a constant feature column) the plain formula
        evaluates 0/0 and yields NaN. In that case values equal to the bound
        now map to 0.0 instead.
    """
    span = max - min
    # Swap a zero range for 1 so in-range values (necessarily equal to ``min``)
    # scale to exactly 0 rather than NaN.
    if np.ndim(span) == 0:
        denominator = span if span != 0 else 1
    else:
        denominator = np.where(span == 0, 1, span)
    return np.where(x < min, 0.0, np.where(x > max, 1.0, (x - min) / denominator))

# Keras early-stopping callback configuration.
# NOTE(review): no use of `early_stopping` appears in the cells visible here — confirm it is still needed.
early_stopping = EarlyStopping(
    monitor='val_loss',  # quantity watched for improvement
    patience=500,        # epochs without improvement tolerated before training stops
    verbose=1,          # report when early stopping triggers
    mode='min',         # a lower monitored value counts as an improvement
    restore_best_weights=True  # restore the weights from the best monitored epoch
)

2024-02-24 23:02:39.760280: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-24 23:02:39.816332: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-24 23:02:39.816362: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-24 23:02:39.816398: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-24 23:02:39.824699: I tensorflow/core/platform/cpu_feature_g

2.14.0
CPU times: user 4.59 s, sys: 1.87 s, total: 6.46 s
Wall time: 8.59 s


In [2]:
# Location of the raw Criteo uplift CSV, relative to the notebook directory.
SAVE_DIR = "../data"
file_criteo = f"{SAVE_DIR}/criteo-uplift-v2.1.csv"

# Read the full comma-separated file; it is sub-sampled in a later cell.
df_criteo_ori = pd.read_csv(file_criteo, sep=',')

In [4]:
%%time

# Draw a reproducible 15% sample of the raw frame.
sample = 0.15
random_state = 20220720
df_criteo = df_criteo_ori.sample(frac=sample, random_state=random_state).reset_index(drop=True)

feature_columns = [f"f{i}" for i in range(12)]  # 'f0' ... 'f11'
X = df_criteo[feature_columns].values

# Min-max scale every feature column in place, each with its own min/max.
# NOTE(review): the min/max are computed over the whole sample (train,
# calibration and test rows together) — confirm this is intended.
for col in range(X.shape[1]):
    X[:, col] = scaling(X[:, col], min=np.min(X[:, col]), max=np.max(X[:, col]))

# Treatment and outcomes as (n, 1) column vectors.
T = df_criteo['treatment'].values.reshape(-1, 1)
Y_visit = df_criteo['visit'].values.reshape(-1, 1)
Y_conv = df_criteo['conversion'].values.reshape(-1, 1)

# All arrays are sliced with the same indices below, so they must be row-aligned.
assert len(X) == len(T) == len(Y_visit) == len(Y_conv)

# calculate len: 70% train, 5% calibration, remainder test (rows taken in order)
train_len = int(len(X) * 0.70)
cali_len = int(len(X) * 0.05)
test_len = len(X) - train_len - cali_len
cali_end = train_len + cali_len

# obtain train set
X_train = X[:train_len, :]
T_train = T[:train_len, :]
Y_visit_train = Y_visit[:train_len, :]
Y_conv_train = Y_conv[:train_len, :]

# obtain calibration set
X_cali = X[train_len:cali_end, :]
T_cali = T[train_len:cali_end, :]
Y_visit_cali = Y_visit[train_len:cali_end, :]
Y_conv_cali = Y_conv[train_len:cali_end, :]

# obtain test set
X_test = X[cali_end:, :]
T_test = T[cali_end:, :]
Y_visit_test = Y_visit[cali_end:, :]
Y_conv_test = Y_conv[cali_end:, :]

print(train_len, X_train.shape, X_test.shape, len(X), X_cali.shape)

# make covariate shift: keep only calibration/test rows with scaled f0 > 0.3
# and scaled f1 < 0.7; the training split is left unfiltered.
condition_cali = (X_cali[:, 0] > 0.3) & (X_cali[:, 1] < 0.7)
X_cali, T_cali, Y_visit_cali, Y_conv_cali = (
    split[condition_cali] for split in (X_cali, T_cali, Y_visit_cali, Y_conv_cali)
)

condition_test = (X_test[:, 0] > 0.3) & (X_test[:, 1] < 0.7)
X_test, T_test, Y_visit_test, Y_conv_test = (
    split[condition_test] for split in (X_test, T_test, Y_visit_test, Y_conv_test)
)

print(train_len, X_train.shape, X_test.shape, len(X), X_cali.shape)

1467857 (1467857, 12) (524236, 12) 2096939 (104846, 12)
1467857 (1467857, 12) (327160, 12) 2096939 (65226, 12)
CPU times: user 1.03 s, sys: 342 ms, total: 1.38 s
Wall time: 2.28 s


### SNet

In [21]:
# visit
# Fit an SNet on the training split using the visit outcome, then predict on X_test.
# reshape(-1) flattens the outcome and treatment arrays to 1-D before fitting.
# NOTE(review): the result is presumably the estimated treatment effect on visit — confirm against catenets.
s = SNet(batch_size=10000,n_iter=300)
s.fit(X_train, Y_visit_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_visit = s.predict(X_test)

In [29]:
# conv
# Fit a fresh SNet on the training split using the conversion outcome, then predict on X_test.
# `s` is rebound here, so the model fitted for the visit outcome is no longer reachable.
s = SNet(batch_size=10000,n_iter=300)
s.fit(X_train, Y_conv_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_conv = s.predict(X_test)

In [30]:
import sklearn 
import sklearn.metrics
from metric.Metric import *

# Results collected for averaging later; only one run is appended in this cell.
direct_ratio_SL_aucc_list = []
# ROI score per test row: conversion prediction divided by visit prediction.
# Denominators with magnitude below 1e-6 are replaced by +1e-6 to avoid dividing by ~0.
# NOTE(review): the replacement is always positive, so a tiny negative visit prediction changes sign — confirm intended.
roi_slearner_pre = cate_pred_s_conv / np.where(abs(cate_pred_s_visit) < 1e-6, 1e-6, cate_pred_s_visit)

# Conversion is passed as the reward and visit as the cost; treatment is binarised with > 0.5.
direct_ratio_SL_aucc = get_uplift_model_aucc_no_show(t=(T_test > 0.5).flatten(), y_reward=Y_conv_test.flatten(), y_cost=Y_visit_test.flatten(), roi_pred=roi_slearner_pre.flatten(), quantile=200)
direct_ratio_SL_aucc_list.append(direct_ratio_SL_aucc)


AUCC =  0.5504909236631894


In [31]:
# store test aucc for pic 
import pandas as pd

def get_aucc_cost_curve(aucc_list):
    """Average the cost and reward curves of several AUCC results.

    Args:
        aucc_list: Non-empty sequence of indexable results. Element ``[1]`` of
            each result is used as its ``delta_cost`` curve and element ``[2]``
            as its ``delta_reward`` curve; all curves must share one length.

    Returns:
        ``pandas.DataFrame`` with columns ``delta_cost`` and ``delta_reward``
        holding the element-wise mean of the curves across results.

    Raises:
        ValueError: If ``aucc_list`` is empty.
    """
    # Fail with a clear message instead of an obscure pandas constructor error.
    if len(aucc_list) == 0:
        raise ValueError("aucc_list must contain at least one AUCC result")

    # Stack the curves as (n_results, n_points) and average over results.
    cost_curves = np.array([result[1] for result in aucc_list])
    reward_curves = np.array([result[2] for result in aucc_list])

    return pd.DataFrame({
        'delta_cost': np.mean(cost_curves, axis=0),
        'delta_reward': np.mean(reward_curves, axis=0),
    })

# Average the collected runs into one curve (a DataFrame, despite the `_list` suffix).
A_direct_ratio_SL_aucc_list = get_aucc_cost_curve(direct_ratio_SL_aucc_list)
# Prints sum(delta_reward) / (last delta_reward * 201).
# NOTE(review): 201 is presumably quantile + 1 for the quantile=200 passed to the metric — confirm.
print("aucc = ", np.sum(A_direct_ratio_SL_aucc_list['delta_reward'].values) / (A_direct_ratio_SL_aucc_list['delta_reward'].values[-1] * 201))
# Persist the averaged curve (DataFrame index included) for plotting.
A_direct_ratio_SL_aucc_list.to_csv("../figure/CRI_d_snet_aucc_list.csv")



aucc =  0.5504909236631894


### OffsetNet

In [32]:

# visit
# Fit an OffsetNet on the training split using the visit outcome, then predict on X_test.
# reshape(-1) flattens the outcome and treatment arrays to 1-D before fitting.
s = OffsetNet(batch_size=10000,n_iter=300)
s.fit(X_train, Y_visit_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_visit = s.predict(X_test)

# conv
# A fresh OffsetNet is fitted for the conversion outcome; `s` is rebound to it.
s = OffsetNet(batch_size=10000,n_iter=300)
s.fit(X_train, Y_conv_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_conv = s.predict(X_test)


import sklearn 
import sklearn.metrics
from metric.Metric import *

# Reset the result list so it holds this model's run only.
# NOTE(review): the `SL` / `slearner` names are reused from the earlier cells although the model here is OffsetNet.
direct_ratio_SL_aucc_list = []
# ROI score per test row: conversion prediction divided by visit prediction.
# Denominators with magnitude below 1e-6 are replaced by +1e-6 to avoid dividing by ~0.
# NOTE(review): the replacement is always positive, so a tiny negative visit prediction changes sign — confirm intended.
roi_slearner_pre = cate_pred_s_conv / np.where(abs(cate_pred_s_visit) < 1e-6, 1e-6, cate_pred_s_visit)

# Conversion is passed as the reward and visit as the cost; treatment is binarised with > 0.5.
direct_ratio_SL_aucc = get_uplift_model_aucc_no_show(t=(T_test > 0.5).flatten(), y_reward=Y_conv_test.flatten(), y_cost=Y_visit_test.flatten(), roi_pred=roi_slearner_pre.flatten(), quantile=200)
direct_ratio_SL_aucc_list.append(direct_ratio_SL_aucc)

# store test aucc for pic 
import pandas as pd

# NOTE(review): this re-defines a function already defined in an earlier cell;
# consider defining it once and reusing it.
def get_aucc_cost_curve(aucc_list):
    """Average the cost and reward curves of several AUCC results.

    Args:
        aucc_list: Non-empty sequence of indexable results. Element ``[1]`` of
            each result is used as its ``delta_cost`` curve and element ``[2]``
            as its ``delta_reward`` curve; all curves must share one length.

    Returns:
        ``pandas.DataFrame`` with columns ``delta_cost`` and ``delta_reward``
        holding the element-wise mean of the curves across results.

    Raises:
        ValueError: If ``aucc_list`` is empty.
    """
    # Fail with a clear message instead of an obscure pandas constructor error.
    if len(aucc_list) == 0:
        raise ValueError("aucc_list must contain at least one AUCC result")

    # Stack the curves as (n_results, n_points) and average over results.
    cost_curves = np.array([result[1] for result in aucc_list])
    reward_curves = np.array([result[2] for result in aucc_list])

    return pd.DataFrame({
        'delta_cost': np.mean(cost_curves, axis=0),
        'delta_reward': np.mean(reward_curves, axis=0),
    })

# Average the collected runs into one curve (a DataFrame, despite the `_list` suffix).
A_direct_ratio_SL_aucc_list = get_aucc_cost_curve(direct_ratio_SL_aucc_list)
# Prints sum(delta_reward) / (last delta_reward * 201).
# NOTE(review): 201 is presumably quantile + 1 for the quantile=200 passed to the metric — confirm.
print("aucc = ", np.sum(A_direct_ratio_SL_aucc_list['delta_reward'].values) / (A_direct_ratio_SL_aucc_list['delta_reward'].values[-1] * 201))
# Persist the averaged curve (DataFrame index included) for plotting.
A_direct_ratio_SL_aucc_list.to_csv("../figure/CRI_d_OffsetNet_aucc_list.csv")



AUCC =  0.5196008712453385
aucc =  0.5196008712453385


### SNet1 (TARNet)

In [33]:

# visit
# Fit an SNet1 on the training split using the visit outcome, then predict on X_test.
# reshape(-1) flattens the outcome and treatment arrays to 1-D before fitting.
s = SNet1(batch_size=10000,n_iter=300)
s.fit(X_train, Y_visit_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_visit = s.predict(X_test)

# conv
# A fresh SNet1 is fitted for the conversion outcome; `s` is rebound to it.
s = SNet1(batch_size=10000,n_iter=300)
s.fit(X_train, Y_conv_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_conv = s.predict(X_test)


import sklearn 
import sklearn.metrics
from metric.Metric import *

# Reset the result list so it holds this model's run only.
# NOTE(review): the `SL` / `slearner` names are reused from the earlier cells although the model here is SNet1.
direct_ratio_SL_aucc_list = []
# ROI score per test row: conversion prediction divided by visit prediction.
# Denominators with magnitude below 1e-6 are replaced by +1e-6 to avoid dividing by ~0.
# NOTE(review): the replacement is always positive, so a tiny negative visit prediction changes sign — confirm intended.
roi_slearner_pre = cate_pred_s_conv / np.where(abs(cate_pred_s_visit) < 1e-6, 1e-6, cate_pred_s_visit)

# Conversion is passed as the reward and visit as the cost; treatment is binarised with > 0.5.
direct_ratio_SL_aucc = get_uplift_model_aucc_no_show(t=(T_test > 0.5).flatten(), y_reward=Y_conv_test.flatten(), y_cost=Y_visit_test.flatten(), roi_pred=roi_slearner_pre.flatten(), quantile=200)
direct_ratio_SL_aucc_list.append(direct_ratio_SL_aucc)

# store test aucc for pic 
import pandas as pd

# NOTE(review): this re-defines a function already defined in an earlier cell;
# consider defining it once and reusing it.
def get_aucc_cost_curve(aucc_list):
    """Average the cost and reward curves of several AUCC results.

    Args:
        aucc_list: Non-empty sequence of indexable results. Element ``[1]`` of
            each result is used as its ``delta_cost`` curve and element ``[2]``
            as its ``delta_reward`` curve; all curves must share one length.

    Returns:
        ``pandas.DataFrame`` with columns ``delta_cost`` and ``delta_reward``
        holding the element-wise mean of the curves across results.

    Raises:
        ValueError: If ``aucc_list`` is empty.
    """
    # Fail with a clear message instead of an obscure pandas constructor error.
    if len(aucc_list) == 0:
        raise ValueError("aucc_list must contain at least one AUCC result")

    # Stack the curves as (n_results, n_points) and average over results.
    cost_curves = np.array([result[1] for result in aucc_list])
    reward_curves = np.array([result[2] for result in aucc_list])

    return pd.DataFrame({
        'delta_cost': np.mean(cost_curves, axis=0),
        'delta_reward': np.mean(reward_curves, axis=0),
    })

# Average the collected runs into one curve (a DataFrame, despite the `_list` suffix).
A_direct_ratio_SL_aucc_list = get_aucc_cost_curve(direct_ratio_SL_aucc_list)
# Prints sum(delta_reward) / (last delta_reward * 201).
# NOTE(review): 201 is presumably quantile + 1 for the quantile=200 passed to the metric — confirm.
print("aucc = ", np.sum(A_direct_ratio_SL_aucc_list['delta_reward'].values) / (A_direct_ratio_SL_aucc_list['delta_reward'].values[-1] * 201))
# Persist the averaged curve (DataFrame index included) for plotting.
A_direct_ratio_SL_aucc_list.to_csv("../figure/CRI_d_SNet1_aucc_list.csv")


AUCC =  0.5371991364330178
aucc =  0.5371991364330178


### SNet2 (DragonNet)

In [12]:
%%time
# visit
# Fit an SNet2 on the training split using the visit outcome, then predict on X_test.
# reshape(-1) flattens the outcome and treatment arrays to 1-D before fitting.
s = SNet2(batch_size=10000,n_iter=300)
s.fit(X_train, Y_visit_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_visit = s.predict(X_test)

# conv
# A fresh SNet2 is fitted for the conversion outcome; `s` is rebound to it.
s = SNet2(batch_size=10000,n_iter=300)
s.fit(X_train, Y_conv_train.reshape(-1), T_train.reshape(-1))
cate_pred_s_conv = s.predict(X_test)


import sklearn 
import sklearn.metrics
from metric.Metric import *

# Reset the result list so it holds this model's run only.
# NOTE(review): the `SL` / `slearner` names are reused from the earlier cells although the model here is SNet2.
direct_ratio_SL_aucc_list = []
# ROI score per test row: conversion prediction divided by visit prediction.
# Denominators with magnitude below 1e-6 are replaced by +1e-6 to avoid dividing by ~0.
# NOTE(review): the replacement is always positive, so a tiny negative visit prediction changes sign — confirm intended.
roi_slearner_pre = cate_pred_s_conv / np.where(abs(cate_pred_s_visit) < 1e-6, 1e-6, cate_pred_s_visit)

# Conversion is passed as the reward and visit as the cost; treatment is binarised with > 0.5.
direct_ratio_SL_aucc = get_uplift_model_aucc_no_show(t=(T_test > 0.5).flatten(), y_reward=Y_conv_test.flatten(), y_cost=Y_visit_test.flatten(), roi_pred=roi_slearner_pre.flatten(), quantile=200)
direct_ratio_SL_aucc_list.append(direct_ratio_SL_aucc)

# store test aucc for pic 
import pandas as pd

# NOTE(review): this re-defines a function already defined in an earlier cell;
# consider defining it once and reusing it.
def get_aucc_cost_curve(aucc_list):
    """Average the cost and reward curves of several AUCC results.

    Args:
        aucc_list: Non-empty sequence of indexable results. Element ``[1]`` of
            each result is used as its ``delta_cost`` curve and element ``[2]``
            as its ``delta_reward`` curve; all curves must share one length.

    Returns:
        ``pandas.DataFrame`` with columns ``delta_cost`` and ``delta_reward``
        holding the element-wise mean of the curves across results.

    Raises:
        ValueError: If ``aucc_list`` is empty.
    """
    # Fail with a clear message instead of an obscure pandas constructor error.
    if len(aucc_list) == 0:
        raise ValueError("aucc_list must contain at least one AUCC result")

    # Stack the curves as (n_results, n_points) and average over results.
    cost_curves = np.array([result[1] for result in aucc_list])
    reward_curves = np.array([result[2] for result in aucc_list])

    return pd.DataFrame({
        'delta_cost': np.mean(cost_curves, axis=0),
        'delta_reward': np.mean(reward_curves, axis=0),
    })

# Average the collected runs into one curve (a DataFrame, despite the `_list` suffix).
A_direct_ratio_SL_aucc_list = get_aucc_cost_curve(direct_ratio_SL_aucc_list)
# Prints sum(delta_reward) / (last delta_reward * 201).
# NOTE(review): 201 is presumably quantile + 1 for the quantile=200 passed to the metric — confirm.
print("aucc = ", np.sum(A_direct_ratio_SL_aucc_list['delta_reward'].values) / (A_direct_ratio_SL_aucc_list['delta_reward'].values[-1] * 201))
# Persist the averaged curve (DataFrame index included) for plotting.
A_direct_ratio_SL_aucc_list.to_csv("../figure/CRI_d_SNet2_aucc_list.csv")

AUCC =  0.5374919126341383
aucc =  0.5374919126341383
CPU times: user 44min 7s, sys: 35min 44s, total: 1h 19min 52s
Wall time: 8min 52s
