In [None]:
import random
import numpy as np
import pandas as pd

from efin import *
from functools import partial
from esn_tarnet import *
from feature_select import *
from s_learner import *
from t_learner import *
from tarnet import *

# Explicit import: set_seed uses `torch`, which this notebook otherwise only
# obtains indirectly (if at all) through the wildcard imports above.
import torch


def set_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's `random`, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    autotuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN speed for deterministic algorithm selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(42)

In [None]:
# Read the feature list.
# NOTE: pickle.load can execute arbitrary code; only load pickle files from a trusted source.
import pickle

with open('./feature_list_bonus_order_submit_id.pkl', 'rb') as pkl_file:
    feature_list = pickle.load(pkl_file)

# Number of entries in the loaded list.
print(len(feature_list))

In [None]:
# Load only the feature columns plus `is_treatment`, `gmv` and `roas_label`,
# then replace every missing value with 0.
df = pd.read_parquet(
    '/mlx_devbox/users/wangyuxin.huoshan/playground/bonus_train_data/bonus_order_submit_id_train_valid_id',
    columns=feature_list + ['is_treatment', 'gmv', 'roas_label'],
).fillna(0)

# Row counts per `is_treatment` value, then per `roas_label` value.
print(df.groupby('is_treatment').size())
print(df.groupby('roas_label').size())

In [None]:
# Run the project's feature-selection helper against the `roas_label` column.
# NOTE(review): the positional arguments `100` and the .pkl path are presumably
# the number of features to keep and the output file -- confirm in feature_select.
top_features = features_select(
    df,
    feature_list,
    'roas_label',
    100,
    './feature_list_bonus_order_submit_id_selected.pkl',
)
top_features

In [None]:
# Reload the feature list and the training frame.
# NOTE(review): this reads the full feature list again, not the `_selected.pkl`
# file passed to features_select above, while the files written further down
# are named `..._selected_discrete...` -- confirm which list is intended.
with open('./feature_list_bonus_order_submit_id.pkl', 'rb') as pkl_file:
    feature_list = pickle.load(pkl_file)

print(len(feature_list))

# Same columns as the earlier load: features plus treatment / gmv / label; missing values -> 0.
df = pd.read_parquet(
    '/mlx_devbox/users/wangyuxin.huoshan/playground/bonus_train_data/bonus_order_submit_id_train_valid_id',
    columns=feature_list + ['is_treatment', 'gmv', 'roas_label'],
).fillna(0)

In [None]:
import pickle

# Collect the low-cardinality features that will be fed through embedding
# tables, together with the table size to allocate for each of them.
discrete_size_cols = []      # embedding table size per discrete feature
feature_list_discrete = []   # names of the discrete features, same order as above

for each in feature_list:
    column = df[each]
    class_num = column.nunique()
    # Only columns with at most 40 distinct values are treated as discrete.
    if class_num > 40:
        continue
    min_index = column.min()
    # Only columns whose smallest value is 0 are kept.
    if min_index != 0:
        continue
    max_index = column.max()
    # An embedding table needs max_index + 1 rows. Sizing by the number of
    # distinct values alone is too small when the ids are not contiguous
    # (e.g. values {0, 5, 10}). For contiguous ids 0..k-1 this still equals
    # the previous `nunique + 1`.
    num_embeddings = max(class_num, int(max_index)) + 1
    print(f"Feature {each}: min index={min_index}, max index={max_index}, embedding num_embeddings={num_embeddings}")
    discrete_size_cols.append(num_embeddings)
    feature_list_discrete.append(each)

print(len(feature_list_discrete))

# Save both lists to disk.
with open('./feature_list_bonus_order_submit_id_selected_discrete.pkl', 'wb') as f:
    pickle.dump(feature_list_discrete, f)

with open('./feature_list_bonus_order_submit_id_selected_discrete_size.pkl', 'wb') as f:
    pickle.dump(discrete_size_cols, f)



In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
# Option 1: S-learner. Each model cell in this notebook rebinds `model` and
# `loss_f`, so only the last one executed is the one that gets trained.
# NOTE(review): treatment_label_list is [0, 1, 2, 3] here but the model.fit
# call later in this notebook passes [0, 1, 2] -- confirm which is correct.
# NOTE(review): output_activation_base is None while fit uses
# loss_type='BCELoss' -- confirm the model produces probabilities.
model = Slearner(
    input_dim=len(feature_list),
    discrete_size_cols=discrete_size_cols,
    embedding_dim=3,
    base_hidden_dims=[64, 32, 32, 16],
    output_activation_base=None,
    base_hidden_func=torch.nn.ELU(),
    task='classification',
    classi_nums=2,
    treatment_label_list=[0, 1, 2, 3],
    model_type='Slearner',
    device=device,
).to(device)

loss_f = partial(slearn_loss)

In [None]:
# Option 2: T-learner. Each model cell in this notebook rebinds `model` and
# `loss_f`, so only the last one executed is the one that gets trained.
# NOTE(review): treatment_label_list is [0, 1, 2, 3] here but the model.fit
# call later in this notebook passes [0, 1, 2] -- confirm which is correct.
# NOTE(review): output_activation_base is None while fit uses
# loss_type='BCELoss' -- confirm the model produces probabilities.
model = Tlearner(
    input_dim=len(feature_list),
    discrete_size_cols=discrete_size_cols,
    embedding_dim=3,
    base_hidden_dims=[64, 32, 32, 16],
    output_activation_base=None,
    base_hidden_func=torch.nn.ELU(),
    task='classification',
    classi_nums=2,
    treatment_label_list=[0, 1, 2, 3],
    model_type='Tlearner',
    device=device,
).to(device)

loss_f = partial(tlearn_loss)

In [None]:
# Option 3: TARNet. Each model cell in this notebook rebinds `model` and
# `loss_f`, so only the last one executed is the one that gets trained.
# NOTE(review): treatment_label_list is [0, 1, 2, 3] here but the model.fit
# call later in this notebook passes [0, 1, 2] -- confirm which is correct.
model = Tarnet(
    input_dim=len(feature_list),
    discrete_size_cols=discrete_size_cols,
    embedding_dim=3,
    share_dim=64,
    share_hidden_dims=[256, 128, 64, 64],
    base_hidden_dims=[64, 32, 32, 16],
    output_activation_base=torch.nn.Sigmoid(),
    share_hidden_func=torch.nn.ELU(),
    base_hidden_func=torch.nn.ELU(),
    task='classification',
    classi_nums=2,
    treatment_label_list=[0, 1, 2, 3],
    model_type='Tarnet',
    device=device,
).to(device)

loss_f = partial(tarnet_loss)

In [None]:
# Option 4: ESN-TARNet. Each model cell in this notebook rebinds `model` and
# `loss_f`, so only the last one executed is the one that gets trained.
# NOTE(review): treatment_label_list is [0, 1, 2, 3] here but the model.fit
# call later in this notebook passes [0, 1, 2] -- confirm which is correct.
model = ESN_Tarnet(
    input_dim=len(feature_list),
    discrete_size_cols=discrete_size_cols,
    embedding_dim=3,
    share_dim=64,
    share_hidden_dims=[256, 128, 64, 64],
    base_hidden_dims=[64, 32, 32, 16],
    output_activation_base=torch.nn.Sigmoid(),
    ipw_hidden_dims=[256, 128, 64, 64],
    output_activation_ipw=None,
    share_hidden_func=torch.nn.ELU(),
    base_hidden_func=torch.nn.ELU(),
    ipw_hidden_func=torch.nn.ELU(),
    task='classification',
    classi_nums=2,
    treatment_label_list=[0, 1, 2, 3],
    model_type='ESN_Tarnet',
    device=device,
).to(device)

loss_f = partial(esn_tarnet_loss)



In [None]:
# Train whichever model the most recently executed model cell bound to `model`.
# NOTE(review): treatment_label_list is [0, 1, 2] here, while every model
# constructor in this notebook receives [0, 1, 2, 3] -- confirm which is correct.
# NOTE(review): valid_perc=True looks like a flag, though the name suggests a
# fraction -- confirm against the fit() signature.
fit_kwargs = {
    # data
    'df': df,
    'feature_list': feature_list,
    'discrete_cols': feature_list_discrete,
    'label_y': 'roas_label',
    'label_treatment': 'is_treatment',
    'treatment_label_list': [0, 1, 2],
    'valid_perc': True,
    # optimisation
    'epochs': 300,
    'batch_size': 256,
    'learning_rate': 1e-5,
    'loss_f': loss_f,
    'task': 'classification',
    'loss_type': 'BCELoss',
    # data loading / runtime
    'num_workers': 40,
    'pin_memory': True,
    'device': device,
    'tensorboard': False,
    # checkpointing
    'checkpoint_path': './model_checkpoint.pth',
    'if_continued_train': 0,
}
model.fit(**fit_kwargs)

In [None]:
# Save the model parameters (state dict only) to disk.
model_state = model.state_dict()
torch.save(model_state, "./model.pth")
