In [1]:
import os

import pandas as pd
import torch
from torch_rechub.basic.features import DenseFeature, SparseFeature
from torch_rechub.models.multi_task import single_task
from torch_rechub.trainers import MTLTrainer
from torch_rechub.utils.data import DataGenerator

In [None]:
# --- Experiment configuration ---------------------------------------------
# Which model branch to build, and where the trainer is told to store weights.
model_name = 'single_task'
save_dir = './save_dir'
# data_path='/openbayes/input/input0'

# Optimisation settings.
epoch = 3
batch_size = 32768
learning_rate = 1e-3
weight_decay = 1e-5

# Hardware selection (forwarded to the trainer) and reproducibility.
gpus = None
seed = 2024
torch.manual_seed(seed)

In [None]:
# Pick the compute device: the second GPU when the machine has one, otherwise
# the first GPU, and the CPU when CUDA is not available at all.
device = 'cpu'
if torch.cuda.is_available():
    print('cuda ready...')
    # 'cuda:1' is only a valid device ordinal when at least two GPUs are
    # visible; fall back to 'cuda:0' on single-GPU machines.
    device = 'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'

In [4]:
task_select = 0  #0: conversion, 1:click


def get_aliexpress_data_dict(task_select=task_select, data_path='data/aliexpress'):
    """Read the train/test CSVs and build the inputs for a single-task model.

    Columns whose name starts with "categorical" become sparse features and
    columns whose name starts with "numerical" become dense features. Only
    one label column is kept, chosen by ``task_select``.

    Args:
        task_select: Index into ``["conversion", "click"]`` selecting the
            label column (0: conversion, 1: click).
        data_path: Directory containing ``train.csv`` and ``test.csv``.

    Returns:
        A tuple ``(features, x_train, y_train, x_test, y_test)`` where
        ``features`` lists the sparse features followed by the dense ones,
        ``x_*`` map each feature column name to its values for that split,
        and ``y_*`` hold the selected label column for that split.
    """
    train_frame = pd.read_csv(data_path + '/train.csv')
    test_frame = pd.read_csv(data_path + '/test.csv')
    n_train = train_frame.shape[0]
    print("train : test = %d %d" % (len(train_frame), len(test_frame)))

    # Stack both splits so per-column maxima cover ids seen in either split.
    merged = pd.concat([train_frame, test_frame], axis=0)

    # Partition the columns by their name prefix.
    sparse_cols, dense_cols = [], []
    for column in merged.columns.values.tolist():
        if column.startswith("categorical"):
            sparse_cols.append(column)
        elif column.startswith("numerical"):
            dense_cols.append(column)
    print("sparse cols:%d dense cols:%d" % (len(sparse_cols), len(dense_cols)))

    # Sparse features first, then dense ones; vocabulary size is max id + 1.
    features = []
    for column in sparse_cols:
        features.append(SparseFeature(column, merged[column].max()+1, embed_dim=5))
    for column in dense_cols:
        features.append(DenseFeature(column))

    # Split every feature column positionally at the train/test boundary.
    x_train, x_test = {}, {}
    for column in sparse_cols + dense_cols:
        column_values = merged[column].values
        x_train[column] = column_values[:n_train]
        x_test[column] = column_values[n_train:]

    # Keep the chosen label as a one-column selection, split the same way.
    chosen_label = [["conversion", "click"][task_select]]
    label_values = merged[chosen_label].values
    y_train = label_values[:n_train]
    y_test = label_values[n_train:]

    return features, x_train, y_train, x_test, y_test

In [None]:
# Build the model and its inputs for the configured model_name.
if model_name == "single_task":
    task_types = ["classification"]
    features, x_train, y_train, x_test, y_test = get_aliexpress_data_dict()
    # Set model hyperparameters
    model = single_task(features, task_types, hidden_units={"dims": [128, 64]}, tower_params_list={"dims": [32]})
else:
    # Fail with a clear message instead of a NameError on the undefined
    # data/model variables used below.
    raise ValueError(f"Unsupported model_name: {model_name!r}; this notebook only handles 'single_task'")

dg = DataGenerator(x_train, y_train)
# NOTE(review): the test split is passed as both the validation and the test
# set, so anything tuned on the validation loader has seen the test data —
# consider a separate held-out split.
train_dataloader, val_dataloader, test_dataloader = dg.generate_dataloader(x_val=x_test, y_val=y_test, x_test=x_test, y_test=y_test, batch_size=batch_size)

In [7]:
# Train the model.
# The trainer is given save_dir as its model_path; make sure that directory
# exists so saving does not fail after training has already run.
# NOTE(review): upstream torch_rechub's MTLTrainer writes its checkpoint under
# model_path and uses mode/seed in the file name — confirm for this version.
os.makedirs(save_dir, exist_ok=True)
mtl_trainer = MTLTrainer(
    model,
    task_types=task_types,
    optimizer_params={"lr": learning_rate, "weight_decay": weight_decay},
    n_epoch=epoch,
    earlystop_patience=10,
    device=device,
    gpus=gpus,
    model_path=save_dir,
)

# Result log for this (model, task) pair; create its folder up front so the
# later writes cannot fail on a missing directory.
file_path = 'path/to/save/output/result_{}_{}.txt'.format(model_name, task_select)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
mtl_trainer.fit(train_dataloader, val_dataloader, mode = 'mark1', seed = 'mark2', file_path = file_path)

In [None]:
# Score the trained model on the test loader and append the result to the log.
auc = mtl_trainer.evaluate(mtl_trainer.model, test_dataloader)
print(f'test auc: {auc}')

# Build one comma-separated line: a 'test_result' tag followed by the scores.
# A bare scalar is wrapped in a list so the concatenation below cannot fail.
epo = ['test_result']
scores = list(auc) if isinstance(auc, (list, tuple)) else [auc]
my_list = ', '.join(map(str, epo + scores))

# Appending is best-effort: an I/O failure is reported, not raised, so the
# printed score above is not lost to a logging problem.
try:
    with open(file_path, 'a') as file:
        file.write(my_list + '\n')
except OSError as e:
    print(f"An error occurred while adding to the file: {e}")