In [1]:
import sys
import dgl
import dgl.function as fn
import os
import multiprocessing as mp
from tqdm import tqdm
import pdb
import numpy as np
import torch
import torch.nn as nn
import logging
from utils.parser import parse_args
from utils.dataloader import Dataloader
from utils.utils import config, construct_negative_graph, choose_model, load_mf_model, NegativeGraph
from utils.tester import Tester
from models.sampler import NegativeSampler
import wandb

In [2]:
import argparse


def _str2bool(value):
    """Parse a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool('False')`` is ``True`` because
    any non-empty string is truthy. This converter accepts the usual spellings
    and raises ``argparse.ArgumentTypeError`` for anything else.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


# Experiment configuration. Every option keeps its original name and default, so
# `args.<name>` is unchanged for downstream cells.
parser = argparse.ArgumentParser()

# ---- data / optimisation ---------------------------------------------------
parser.add_argument('--dataset', default = 'TaoBao', type = str,
                    help = 'Dataset to use')
parser.add_argument('--seed', default = 2022, type = int,
                    help = 'seed for experiment')
parser.add_argument('--embed_size', default = 32, type = int,
                    help = 'embedding size for all layer')
parser.add_argument('--lr', default = 0.05, type = float,
                    help = 'learning rate')
parser.add_argument('--weight_decay', default = 8e-8, type = float,
                    help = "weight decay for adam optimizer")

# ---- model / training loop -------------------------------------------------
parser.add_argument('--model', default = 'moe', type = str,
                    help = 'model selection')  # dgrec base moe test
parser.add_argument('--epoch', default = 1000, type = int,
                    help = 'epoch number')
parser.add_argument('--patience', default = 10, type = int,
                    help = 'early_stop validation')
parser.add_argument('--batch_size', default = 2048, type = int,
                    help = 'batch size')
parser.add_argument('--layers', default = 1, type = int,
                    help = 'layer number')
parser.add_argument('--gpu', default = 0, type = int,
                    help = '-1 for cpu, 0 for gpu:0')
# nargs='+' with type=int: `--k_list 100 300` -> [100, 300].
# The previous type=list split a single string into characters.
parser.add_argument('--k_list', default = [100, 300], type = int, nargs = '+',
                    help = 'topk evaluation')
parser.add_argument('--k', default = 20, type = int,
                    help = 'neighbor number in each GNN aggregation')
parser.add_argument('--neg_number', default = 4, type = int,
                    help = 'negative sampler number for each positive pair')
# nargs='+' keeps the value a list when supplied on the command line.
parser.add_argument('--metrics', default = ['recall', 'hit_ratio', 'coverage'],
                    type = str, nargs = '+',
                    help = 'evaluation metrics')

# ---- gaussian kernel -------------------------------------------------------
parser.add_argument('--sigma', default = 1.0, type = float,
                    help = 'sigma for gaussian kernel')
parser.add_argument('--gamma', default = 2.0, type = float,
                    help = 'gamma for gaussian kernel')

# ---- category re-balancing / context codes ---------------------------------
parser.add_argument('--category_balance', default = True, type = _str2bool,
                    help = 'whether make loss category balance')
parser.add_argument('--beta_class', default = 0.9, type = float,
                    help = 'class re-balanced loss beta')
# The two help strings below were swapped in the original.
parser.add_argument('--context_code_dim', default = 32, type = int,
                    help = 'interest dim')
parser.add_argument('--num_context_codes', default = 32, type = int,
                    help = 'interest num')
parser.add_argument('--n_experts', default = 5, type = int,
                    help = 'n_experts')

# ---- logging / mixture-of-experts ------------------------------------------
parser.add_argument('--wandb_enable', default = True, type = _str2bool,
                    help = 'whether to enable wandb')
parser.add_argument('--hidden_size', default = 32, type = int,
                    help = 'hidden size')
parser.add_argument('--k_experts', default = 2, type = int,
                    help = 'k_experts')
parser.add_argument('--moe', default = True, type = _str2bool,
                    help = 'whether to enable moe')

# NOTE(review): the original help text here was a copy-pasted 'layer number';
# the meaning of --sub is not visible in this notebook -- confirm and document.
parser.add_argument('--sub', default = 'rand', type = str,
                    help = 'sub')

# Empty argv: inside a notebook sys.argv belongs to the kernel launcher, so only
# the defaults above are used.
args = parser.parse_args([])

In [3]:
# Apply the configured seed before any data or model construction so that
# parameter initialisation and torch/numpy-based sampling are repeatable.
# (--seed was parsed but never applied in the visible cells.)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Use the requested GPU when --gpu >= 0 and CUDA is available, otherwise the CPU.
use_cuda = args.gpu >= 0 and torch.cuda.is_available()
device = torch.device('cuda:{}'.format(args.gpu) if use_cuda else 'cpu')
args.device = device

data = args.dataset
dataloader = Dataloader(args, data, device)
# NegativeGraphConstructor = NegativeGraph(dataloader.historical_dict)
# Moved to the compute device; the training cell indexes it by item id.
sample_weight = dataloader.sample_weight.to(device)

model = choose_model(args, dataloader)
model = model.to(device)
print("model already setting")
opt = torch.optim.Adam(model.parameters(), lr = args.lr, weight_decay = args.weight_decay)


100%|██████████| 136710/136710 [00:07<00:00, 18539.66it/s]
100%|██████████| 2571752/2571752 [00:03<00:00, 815601.85it/s] 
100%|██████████| 845781/845781 [00:00<00:00, 869088.03it/s] 
100%|██████████| 136710/136710 [00:00<00:00, 4662118.67it/s]

model already setting





In [4]:
# Accumulate the training loss over `args.neg_number` freshly sampled negative
# graphs. Each pass adds a pairwise log-sigmoid ranking loss,
# -log(sigmoid(score_pos - score_neg)), plus a weighted auxiliary MoE loss.

# Weight of the auxiliary MoE loss in the total objective.
MOE_LOSS_WEIGHT = 0.2

model.train()

loss_train = torch.zeros(1).to(device)

graph_pos = dataloader.train_graph

if args.category_balance:
    # Loop-invariant: destination (item) node of every positive 'rate' edge and
    # the matching sample weight. Gathered once instead of once per negative graph.
    items = graph_pos.edges(etype = 'rate')[1]
    weight = sample_weight[items]

for i in range(args.neg_number):
    graph_neg = construct_negative_graph(graph_pos, ('user', 'rate', 'item'))

    if args.moe:
        # With --moe the model returns a third value that is used as an
        # auxiliary loss term.
        score_pos, score_neg, loss_moe = model(graph_pos, graph_neg)
    else:
        score_pos, score_neg = model(graph_pos, graph_neg)
        loss_moe = 0

    loss = -(score_pos - score_neg).sigmoid().log()

    if args.category_balance:
        # squeeze(1) drops the size-1 second dimension so the loss lines up with
        # `weight` -- assumes scores are (num_edges, 1); TODO confirm.
        ranking_loss = (weight * loss.squeeze(1)).mean()
        # Computed once and reused for both the debug print and the accumulation.
        print(ranking_loss, loss_moe)
    else:
        ranking_loss = loss.mean()

    loss_train += ranking_loss + MOE_LOSS_WEIGHT * loss_moe

tensor(0.2064, device='cuda:0', grad_fn=<MeanBackward0>) 0
tensor(0.2066, device='cuda:0', grad_fn=<MeanBackward0>) 0
tensor(0.2063, device='cuda:0', grad_fn=<MeanBackward0>) 0
tensor(0.2065, device='cuda:0', grad_fn=<MeanBackward0>) 0
