In [1]:
import numpy  as np
import pandas as pd
from torch.optim import Adam
from torch.nn import MSELoss,BCELoss
from torch.nn.functional import binary_cross_entropy
from torch import LongTensor
import torch
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

In [14]:
from Dataset import *
from torch.utils.data import DataLoader
np.random.seed(2022)
train = DebiasDataset(datatype='train',name = 'yahoo')
test = DebiasDataset(datatype='test',name = 'yahoo')

train_loader = DataLoader(dataset=train,batch_size=2048,shuffle=True)
test_loader = DataLoader(dataset=test,batch_size=54000,shuffle=True)


===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000
===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000


In [26]:
#’---------MF----------------‘

from MF import *
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = MF(15401,1001,device = device,use_mul = True)
optimizer = Adam(model.parameters(),lr = 0.01,weight_decay=0.001)
func_loss = BCELoss()
early_stop = 0
last_loss = 1e10
for epoch in tqdm(range(500)):
    epoch_loss = 0
    for step,((user_idx,item_idx),rates) in enumerate(train_loader):


        rates_pre = model(user_idx,item_idx)

        loss = func_loss(rates_pre,rates.float())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        epoch_loss += loss.detach().numpy()

    relative_loss_div = (last_loss-epoch_loss)/(last_loss+1e-10)
    if  abs(relative_loss_div) < 1e-5:
        if early_stop > 5:
            print("[MF-IPS] epoch:{}, xent:{}".format(epoch, epoch_loss))
            break
        early_stop += 1
    last_loss = epoch_loss

        # earlyStop(loss.item(),model)

    # print('epoch is {}, loss is {}'.format(epoch,loss))

#-------------test------------------
mse = 0.0
mse_func = lambda x,y: torch.mean((x-y)**2)
with torch.no_grad():
    for step,((user_idx,item_idx),rates) in enumerate(test_loader):

        rates_pre = model(user_idx,item_idx)
        mse = mse_func(rates_pre,rates.float())
        auc = roc_auc_score(rates,rates_pre)
        print('mse is {}'.format(mse))
        print('auc is {}'.format(auc))
    

100%|██████████| 500/500 [10:14<00:00,  1.23s/it]


mse is 0.3900812268257141
auc is 0.647725664554786


In [20]:
#----MF-IPS  IPS from MAR--------'
from Propensity import *
from MF import *
#cauculate Inverse Propensity Score

device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
config = {'num_users':290,'num_items':300,'propensity_model':'popularity'}

propensity = Propensity(config)
data = propensity.get_data(name = 'yahoo')
propensity.fit(data)
score = propensity.predict(data)
last_loss = 1e10


model = MF(15401,1001,device = device)
optimizer = Adam(model.parameters(),lr = 0.01,weight_decay=0.001)
func_loss = MSELoss(reduction= 'none')
early_stop = 0
for epoch in tqdm(range(500)):
    
    for step,((user_idx,item_idx),rates) in enumerate(train_loader):
        prop_score = torch.FloatTensor(score[item_idx])

        rates_pre = model(user_idx,item_idx)


        loss = binary_cross_entropy(rates_pre,rates.float(),weight=prop_score)
        loss = torch.mean(loss)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        epoch_loss += loss.detach().numpy()

    relative_loss_div = (last_loss-epoch_loss)/(last_loss+1e-10)
    if  abs(relative_loss_div) < 1e-4:
        if early_stop > 5:
            print("[MF-IPS] epoch:{}, xent:{}".format(epoch, epoch_loss))
            break
        early_stop += 1
    last_loss = epoch_loss


#-------------test------------------
mse = 0.0
mse_func = lambda x,y: torch.mean((x-y)**2)
with torch.no_grad():
    for step,((user_idx,item_idx),rates) in enumerate(test_loader):

        rates_pre = model(user_idx,item_idx)
        mse = mse_func(rates_pre,rates.float())
        auc = roc_auc_score(rates,rates_pre)
        print('mse is {}'.format(mse))
        print('auc is {}'.format(auc))


===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000


100%|██████████| 500/500 [11:10<00:00,  1.34s/it]


mse is 0.25
auc is 0.5007794781473397


In [24]:
# --- MF_IPS  IPS from MNAR----
from Propensity import *
from MF import *
#cauculate Inverse Prop
# ensity Score
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
config = {'num_users':290,'num_items':300,'propensity_model':'popularity'}

propensity = Propensity(config)
data = propensity.get_data(data_type='train',name='yahoo')
propensity.fit(data)
score = propensity.predict(data)


model = MF(15401,1001,device = device)
optimizer = Adam(model.parameters(),lr = 0.01,weight_decay=0)
func_loss = MSELoss(reduction= 'none')
early_stop = 0
last_loss = 1e10

for epoch in tqdm(range(500)):
    # if epoch != 0:
    #     data = model.prediect().numpy()
    #     data = propensity.get_data(datas=data)
    # propensity.fit(data)
    # score = propensity.predict(data)
    model.train()
    epoch_loss = 0
    for step,((user_idx,item_idx),rates) in enumerate(train_loader):

        prop_score = torch.FloatTensor(score[item_idx])

        rates_pre = model(user_idx,item_idx)

        loss = binary_cross_entropy(rates_pre,rates.float(),weight = prop_score) 
        loss = torch.mean(loss)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        epoch_loss += loss.detach().numpy()

    relative_loss_div = (last_loss-epoch_loss)/(last_loss+1e-10)
    if  abs(relative_loss_div) < 1e-5:
        if early_stop > 5:
            print("[MF-IPS] epoch:{}, xent:{}".format(epoch, epoch_loss))
            break
        early_stop += 1
    last_loss = epoch_loss


#-------------test------------------
mse = 0.0
mse_func = lambda x,y: torch.mean((x-y)**2)
with torch.no_grad():
    for step,((user_idx,item_idx),rates) in enumerate(test_loader):
        print(len(rates))

        rates_pre = model(user_idx,item_idx)
        mse = mse_func(rates_pre,rates.float())
        auc = roc_auc_score(rates,rates_pre)
        print('mse is {}'.format(mse))
        print('auc is {}'.format(auc))


===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000


100%|██████████| 500/500 [10:37<00:00,  1.27s/it]


54000
mse is 0.607001543045044
auc is 0.5850173303946195
