In [3]:
import pickle
import pandas as pd
import numpy as np

In [4]:
df_rate = pd.read_csv("data/rating_train.csv")
df = df_rate.drop("date", axis=1)
df = df.drop_duplicates()
df['label'] = 1

In [3]:
%store -r eaten_dict
%store -r val_dict

In [4]:
def predict(userid):
    dense = model.user_embed.weight.detach().cpu().numpy() @ model.food_embed.weight.detach().cpu().numpy().T
    rank = 0
    rank += dense[userid]
    rank += model.food_bias.weight.detach().cpu().numpy().flatten()
    rank = np.argsort(rank)[::-1]
    already_have = eaten_dict[userid]
    y_pred = []
    for j in rank:
        if j not in already_have:
            y_pred.append(j)
        if len(y_pred) >= 20:
            break
    return y_pred

In [5]:
def cal(M):
    
    dense = model.user_embed.weight.detach().cpu().numpy() @ model.food_embed.weight.detach().cpu().numpy().T
    score = []
    for i in df.userid.unique():
        userid = i
        already_have = eaten_dict[userid]
        y_true = val_dict[userid]
        y_pred = []
        
        rank = 0
        rank += dense[userid]
        rank += M.food_bias.weight.detach().cpu().numpy().flatten()
        rank = np.argsort(rank)[::-1]
        
        for j in rank:
            if j not in already_have:
                y_pred.append(j)
            if len(y_pred) >= 20:
                break

        assert not len(set(y_pred) & set(already_have))
        s = apk(y_true, list(y_pred), 20)
        score.append(s)
        

    return np.mean(score)

# model

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [7]:
train_loader = DataLoader(
        dataset=TensorDataset(
            torch.from_numpy(df.userid.values),
            torch.from_numpy(df.foodid.values),
            torch.from_numpy(df.label.values)),
        batch_size=1024,
        shuffle=True,
        num_workers=8)

In [8]:
from sklearn.preprocessing import scale
food_count = np.zeros((df.foodid.max()+1))
for i, j in zip(df.foodid.value_counts().index.values, df.foodid.value_counts().values):
    food_count[i] = j
food_count = scale(food_count)

user_count = np.zeros((df_rate.userid.max()+1))
for i, j in zip(df_rate.userid.value_counts().index.values, df_rate.userid.value_counts().values):
    user_count[i] = j
user_count = scale(user_count)

In [9]:
class Model(nn.Module):
    def __init__(self, user_size, food_size, hidden=10, global_mean=0):
        super(Model, self).__init__()
        self.user_embed = nn.Embedding(user_size, hidden, padding_idx=0)
        self.food_embed = nn.Embedding(food_size, hidden, padding_idx=0)
        self.user_bias = nn.Embedding(user_size, 1, padding_idx=0)
        self.food_bias = nn.Embedding(food_size, 1, padding_idx=0)
        
        self.global_mean = global_mean
        self.user_bias.weight.data.fill_(0)
        self.food_bias.weight.data.fill_(0)
        nn.init.xavier_uniform_(self.user_embed.weight)
        nn.init.xavier_uniform_(self.food_embed.weight)
        
    
    def forward(self, user, food, test=False):
        u = self.user_embed(user)
        u_b = self.user_bias(user).squeeze()
        b = self.food_embed(food)
        b_b = self.food_bias(food).squeeze()
        output = (u*b).sum(1) + u_b + b_b + self.global_mean
        if test:
            return output
        lamb = 0.01
        reg1 = lamb * torch.norm(torch.cat([x.view(-1) for x in self.parameters()]), 1)
        reg2 = lamb * torch.norm(torch.cat([x.view(-1) for x in self.parameters()]), 2)
        return output, reg1+reg2

In [10]:
model = Model(
        user_size=int(df.userid.max()+1),
        food_size=int(df.foodid.max()+1),
        global_mean=df.label.mean().item()).cuda()

opt_u = torch.optim.Adam(list(model.user_embed.parameters()), lr=0.01, amsgrad=True)
opt_f = torch.optim.Adam(list(model.food_embed.parameters()), lr=0.01, amsgrad=True)
opt_b = torch.optim.Adam(list(model.food_bias.parameters())+list(model.user_bias.parameters()), lr=0.001, amsgrad=True)

In [11]:
for epoch in range(30):
    print('Epoch:', epoch)
    total_loss = []
    for i ,(user, food, rate) in enumerate(train_loader):
        user, food, rate = user.cuda(), food.cuda(), rate.cuda().float()
        output, regu_loss = model(user, food)

        
        loss = F.kl_div(output, rate, size_average=False) + regu_loss
        total_loss.append(loss.item())

        opt_u.zero_grad()
        opt_f.zero_grad()
        opt_b.zero_grad()
        loss.backward()
        opt_u.step()
        opt_f.step()
        opt_b.step()
    

    print("MAP@20 : ", cal(model))

Epoch: 0
0.035475911744778585
Epoch: 1
0.03846926822474494
Epoch: 2
0.039174650582575146
Epoch: 3
0.039254666672694706
Epoch: 4
0.039431061235198364
Epoch: 5
0.03955615908075906
Epoch: 6
0.03956886698120877
Epoch: 7
0.03961175422778148
Epoch: 8
0.03964961839333488
Epoch: 9
0.03968965972646005
Epoch: 10
0.039760064847000405
Epoch: 11
0.03972770474877665
Epoch: 12
0.03967539965173022
Epoch: 13
0.03975505697681986
Epoch: 14
0.03971959787886967
Epoch: 15
0.0397371921793029
Epoch: 16
0.03972421985697998
Epoch: 17
0.03980516707588773
Epoch: 18
0.03981643950131006
Epoch: 19
0.039847266026262186
Epoch: 20
0.039846297826978995
Epoch: 21
0.03987943624105102
Epoch: 22
0.03986398545298272
Epoch: 23
0.03986461185183799
Epoch: 24
0.03986672758605897
Epoch: 25
0.039766449199549185
Epoch: 26
0.039751421672502944
Epoch: 27
0.03977101708738424
Epoch: 28
0.039750394830654394
Epoch: 29
0.039756907461905265


# submit