In [1]:
import pandas as pd
import numpy as np
import math
from tqdm.notebook import tqdm
import scipy.sparse as sp
import pickle

In [None]:
!unzip data.zip

# 1. Датасет

Взяла датасет из первой домашки (movielens1m), так как его часто используют, и он небольшой.

In [2]:
ratings = pd.read_csv('data/ratings.dat', delimiter='::', header=None, 
        names=['user_id', 'movie_id', 'rating', 'timestamp'], 
        usecols=['user_id', 'movie_id', 'rating'], engine='python')

In [3]:
movie_info = pd.read_csv('data/movies.dat', delimiter='::', header=None, 
        names=['movie_id', 'name', 'category'], engine='python')

In [4]:
ratings.head(5)

Unnamed: 0,user_id,movie_id,rating
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5


### Functions

In [5]:
get_similars = lambda item_id, model : [' '.join(movie_info[movie_info["movie_id"] == x[0]][['movie_id', 'name']].values[0].astype(str).tolist())
                                        for x in model.similar_items(item_id)]

In [6]:
get_recommendations = lambda user_id, model, n: [' '.join(movie_info[movie_info["movie_id"] == x[0]][['movie_id', 'name']].values[0].astype(str).tolist())
                                               for x in model.recommend(user_id, user_item_csr, n)]

In [7]:
get_user_history = lambda user_id, implicit_ratings : [' '.join(movie_info[movie_info["movie_id"] == x][['movie_id', 'name']].values[0].astype(str).tolist())
                                            for x in implicit_ratings[implicit_ratings["user_id"] == user_id]["movie_id"]]

### Preprocess

In [8]:
implicit_ratings = ratings.loc[(ratings['rating'] >= 4)]

In [9]:
users = implicit_ratings["user_id"]
movies = implicit_ratings["movie_id"]
user_item = sp.coo_matrix((np.ones_like(users), (users, movies)))
user_item_t_csr = user_item.T.tocsr()
user_item_csr = user_item.tocsr()

Разделим данные на train, val и test. В качестве test и val возьмем по последнему фильму, которые посмотрел пользователь.

In [None]:
def train_test_split(explicit, implicit, matrix): 
    users = explicit['user_id'].unique()
    items = explicit['movie_id'].unique()
    train = matrix.tolil()
    test, val, negative, val_neg, test_neg = {}, {}, {}, {}, {}
    for u in tqdm(users):
        train_h = [int(h.split()[0]) for h in get_user_history(u, implicit)]
        if len(train_h) >= 10:
            neg = [i for i in items if i not in train_h]
            test_h = train_h.pop()
            val_h = train_h.pop()
            train[u, test_h] = 0
            train[u, val_h] = 0
            test[u] = test_h
            val[u] = val_h
            val_neg[u] = np.random.choice(neg, 99)
            test_neg[u] = np.random.choice(neg, 99)
    return train, val, test, val_neg, test_neg

In [None]:
train, val, test, val_neg, test_neg = train_test_split(ratings, implicit_ratings, user_item)

HBox(children=(FloatProgress(value=0.0, max=6040.0), HTML(value='')))




In [None]:
val_neg_ = {u: [val[u]] + val_neg[u].tolist() for u in val_neg.keys()}
test_neg_ = {u: [test[u]] + test_neg[u].tolist() for u in test_neg.keys()}

In [None]:
train_t_csr = train.T.tocsr()
train_csr = train.tocsr()

### Save

In [None]:
def save(obj, name):
    with open('data/' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f)

In [None]:
# sp.save_npz('data/train_csr.npz', train_csr)
# sp.save_npz('data/train_t_csr.npz', train_t_csr)
# save(val, 'val')
# save(test, 'test')
# save(val_neg, 'val_neg')
# save(test_neg, 'test_neg')
# save(val_neg_, 'val_neg_100')
# save(test_neg_, 'test_neg_100')

### Load

In [10]:
def load(name):
    with open('data/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [29]:
train = sp.load_npz('data/train_csr.npz')
train_csr = sp.load_npz('data/train_t_csr.npz')
val = load('val')
test = load('test')
val_neg = load('val_neg')
test_neg = load('test_neg')
val_neg_ = load('val_neg_100')
test_neg_ = load('test_neg_100')

## Evaluation

In [12]:
def HitRatio(predicted, real, k):
    for item in predicted[:k]:
        if item == real:
            return 1
    return 0

In [13]:
def nDCG(predicted, real, k):
    for i in range(len(predicted[:k])):
        item = predicted[i]
        if item == real:
            return math.log(2) / math.log(i+2)
    return 0

In [16]:
def evaluate(model, test, negative, k=10):
    hrs, ndcgs = [], []
    for u in test.keys():
        user_vect = np.ones(100) * u
        try:
            pred = model.predict(user_vect, negative[u]).cpu().detach().numpy()
        except AttributeError:
            pred = model.predict(user_vect, negative[u])
        rank = np.array(negative[u])[np.argsort(pred)[::-1]]
        y = test[u]
        hr = HitRatio(rank, y, k)
        ndcg = nDCG(rank, y, k)
        hrs.append(hr)
        ndcgs.append(ndcg)
    return sum(hrs) / len(hrs), np.mean(ndcgs)

## Testing

In [116]:
get_user_history(4, implicit_ratings)

['3468 Hustler, The (1961)',
 '2951 Fistful of Dollars, A (1964)',
 '1214 Alien (1979)',
 '1036 Die Hard (1988)',
 '260 Star Wars: Episode IV - A New Hope (1977)',
 '2028 Saving Private Ryan (1998)',
 '480 Jurassic Park (1993)',
 '1198 Raiders of the Lost Ark (1981)',
 '1954 Rocky (1976)',
 '1097 E.T. the Extra-Terrestrial (1982)',
 '3418 Thelma & Louise (1991)',
 '3702 Mad Max (1979)',
 '2366 King Kong (1933)',
 '1387 Jaws (1975)',
 '1201 Good, The Bad and The Ugly, The (1966)',
 '2692 Run Lola Run (Lola rennt) (1998)',
 '2947 Goldfinger (1964)',
 '1240 Terminator, The (1984)']

In [70]:
def testing(model, name, test, negative):
    print('MODEL:', name)
    hr, ndcg = evaluate(model, test, negative)
    print(f'Metrics HR: {hr:.2f}, nDCG: {ndcg:.2f}')
    movie_id = 1
    movie = movie_info[movie_info['movie_id'] == movie_id].to_dict('records')[0]
    items = model.similar_items(movie_id)
    df = pd.DataFrame()
    display(movie_info[movie_info['movie_id'] == 0])
    for item in items: 
        df = df.append(movie_info[movie_info['movie_id'] == item])
    display(df)
    user_id = 4
    print(f'RECOMMENDATIONS for user {user_id}')
    recs = model.recommend(user_id)
    df = pd.DataFrame()
    for item in recs: 
        df = df.append(movie_info[movie_info['movie_id'] == item])
    display(df)

# 2. BPR

In [17]:
from lightfm import LightFM



In [18]:
class RecommenderBPR:
    def __init__(self, model, negative):
        self.model = model
        self.u = model.user_embeddings
        self.v = model.item_embeddings
        self.model.negative = negative
        
    def fit(self, data, epochs):
        self.model.fit_partial(data, epochs=epochs)
        self.u = self.model.user_embeddings
        self.v = self.model.item_embeddings
        
    def predict(self, user,  item):
        return self.model.predict(user, item)

    def recommend(self, user_id, k=10):
        neg = self.model.negative[user_id]
        scores = self.v[neg] @ self.u[user_id]
        ids = np.argsort(scores)[::-1][:k]
        return np.array(neg)[ids]
    
    def similar_items(self, item_id, N=10):
        scores = self.v @ self.v[item_id] / np.linalg.norm(self.v, axis=-1)
        ind = np.argsort(scores)[::-1][:N]
        return ind

In [72]:
bpr_params = {
    'n_components': 64, 
    'lr': 1e-3,
    'schedule': 'adadelta',
    'alpha': 1e-3, 
    'epochs': 100
}

In [73]:
bpr = LightFM(loss='bpr', 
              no_components=bpr_params['n_components'], 
              learning_rate=bpr_params['lr'], 
              learning_schedule=bpr_params['schedule'],
              user_alpha=bpr_params['alpha'],
              item_alpha=bpr_params['alpha'])
bpr_rec = RecommenderBPR(bpr, val_neg_)
bpr_rec.fit(train, epochs=bpr_params['epochs'])

In [74]:
testing(bpr_rec, 'BPR', val, val_neg_)

MODEL: BPR
Metrics HR: 0.49, nDCG: 0.28




Unnamed: 0,movie_id,name,category


Unnamed: 0,movie_id,name,category
690,699,To Cross the Rubicon (1991),Drama
1520,1559,"Next Step, The (1995)",Drama
694,703,Boys (1996),Drama
1519,1558,Sudden Manhattan (1996),Comedy
1518,1557,Squeeze (1996),Drama
2911,2980,Men Cry Bullets (1997),Drama
703,712,Captives (1994),Drama
704,713,Of Love and Shadows (1994),Drama


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
1162,1178,Paths of Glory (1957),Drama|War
108,110,Braveheart (1995),Action|Drama|War
2788,2857,Yellow Submarine (1968),Animation|Musical
2052,2121,Cujo (1983),Horror|Thriller
2004,2073,Fandango (1985),Comedy
589,593,"Silence of the Lambs, The (1991)",Drama|Thriller
3407,3476,Jacob's Ladder (1990),Horror|Mystery|Thriller
1186,1204,Lawrence of Arabia (1962),Adventure|War
2878,2947,Goldfinger (1964),Action
2176,2245,Working Girl (1988),Comedy|Drama


# 2. NCF

https://arxiv.org/pdf/1708.05031.pdf

In [30]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [31]:
class GFM(nn.Module):
    def __init__(self, n_users, n_items, dim):
        super().__init__()
        self.users_embeddings = nn.Embedding(n_users, dim)
        self.items_embeddings = nn.Embedding(n_items, dim)
        self.f = nn.Sequential(nn.Linear(dim, 1), nn.Sigmoid())

        self.u = self.users_embeddings.weight.detach().cpu().numpy()
        self.v = self.items_embeddings.weight.detach().cpu().numpy()
    
    def forward(self, user, item):
        user = self.users_embeddings(user)
        item = self.items_embeddings(item)
        out = user * item
        out = self.f(out).view(-1)
        return out

    def predict(self, user, item):
        user = torch.tensor(user).to(device).long()
        item = torch.tensor(item).to(device).long()
        return self(user, item)

In [32]:
class MLP(nn.Module):
    def __init__(self, n_users, n_items, embed_dim, hidden_dim):
        super().__init__()
        self.users_embeddings = nn.Embedding(n_users, embed_dim)
        self.items_embeddings = nn.Embedding(n_items, embed_dim)
        
        layers = [nn.Linear(embed_dim * 2, hidden_dim[0])]
        for dim_in, dim_out in zip(hidden_dim[:-1], hidden_dim[1:]):
            layers.append(nn.Linear(dim_in, dim_out))
        layers.append(nn.ReLU())
        self.layers = nn.Sequential(*layers)
        
        self.f = nn.Sequential(nn.Linear(hidden_dim[-1], 1), nn.Sigmoid())

        self.u = self.users_embeddings.weight.detach().cpu().numpy()
        self.v = self.items_embeddings.weight.detach().cpu().numpy()
    
    def forward(self, user, item):
        user = self.users_embeddings(user)
        item = self.items_embeddings(item)
        out =  torch.cat((user, item), -1)
        out = self.layers(out)
        out = self.f(out).view(-1)
        return out
    
    def predict(self, user, item):
        user = torch.tensor(user).to(device).long()
        item = torch.tensor(item).to(device).long()
        return self(user, item)

In [45]:
class NeuCF(nn.Module):
    def __init__(self, n_users, n_items, dim_gfm, dim_mlp, hidden_dim, gfm_path, mlp_path):
        super().__init__()
        self.gfm = GFM(n_users, n_items, dim_gfm).to(device)
        self.mlp = MLP(n_users, n_items, dim_mlp, hidden_dim).to(device)
        self.f = nn.Sequential(nn.Linear(hidden_dim[-1] + dim_gfm, 1), nn.Sigmoid())
        
        self.gfm.load_state_dict(torch.load(gfm_path, map_location=torch.device('cpu')))
        self.mlp.load_state_dict(torch.load(mlp_path, map_location=torch.device('cpu')))

        self.u = np.hstack((self.gfm.users_embeddings.weight.detach().cpu().numpy(), 
                            self.mlp.users_embeddings.weight.detach().cpu().numpy()))
        self.v = np.hstack((self.gfm.items_embeddings.weight.detach().cpu().numpy(), 
                            self.mlp.items_embeddings.weight.detach().cpu().numpy()))
        
    def forward(self, user, item):
        user_gfm = self.gfm.users_embeddings(user)
        item_gfm = self.gfm.items_embeddings(item)
        out_gfm = user_gfm * item_gfm
    
        user_mlp = self.mlp.users_embeddings(user)
        item_mlp = self.mlp.items_embeddings(item)
        out_mlp = torch.cat((user_mlp, item_mlp), -1)
        out_mlp = self.mlp.layers(out_mlp)
        
        out = torch.cat((out_gfm, out_mlp), -1)
        out = self.f(out).view(-1)
        return out

    def predict(self, user, item):
        user = torch.tensor(user).to(device).long()
        item = torch.tensor(item).to(device).long()
        return self(user, item)
    

In [34]:
class RecommenderNCF(nn.Module):
    def __init__(self, model, negative):
        super().__init__()
        self.model = model.to(device)
        self.negative = negative
        self.model.negative = negative

    def fit(self, train, test, params):
        epochs, lr, batch_size = params['epochs'], params['lr'], params['batch_size']
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.model.parameters(), lr)
        trainloader = torch.utils.data.DataLoader(train, 
                                                  batch_size=batch_size, 
                                                  shuffle=True)
        for epoch in tqdm(range(epochs)):
            losses = []
            self.model.train()
            for x in trainloader:
                user = x[:, 0].to(device)
                item = x[:, 1].to(device)
                label = x[:, 2].float().to(device)

                self.model.zero_grad()
                prediction = self.model(user, item)
                loss = criterion(prediction, label)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
            
            print(f'Epoch {epoch} - Loss: {np.mean(losses)}')

    def predict(self, user, item):
        return self.model.predict(user, item)

    def evaluate(self, test, k=10):
        return evaluate(self.model, test, self.negative)
    
    def recommend(self, user_id, k=10):
        neg = self.model.negative[user_id]
        scores = self.model.v[neg] @ self.model.u[user_id]
        ids = np.argsort(scores)[::-1][:k]
        return np.array(neg)[ids]

    def similar_items(self, item_id, k=10):
        scores = self.model.v @ self.model.v[item_id] / np.linalg.norm(self.model.v, axis=-1)
        ids = np.argsort(scores)[::-1][:k]
        return ids

In [35]:
def get_data_ncf(data):
    new_data = []
    for i, user in enumerate(data):
        pos = user.indices
        positive = [(i, pos_, 1) for pos_ in user.indices]
        if len(positive) > 0:
            neg = np.setdiff1d(np.arange(data.shape[1]), pos)
            negative = np.random.choice(neg, len(pos))
            negative = [(i, neg, 0) for neg in negative]
            result = np.vstack((positive, negative))
            new_data.append(result)
    return np.vstack(new_data)

In [70]:
train_data = get_data_ncf(train)

In [36]:
gfm_params = {
    'epochs': 40, 
    'lr': 1e-3, 
    'batch_size': 256,
    'dim': 64
}

In [111]:
gfm = GFM(max(users) + 1, max(movies) + 1, gfm_params['dim'])
gfm_rec = RecommenderNCF(gfm, val_neg_)
gfm_rec.fit(train_data, val, gfm_params)

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))

Epoch 0 - Loss: 0.6942116227517178
Epoch 1 - Loss: 0.693325893410982
Epoch 2 - Loss: 0.6885083969724769
Epoch 3 - Loss: 0.6380461905285316
Epoch 4 - Loss: 0.5235764944163414
Epoch 5 - Loss: 0.4471036328382787
Epoch 6 - Loss: 0.4077387634717243
Epoch 7 - Loss: 0.3809677195760241
Epoch 8 - Loss: 0.3570705567575866
Epoch 9 - Loss: 0.3334843856483522
Epoch 10 - Loss: 0.31036335337519483
Epoch 11 - Loss: 0.2884281526832947
Epoch 12 - Loss: 0.26805929745167073
Epoch 13 - Loss: 0.24936053977736772
Epoch 14 - Loss: 0.2321708651021739
Epoch 15 - Loss: 0.2164127679016172
Epoch 16 - Loss: 0.2018886013519271
Epoch 17 - Loss: 0.18847016688911875
Epoch 18 - Loss: 0.17609593509503896
Epoch 19 - Loss: 0.16457870394763272
Epoch 20 - Loss: 0.1539133639006737
Epoch 21 - Loss: 0.14395964915175727
Epoch 22 - Loss: 0.1346926526705997
Epoch 23 - Loss: 0.12599670665608595
Epoch 24 - Loss: 0.11796115648961106
Epoch 25 - Loss: 0.11030222744727909
Epoch 26 - Loss: 0.10314540490880439
Epoch 27 - Loss: 0.096384494

In [117]:
torch.save(gfm.state_dict(), 'gfm.pcl')

In [37]:
gfm = GFM(max(users) + 1, max(movies) + 1, gfm_params['dim'])
gfm_rec = RecommenderNCF(gfm, val_neg_)
gfm.load_state_dict((torch.load('models/gfm.pcl', map_location=torch.device('cpu'))))

<All keys matched successfully>

In [112]:
testing(gfm_rec, 'GFM', val, val_neg_)

MODEL: GFM
Metrics HR: 0.46, nDCG: 0.35
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
1396,1420,Message to Love: The Isle of Wight Festival (1...,Documentary
2626,2695,"Boys, The (1997)",Drama
2371,2440,Another Day in Paradise (1998),Drama
3176,3245,I Am Cuba (Soy Cuba/Ya Kuba) (1964),Drama
367,371,"Paper, The (1994)",Comedy|Drama
1457,1487,Selena (1997),Drama|Musical
3676,3745,Titan A.E. (2000),Adventure|Animation|Sci-Fi
347,351,"Corrina, Corrina (1994)",Comedy|Drama|Romance
1841,1910,I Went Down (1997),Action|Comedy|Crime


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
3271,3340,Bride of the Monster (1956),Horror|Sci-Fi
3498,3567,Bossa Nova (1999),Comedy
2004,2073,Fandango (1985),Comedy
981,993,Infinity (1996),Drama
3428,3497,Max Dugan Returns (1983),Comedy
210,212,Bushwhacked (1995),Comedy
2176,2245,Working Girl (1988),Comedy|Drama
2566,2635,"Mummy's Curse, The (1944)",Horror
1241,1261,Evil Dead II (Dead By Dawn) (1987),Action|Adventure|Comedy|Horror
494,498,Mr. Jones (1993),Drama|Romance


In [39]:
mlp_params = {
    'epochs': 50, 
    'lr': 1e-3, 
    'batch_size': 512,
    'dim': 32,
    'hidden': [32, 64, 32]
}

In [41]:
mlp.load_state_dict((torch.load('models/mlp.pcl', map_location=torch.device('cpu'))))

<All keys matched successfully>

In [144]:
mlp = MLP(max(users) + 1, max(movies) + 1, mlp_params['dim'], hidden_dim=mlp_params['hidden'])
mlp_rec = RecommenderNCF(mlp, val_neg_)
mlp_rec.fit(train_data, val, mlp_params)

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

Epoch 0 - Loss: 0.485973902029755
Epoch 1 - Loss: 0.42894440129744577
Epoch 2 - Loss: 0.42366890934909923
Epoch 3 - Loss: 0.4186856179603497
Epoch 4 - Loss: 0.41551534034673976
Epoch 5 - Loss: 0.4135228179087156
Epoch 6 - Loss: 0.4120006374151584
Epoch 7 - Loss: 0.41079068190669105
Epoch 8 - Loss: 0.4097852252359447
Epoch 9 - Loss: 0.4087192204690749
Epoch 10 - Loss: 0.40756241076104155
Epoch 11 - Loss: 0.4065418306641663
Epoch 12 - Loss: 0.40536035668411236
Epoch 13 - Loss: 0.40418860229618275
Epoch 14 - Loss: 0.40281118116071146
Epoch 15 - Loss: 0.4013593032218174
Epoch 16 - Loss: 0.3997511412543852
Epoch 17 - Loss: 0.3979722398186423
Epoch 18 - Loss: 0.39571417212107135
Epoch 19 - Loss: 0.393110670024078
Epoch 20 - Loss: 0.38986074680742594
Epoch 21 - Loss: 0.3862402212587285
Epoch 22 - Loss: 0.3821697990794661
Epoch 23 - Loss: 0.37808046991962674
Epoch 24 - Loss: 0.37419942743070883
Epoch 25 - Loss: 0.3706341032031881
Epoch 26 - Loss: 0.36712963550754807
Epoch 27 - Loss: 0.36428209

In [146]:
torch.save(mlp.state_dict(), 'mlp.pcl')

In [145]:
testing(mlp_rec, 'MLP', val, val_neg_)

MODEL: MLP
Metrics HR: 0.60, nDCG: 0.32
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
3280,3349,"Perils of Pauline, The (1947)",Comedy
945,957,"Scarlet Letter, The (1926)",Drama
1718,1774,Mass Transit (1998),Comedy|Drama
37,38,It Takes Two (1995),Comedy
258,261,Little Women (1994),Drama
3195,3264,Buffy the Vampire Slayer (1992),Comedy|Horror
1517,1556,Speed 2: Cruise Control (1997),Action|Romance|Thriller
2890,2959,Fight Club (1999),Drama
621,626,"Thin Line Between Love and Hate, A (1996)",Comedy


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
3800,3870,Our Town (1940),Drama
2878,2947,Goldfinger (1964),Action
1028,1041,Secrets & Lies (1996),Drama
1975,2044,"Devil and Max Devlin, The (1981)",Comedy
2028,2097,Something Wicked This Way Comes (1983),Children's|Horror
2263,2332,Belly (1998),Crime|Drama
3121,3190,Supernova (2000),Adventure|Sci-Fi
2983,3052,Dogma (1999),Comedy
2582,2651,Frankenstein Meets the Wolf Man (1943),Horror
3220,3289,Not One Less (Yi ge dou bu neng shao) (1999),Drama


In [42]:
neucf_params = {
    'epochs': 50, 
    'lr': 1e-3, 
    'batch_size': 512
}

In [47]:
neucf.load_state_dict((torch.load('models/neucf.pcl', map_location=torch.device('cpu'))))

<All keys matched successfully>

In [175]:
neucf = NeuCF(max(users) + 1, max(movies) + 1, gfm_params['dim'], mlp_params['dim'], 
              mlp_params['hidden'], 'gfm.pcl', 'mlp.pcl')
neucf_rec = RecommenderNCF(neucf, val_neg_)
neucf_rec.fit(train_data, val, neucf_params)

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

Epoch 0 - Loss: 0.12285264727677718
Epoch 1 - Loss: 0.04820799645627157
Epoch 2 - Loss: 0.03680428043126329
Epoch 3 - Loss: 0.03134199684576344
Epoch 4 - Loss: 0.027121150848802275
Epoch 5 - Loss: 0.023655867119654436
Epoch 6 - Loss: 0.02040692615339271
Epoch 7 - Loss: 0.01753057661104096
Epoch 8 - Loss: 0.015009544657137937
Epoch 9 - Loss: 0.012881798573220821
Epoch 10 - Loss: 0.010696970867517759
Epoch 11 - Loss: 0.008944657900756135
Epoch 12 - Loss: 0.007396555377641394
Epoch 13 - Loss: 0.006175111641028159
Epoch 14 - Loss: 0.005096906569243909
Epoch 15 - Loss: 0.004273468996323573
Epoch 16 - Loss: 0.003609426757796261
Epoch 17 - Loss: 0.0031434090497081796
Epoch 18 - Loss: 0.002708900010942954
Epoch 19 - Loss: 0.0024365567099372825
Epoch 20 - Loss: 0.0022775197689256345
Epoch 21 - Loss: 0.0020782579414579602
Epoch 22 - Loss: 0.00197083242368112
Epoch 23 - Loss: 0.0018811681891641257
Epoch 24 - Loss: 0.0018279055967129342
Epoch 25 - Loss: 0.001776070792911692
Epoch 26 - Loss: 0.0017

In [176]:
testing(neucf_rec, 'NeuCF', val, val_neg_)

MODEL: NeuCF
Metrics HR: 0.45, nDCG: 0.45
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
2327,2396,Shakespeare in Love (1998),Comedy|Romance
1250,1270,Back to the Future (1985),Comedy|Sci-Fi
591,595,Beauty and the Beast (1991),Animation|Children's|Musical
1058,1073,Willy Wonka and the Chocolate Factory (1971),Adventure|Children's|Comedy|Fantasy
2502,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
1081,1097,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi
1111,1127,"Abyss, The (1989)",Action|Adventure|Sci-Fi|Thriller
1656,1704,Good Will Hunting (1997),Drama
585,589,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
3868,3938,"Slumber Party Massacre, The (1982)",Horror
3800,3870,Our Town (1940),Drama
1244,1264,Diva (1981),Action|Drama|Mystery|Romance|Thriller
1479,1514,Temptress Moon (Feng Yue) (1996),Romance
1831,1900,"Children of Heaven, The (Bacheha-Ye Aseman) (1...",Drama
2263,2332,Belly (1998),Crime|Drama
2828,2897,And the Ship Sails On (E la nave va) (1984),Comedy|War
1214,1233,"Boat, The (Das Boot) (1981)",Action|Drama|War
284,287,Nina Takes a Lover (1994),Comedy|Romance
210,212,Bushwhacked (1995),Comedy


In [177]:
torch.save(neucf.state_dict(), 'neucf.pcl')

# Результаты

Сравним результаты на test

In [48]:
def new_negative(model, negative): 
    model.negative = negative
    model.model.negative = negative
    return model

In [49]:
bpr_rec = new_negative(bpr_rec, test_neg_)
gfm_rec = new_negative(gfm_rec, test_neg_)
mlp_rec = new_negative(mlp_rec, test_neg_)
neucf_rec = new_negative(neucf_rec, test_neg_)

Еще раз посмотрим на фильмы, которые смотрел пользователь 4

In [50]:
get_user_history(4, implicit_ratings)

['3468 Hustler, The (1961)',
 '2951 Fistful of Dollars, A (1964)',
 '1214 Alien (1979)',
 '1036 Die Hard (1988)',
 '260 Star Wars: Episode IV - A New Hope (1977)',
 '2028 Saving Private Ryan (1998)',
 '480 Jurassic Park (1993)',
 '1198 Raiders of the Lost Ark (1981)',
 '1954 Rocky (1976)',
 '1097 E.T. the Extra-Terrestrial (1982)',
 '3418 Thelma & Louise (1991)',
 '3702 Mad Max (1979)',
 '2366 King Kong (1933)',
 '1387 Jaws (1975)',
 '1201 Good, The Bad and The Ugly, The (1966)',
 '2692 Run Lola Run (Lola rennt) (1998)',
 '2947 Goldfinger (1964)',
 '1240 Terminator, The (1984)']

### BPR

In [51]:
testing(bpr_rec, 'BPR', test, test_neg_)

MODEL: BPR
Metrics HR: 0.42, nDCG: 0.27
SIMILARS for Toy Story (1995)




Unnamed: 0,movie_id,name,category
541,545,Harlem (1993),Drama
2550,2619,Mascara (1999),Drama
3342,3411,Babymother (1998),Drama
3338,3407,"Carriers Are Waiting, The (Les Convoyeurs Atte...",Comedy|Drama
2562,2631,Frogs for Snakes (1998),Comedy|Film-Noir|Thriller
1365,1386,Terror in a Texas Town (1958),Western
1362,1383,Adrenalin: Fear the Rush (1996),Action|Sci-Fi
572,576,Fausto (1993),Comedy
575,579,"Scorta, La (1993)",Thriller


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
3444,3513,Rules of Engagement (2000),Drama|Thriller
3141,3210,Fast Times at Ridgemont High (1982),Comedy
329,333,Tommy Boy (1995),Comedy
543,547,Surviving the Game (1994),Action|Adventure|Thriller
1708,1762,Deep Rising (1998),Action|Horror|Sci-Fi
2711,2780,"Raven, The (1963)",Comedy|Horror
2462,2531,Battle for the Planet of the Apes (1973),Action|Sci-Fi
139,141,"Birdcage, The (1996)",Comedy
3276,3345,"Charlie, the Lonesome Cougar (1967)",Adventure|Children's
1663,1711,Midnight in the Garden of Good and Evil (1997),Comedy|Crime|Drama|Mystery


### GFM

In [52]:
testing(gfm_rec, 'GFM', test, test_neg_)

MODEL: GFM
Metrics HR: 0.41, nDCG: 0.31
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
2327,2396,Shakespeare in Love (1998),Comedy|Romance
10,11,"American President, The (1995)",Comedy|Drama|Romance
1058,1073,Willy Wonka and the Chocolate Factory (1971),Adventure|Children's|Comedy|Fantasy
591,595,Beauty and the Beast (1991),Animation|Children's|Musical
33,34,Babe (1995),Children's|Comedy|Drama
1250,1270,Back to the Future (1985),Comedy|Sci-Fi
2530,2599,Election (1999),Comedy
1388,1411,Hamlet (1996),Drama
907,919,"Wizard of Oz, The (1939)",Adventure|Children's|Drama|Musical


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
1708,1762,Deep Rising (1998),Action|Horror|Sci-Fi
3613,3682,Magnum Force (1973),Western
73,74,Bed of Roses (1996),Drama|Romance
67,68,French Twist (Gazon maudit) (1995),Comedy|Romance
1831,1900,"Children of Heaven, The (Bacheha-Ye Aseman) (1...",Drama
2277,2346,"Stepford Wives, The (1975)",Sci-Fi|Thriller
3863,3933,"Killer Shrews, The (1959)",Horror|Sci-Fi
337,341,Double Happiness (1994),Drama
1692,1743,Arguing the World (1996),Documentary
3872,3942,Sorority House Massacre II (1990),Horror


### MLP

In [54]:
testing(mlp_rec, 'MLP', test, test_neg_)

MODEL: MLP
Metrics HR: 0.51, nDCG: 0.26
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
2890,2959,Fight Club (1999),Drama
912,924,2001: A Space Odyssey (1968),Drama|Mystery|Sci-Fi|Thriller
373,377,Speed (1994),Action|Romance|Thriller
1335,1356,Star Trek: First Contact (1996),Action|Adventure|Sci-Fi
1533,1573,Face/Off (1997),Action|Sci-Fi|Thriller
2502,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
1656,1704,Good Will Hunting (1997),Drama
159,161,Crimson Tide (1995),Drama|Thriller|War
1202,1220,"Blues Brothers, The (1980)",Action|Comedy|Musical


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
1671,1720,Time Tracers (1995),Action|Adventure|Sci-Fi
3015,3084,Home Page (1999),Documentary
955,967,"Outlaw, The (1943)",Western
1278,1298,Pink Floyd - The Wall (1982),Drama|Musical|War
765,775,Spirits of the Dead (Tre Passi nel Delirio) (1...,Horror
1679,1728,"Winter Guest, The (1997)",Drama
71,72,Kicking and Screaming (1995),Comedy|Drama
2780,2849,Queens Logic (1991),Comedy|Drama
1239,1259,Stand by Me (1986),Adventure|Comedy|Drama
3142,3211,"Cry in the Dark, A (1988)",Drama


### NeuCF

In [55]:
testing(neucf_rec, 'NeuCF', test, test_neg_)

MODEL: NeuCF
Metrics HR: 0.41, nDCG: 0.40
SIMILARS for Toy Story (1995)


Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
2327,2396,Shakespeare in Love (1998),Comedy|Romance
1250,1270,Back to the Future (1985),Comedy|Sci-Fi
591,595,Beauty and the Beast (1991),Animation|Children's|Musical
1058,1073,Willy Wonka and the Chocolate Factory (1971),Adventure|Children's|Comedy|Fantasy
2502,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
1081,1097,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi
1111,1127,"Abyss, The (1989)",Action|Adventure|Sci-Fi|Thriller
1656,1704,Good Will Hunting (1997),Drama
585,589,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller


RECOMMENDATIONS for user 4


Unnamed: 0,movie_id,name,category
1708,1762,Deep Rising (1998),Action|Horror|Sci-Fi
1831,1900,"Children of Heaven, The (Bacheha-Ye Aseman) (1...",Drama
1692,1743,Arguing the World (1996),Documentary
1671,1720,Time Tracers (1995),Action|Adventure|Sci-Fi
2277,2346,"Stepford Wives, The (1975)",Sci-Fi|Thriller
73,74,Bed of Roses (1996),Drama|Romance
1134,1150,"Return of Martin Guerre, The (Retour de Martin...",Drama
67,68,French Twist (Gazon maudit) (1995),Comedy|Romance
3872,3942,Sorority House Massacre II (1990),Horror
2711,2780,"Raven, The (1963)",Comedy|Horror


У NeuCF лучший nDCG, но я где-то налажала и везде симилары и рекомендации не очень, но я пыталась(.  Мб стоило подобрать другие параметры