# Matrix Factorization и Factorization Machines

План семинара:
1. Теоретическое введение в Factorization Machines
2. Построение рекомендательной модели с помощью VW
3. Построение рекомендательной модели на основе SVD

In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

%matplotlib inline

## Подготовка данных

In [2]:
df = pd.read_csv("ml-20m/ratings.csv")

In [3]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [4]:
# Выбросим фильмы с < 5 рейтингами
group_sizes = df.groupby("movieId").size().reset_index()
blacklist = group_sizes[group_sizes[0] < 5].movieId
df = df[~df.movieId.isin(blacklist)]

In [5]:
print("Unique user count", len(df["userId"].unique()))
print("Unique movie count", len(df["movieId"].unique()))

Unique user count 138493
Unique movie count 18345


In [None]:
index_to_item_id_mapping = [x for x in df["movieId"].sort_values().unique()]
item_id_to_index_mapping = {}
for index, value in enumerate(index_to_item_id_mapping, start=1):
    item_id_to_index_mapping[int(value)] = index

assert len(index_to_item_id_mapping) == 18345

In [None]:
df = pd.DataFrame.from_records(
    {"userId": row.userId, "movieId": item_id_to_index_mapping[int(row.movieId)], "rating": row.rating}
    for row in df.itertuples())

In [None]:
# Разобьем выборку на train & test выбросив случайные 7 рейтингов для каждого пользователя

df["random"] = np.random.random(size=(df.shape[0], 1))
idx = df.sort_values(by="random") \
    .reset_index() \
    .groupby("userId") \
    .head(n=7)["index"]

mask = df.index.isin(idx)
train_df = df[~mask]
test_df = df[mask]

In [None]:
train_df[["userId", "movieId", "rating"]].to_csv("ml20m_train.csv", header=False, index=False)
test_df[["userId", "movieId", "rating"]].to_csv("ml20m_test.csv", header=False, index=False)

## Сборка и установка VW

1. Можно собрать из исходников, они тут:
https://github.com/JohnLangford/vowpal_wabbit/releases
2. Можно установить готовые бинарники:

  Команда для Ubuntu: **apt-get install vowpal-wabbit**

  Команада для Mac OS X: **brew install vowpal-wabbit** (предварительно надо установить brew: http://brew.sh/)

После установки нужно проверить, что бинарник есть:

In [4]:
!vw --version

8.5.0


## Обучим VW

In [27]:
!awk -F"," '{printf "%f |u %d |i %d\n", $3,$1,$2}' < ml20m_train.csv | \
  vw /dev/stdin -b 18 -q ui --rank 30 --l2 0.001 --learning_rate 0.015 \
    --passes 5 --decay_learning_rate 0.97 --power_t 0 \
    --loss_function squared -f movielens.reg --cache_file movielens.cache

creating quadratic features for pairs: ui 
using l2 regularization = 0.001
final_regressor = movielens.reg
Num weight bits = 18
learning rate = 0.015
initial_t = 1
power_t = 0
decay_learning_rate = 0.97
using cache_file = movielens.cache
ignoring text input in favor of cache input
num sources = 1
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
10.627020 10.627020            1            1.0   3.5000   0.2401       63
10.706633 10.786245            2            2.0   3.5000   0.2158       63
10.197121 9.687609            4            4.0   3.5000   0.4220       63
10.961847 11.726574            8            8.0   4.0000   0.6873       63
8.993563 7.025279           16           16.0   3.5000   1.1249       63
7.049418 5.105272           32           32.0   3.5000   1.8424       63
4.498373 1.947328           64           64.0   4.0000   2.8178       63
2.487681 0.476990          128        

In [28]:
!awk -F"," '{printf "%f |u %d |i %d\n", $3,$1,$2}' < ml20m_test.csv | \
  vw /dev/stdin -i movielens.reg -t

creating quadratic features for pairs: ui 
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
using no cache
Reading datafile = /dev/stdin
num sources = 1
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
0.049604 0.049604            1            1.0   3.5000   3.7227       63
0.063143 0.076681            2            2.0   3.5000   3.7769       63
0.117095 0.171048            4            4.0   4.0000   3.6406       63
0.148731 0.180367            8            8.0   5.0000   4.2999       63
0.257157 0.365583           16           16.0   5.0000   4.6183       63
0.291124 0.325091           32           32.0   5.0000   4.7100       63
0.668339 1.045555           64           64.0   4.0000   4.2800       63
0.659109 0.649879          128          128.0   2.0000   4.1230       63
0.724413 0.789716          256          256.0   3.0000   3.9349       63
0.734581 0.7

In [29]:
np.sqrt(0.739978)

0.86021973936895912

RMSE ~0.86, что хорошо!

## Сравним с простым SVD

In [42]:
user_means = pd.DataFrame(train_df.groupby("userId").mean()["rating"]).reset_index()
work_df = pd.merge(train_df, user_means, on="userId")

In [43]:
work_df.head()

Unnamed: 0,movieId,rating_x,userId,random,rating_y
0,29,3.5,1,0.535317,3.741071
1,32,3.5,1,0.533745,3.741071
2,47,3.5,1,0.559406,3.741071
3,50,3.5,1,0.588092,3.741071
4,150,4.0,1,0.513574,3.741071


In [44]:
work_df["r"] = work_df["rating_x"] - work_df["rating_y"]

In [45]:
train_M = csr_matrix(
    (work_df["r"], (work_df["userId"] - 1, work_df["movieId"] - 1)), shape=(138493, 18345))

In [46]:
from scipy.sparse.linalg import svds

U, S, Vt = svds(train_M, k=30)

sum_ = 0
count = 0

for user_id, group in test_df.groupby("userId"):
    uvec = U[user_id - 1, :] * S
    
    movie_ids = group["movieId"].values - 1
    rating = group["rating"].values
    
    predicted = uvec.dot(Vt[:, movie_ids]) + \
        user_means[user_means["userId"] == user_id]["rating"].values[0]

    diff = (predicted - rating) ** 2
    sum_ += np.sum(diff)
    count += len(diff)

In [47]:
np.sqrt(sum_ / count)

0.95067281590394248

## Рекомендации

Нам понадобится g++ (и boost), чтобы собрать один примерчик. Установка boost аналогична установке VW:

  Команда для Ubuntu: **apt-get install libboost-all-dev**

  Команада для Mac OS X: **brew install boost** (предварительно надо установить brew: http://brew.sh/)

In [51]:
!g++ --version

Configured with: --prefix=/Library/Developer/CommandLineTools/usr --with-gxx-include-dir=/usr/include/c++/4.2.1
Apple LLVM version 8.1.0 (clang-802.0.42)
Target: x86_64-apple-darwin16.6.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin


In [30]:
!wget https://github.com/JohnLangford/vowpal_wabbit/archive/8.5.0.zip

--2017-12-11 15:28:10--  https://github.com/JohnLangford/vowpal_wabbit/archive/8.5.0.zip
Resolving github.com... 192.30.253.112, 192.30.253.113
Connecting to github.com|192.30.253.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/JohnLangford/vowpal_wabbit/zip/8.5.0 [following]
--2017-12-11 15:28:11--  https://codeload.github.com/JohnLangford/vowpal_wabbit/zip/8.5.0
Resolving codeload.github.com... 192.30.253.121, 192.30.253.120
Connecting to codeload.github.com|192.30.253.121|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘8.5.0.zip’

8.5.0.zip               [   <=>              ]  18.45M  44.9KB/s    in 3m 40s  

2017-12-11 15:31:51 (86.1 KB/s) - ‘8.5.0.zip’ saved [19350154]



In [32]:
!unzip 8.5.0.zip

Archive:  8.5.0.zip
769cba5b44450401500f39ee70c0950353368cb0
   creating: vowpal_wabbit-8.5.0/
  inflating: vowpal_wabbit-8.5.0/.editorconfig  
  inflating: vowpal_wabbit-8.5.0/.gitignore  
 extracting: vowpal_wabbit-8.5.0/.gitkeep  
  inflating: vowpal_wabbit-8.5.0/.travis.yml  
  inflating: vowpal_wabbit-8.5.0/AUTHORS  
  inflating: vowpal_wabbit-8.5.0/COPYING  
  inflating: vowpal_wabbit-8.5.0/INSTALL  
  inflating: vowpal_wabbit-8.5.0/LICENSE  
  inflating: vowpal_wabbit-8.5.0/Makefile.am  
  inflating: vowpal_wabbit-8.5.0/Makefile.in  
   creating: vowpal_wabbit-8.5.0/R/
  inflating: vowpal_wabbit-8.5.0/R/example.yaml  
   creating: vowpal_wabbit-8.5.0/R/examples/
  inflating: vowpal_wabbit-8.5.0/R/examples/vw_example.R  
  inflating: vowpal_wabbit-8.5.0/R/examples/vw_example_2.R  
   creating: vowpal_wabbit-8.5.0/R/r.vw/
  inflating: vowpal_wabbit-8.5.0/R/r.vw/DESCRIPTION  
  inflating: vowpal_wabbit-8.5.0/R/r.vw/NAMESPACE  
   creating: vowpal_wabbit-8.5.0/R/r.vw/R/
  inflating:

  inflating: vowpal_wabbit-8.5.0/cs/cli/vw_settings.h  
   creating: vowpal_wabbit-8.5.0/cs/common/
  inflating: vowpal_wabbit-8.5.0/cs/common/Bag.cs  
   creating: vowpal_wabbit-8.5.0/cs/common/Properties/
  inflating: vowpal_wabbit-8.5.0/cs/common/Properties/AssemblyInfo.cs  
   creating: vowpal_wabbit-8.5.0/cs/common/Reflection/
  inflating: vowpal_wabbit-8.5.0/cs/common/Reflection/InspectionHelper.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Reflection/ReflectionHelper.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Reflection/TypeDistance.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Reflection/TypeMatch.cs  
   creating: vowpal_wabbit-8.5.0/cs/common/Serializer/
  inflating: vowpal_wabbit-8.5.0/cs/common/Serializer/FeatureExpression.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Serializer/LabelExpression.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Serializer/PropertyConfiguration.cs  
  inflating: vowpal_wabbit-8.5.0/cs/common/Serializer/Schema.cs  
  in

  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestAllReduce.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestAntlr.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestArguments.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestAzure.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestBase.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestCbAdf.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestConfidence.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestConfigInspector.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestConsole.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestDynamic.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestErrorListener.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestExampleCache.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestExpansion.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestFeatureExtraction.cs  
  inflating: vowpal_wabbit-8.5.0/cs/unittest/TestJson.cs  
  inflating: vowpal_wabbit-8.

  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/ltcb/wikifil.pl  
   creating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/Makefile  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/README.md  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/do-oaa-hogwild  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/map  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/odp/minishuf  
   creating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/.gitignore  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/DocGenerator.py  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/Makefile  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/README.md  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/WikiExtractor.py  
  inflating: vowpal_wabbit-8.5.0/demo/recall_tree/wikipara/makeparadata.py  
  inflating: vowpal_wabbit-8.5.

  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-brand-image.jpg  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-color-palette.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-github-logo.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-github-logo@2x.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-github-logo@3x.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-icon-400x400.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-icon-800x800.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-icon.eps  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-icon.pdf  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-icon.svg  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-logomark-1000x1000.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-logomark-2000x2000.png  
  inflating: vowpal_wabbit-8.5.0/logo_assets/vowpal-wabbits-l

  inflating: vowpal_wabbit-8.5.0/rapidjson/doc/stream.md  
  inflating: vowpal_wabbit-8.5.0/rapidjson/doc/stream.zh-cn.md  
  inflating: vowpal_wabbit-8.5.0/rapidjson/doc/tutorial.md  
  inflating: vowpal_wabbit-8.5.0/rapidjson/doc/tutorial.zh-cn.md  
   creating: vowpal_wabbit-8.5.0/rapidjson/docker/
   creating: vowpal_wabbit-8.5.0/rapidjson/docker/debian/
  inflating: vowpal_wabbit-8.5.0/rapidjson/docker/debian/Dockerfile  
   creating: vowpal_wabbit-8.5.0/rapidjson/example/
  inflating: vowpal_wabbit-8.5.0/rapidjson/example/CMakeLists.txt  
   creating: vowpal_wabbit-8.5.0/rapidjson/example/capitalize/
  inflating: vowpal_wabbit-8.5.0/rapidjson/example/capitalize/capitalize.cpp  
   creating: vowpal_wabbit-8.5.0/rapidjson/example/condense/
  inflating: vowpal_wabbit-8.5.0/rapidjson/example/condense/condense.cpp  
   creating: vowpal_wabbit-8.5.0/rapidjson/example/filterkey/
  inflating: vowpal_wabbit-8.5.0/rapidjson/example/filterkey/filterkey.cpp  
   creating: vowp

  inflating: vowpal_wabbit-8.5.0/test/test-sets/rcv1_small_test.data  
   creating: vowpal_wabbit-8.5.0/test/test-sets/ref/
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0001.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0001_ftrl.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0001_ftrl_holdout.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0001_ftrl_holdout_106.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0001_ftrl_holdout_off.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0002b.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0002c.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0098.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/0099.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/active_cover.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/backwards.stderr  
  inflating: vowpal_wabbit-8.5.0/test/test-sets/ref/backwards.stdout  
  inflating:

  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/csoaa.audit_regr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/csoaa_audit_regr.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/csoaa_ldf_probabilities.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/decisionservice.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/dictionary_test.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/dictionary_test.stderr-mswin  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/empty-set.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/explore_eval.stderr  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/ezexample_predict.stderr  
 extracting: vowpal_wabbit-8.5.0/test/train-sets/ref/ezexample_predict.stdout  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/frank.stderr  
 extracting: vowpal_wabbit-8.5.0/test/train-sets/ref/frank.stdout  
  inflating: vowpal_wabbit-8.5.0/test/train-sets/ref/ftrl.audit_regr

  inflating: vowpal_wabbit-8.5.0/utl/vw-lda  
  inflating: vowpal_wabbit-8.5.0/utl/vw-regr  
  inflating: vowpal_wabbit-8.5.0/utl/vw-top-errors  
  inflating: vowpal_wabbit-8.5.0/utl/vw-validate.html  
  inflating: vowpal_wabbit-8.5.0/utl/vw-varinfo  
  inflating: vowpal_wabbit-8.5.0/utl/vw2csv  
   creating: vowpal_wabbit-8.5.0/vowpalwabbit/
   creating: vowpal_wabbit-8.5.0/vowpalwabbit/.nuget/
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/.nuget/NuGet.Config  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/.nuget/NuGet.exe  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/.nuget/NuGet.targets  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/Build.props  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/Makefile.am  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/Makefile.in  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/OjaNewton.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/OjaNewton.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/accumulate.cc  
  inflating: vowpal_wabbit-8.5.0/vow

  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/search_sequencetask.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/search_sequencetask.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/sender.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/sender.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/simple_label.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/simple_label.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/spanning_tree.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/spanning_tree.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/stagewise_poly.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/stagewise_poly.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/svrg.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/svrg.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/topk.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/topk.h  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/unique_sort.cc  
  inflating: vowpal_wabbit-8.5.0/vowpalwabbit/unique_sort

In [48]:
!g++ vowpal_wabbit-8.5.0/library/recommend.cc \
    -o recommend \
    -std=c++0x \
    -lvw -lboost_program_options-mt

In [49]:
df[df["userId"] == 120].head()

Unnamed: 0,movieId,rating,userId,random
14407,2,1.0,120,0.538618
14408,11,3.0,120,0.698799
14409,19,3.0,120,0.157578
14410,21,5.0,120,0.420503
14411,36,4.0,120,0.061272


In [50]:
with open("blacklist.vw", "w") as f:
    for row in df[df["userId"] == 120].itertuples():
        f.write("|u 120 |i " + str(row.movieId) + "\n")

In [51]:
!head blacklist.vw -n 2

|u 120 |i 2
|u 120 |i 11


In [52]:
with open("items.vw", "w") as f:
    for i in range(1, len(df["movieId"].unique()) + 1):
        f.write("|i " + str(i) + "\n")

In [53]:
!head items.vw -n 5

|i 1
|i 2
|i 3
|i 4
|i 5


In [54]:
!echo '|u 120' | ./recommend \
    --topk 10 \
    -B blacklist.vw \
    -U /dev/stdin \
    -I items.vw \
    --vwparams '-i movielens.reg' >recommendations

creating quadratic features for pairs: ui 
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1

finished run
number of examples = 0
weighted example sum = 0.000000
weighted label sum = 0.000000
average loss = n.a.
total feature number = 0


In [55]:
!cat recommendations

4.48818	|u 120|i 844
4.49538	|u 120|i 3481
4.54713	|u 120|i 524
4.57014	|u 120|i 1243
4.58537	|u 120|i 7316
4.60624	|u 120|i 1945
4.64483	|u 120|i 1662
4.68453	|u 120|i 2677
4.69195	|u 120|i 316
4.85388	|u 120|i 353


In [56]:
movies = []

for line in open("recommendations"):
    movie_id = line.split()[-1]
    movies.append(int(movie_id))

In [57]:
movies

[844, 3481, 524, 1243, 7316, 1945, 1662, 2677, 316, 353]

In [58]:
df_movies = pd.read_csv("ml-20m/movies.csv")

In [59]:
df_movies[df_movies["movieId"].isin([index_to_item_id_mapping[t - 1] for t in movies])]

Unnamed: 0,movieId,title,genres
315,318,"Shawshank Redemption, The (1994)",Crime|Drama
352,356,Forrest Gump (1994),Comedy|Drama|Romance|War
523,527,Schindler's List (1993),Drama|War
843,858,"Godfather, The (1972)",Crime|Drama
1242,1270,Back to the Future (1985),Adventure|Comedy|Sci-Fi
1661,1721,Titanic (1997),Drama|Romance
1944,2028,Saving Private Ryan (1998),Action|Drama|War
2676,2762,"Sixth Sense, The (1999)",Drama|Horror|Mystery
3487,3578,Gladiator (2000),Action|Adventure|Drama
7356,7502,Band of Brothers (2001),Action|Drama|War


## Заключение

1. Разобрались с VW и построили модель для рекомендаций на основе Factrorization Machines
2. Построили простую рекомендательную модель на основе SVD

Ссылка для обратной связи: https://goo.gl/forms/SIa0Elv5PrrEnwwa2