# Ugulava George Assignment 5 part 2: recommendation systems.

# Factorization Machines Model

## Factorization machines (FM) are a generic approach that can mimic most factorization models through feature engineering. The model uses stochastic gradient descent with adaptive regularization as its learning method, which adapts the regularization automatically while training the model parameters. In this way, factorization machines combine the generality of feature engineering with the superiority of factorization models in estimating interactions between categorical variables of large domain.

In [1]:
from pyfm import pylibfm
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import mean_squared_error

# Manual preprocessing

In [2]:
def loadData(filename,path="ml-100k/"):
    """Load tab-separated rating records from a file.

    Every non-blank line must contain exactly four tab-separated fields:
    user, movie id, rating and timestamp. The timestamp is read but not
    used. Blank lines (for example a trailing empty line) are skipped.

    Args:
        filename: Name of the ratings file.
        path: Directory that contains the file. A trailing path separator
            is optional.

    Returns:
        A tuple ``(data, y, users, items)`` where ``data`` is a list of
        ``{"user_id": ..., "movie_id": ...}`` dicts (string values), ``y``
        is a NumPy array of the ratings as floats, and ``users`` / ``items``
        are the sets of distinct user and movie id strings seen.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields
            or its rating cannot be converted to ``float``.
    """
    import os  # local import so the block does not rely on file-level imports

    data = []
    y = []
    users = set()
    items = set()
    # os.path.join works whether or not ``path`` ends with a separator.
    with open(os.path.join(path, filename)) as f:
        for line in f:
            # A blank line would otherwise break the 4-way unpacking below.
            if not line.strip():
                continue
            user, movieid, rating, _ts = line.split('\t')
            # split() already yields str, so no extra conversion is needed.
            data.append({"user_id": user, "movie_id": movieid})
            y.append(float(rating))
            users.add(user)
            items.add(movieid)

    return (data, np.array(y), users, items)

# Build model and evaluate. 

In [3]:
from math import sqrt
def predict_recommendations(file_train, file_test):
    """Train a factorization machine on one file and score it on another.

    Both files are read with ``loadData``. The user/movie dicts are one-hot
    encoded with a ``DictVectorizer`` fitted on the training records only,
    and the same fitted encoder is reused for the test records.

    Args:
        file_train: File name of the training ratings.
        file_test: File name of the held-out ratings.

    Returns:
        The square root of the mean squared error between the true test
        ratings and the model's predictions.
    """
    train_rows, train_targets, _, _ = loadData(file_train)
    encoder = DictVectorizer()
    train_matrix = encoder.fit_transform(train_rows)

    model = pylibfm.FM(
        num_factors=10,
        num_iter=120,
        verbose=False,
        task="regression",
        initial_learning_rate=0.002,
        learning_rate_schedule="optimal",
    )
    model.fit(train_matrix, train_targets)

    test_rows, test_targets, _, _ = loadData(file_test)
    test_matrix = encoder.transform(test_rows)
    mse = mean_squared_error(test_targets, model.predict(test_matrix))
    return sqrt(mse)

# Run test for each fold. 

In [4]:
# Train/test file pairs, one per fold (u1..u5).
tests = [('u1.base','u1.test'),('u2.base','u2.test'),
        ('u3.base','u3.test'),('u4.base','u4.test'),
        ('u5.base','u5.test')]
# Running sum of the per-fold RMSE values; turned into the mean below.
rmse = 0
for i, (train, test) in enumerate(tests, start=1):
    pred = predict_recommendations(train,test)  # RMSE for this fold
    rmse += pred
    print('rmse fold ', i, '= ', pred)
# Divide by the actual number of folds so the average stays correct
# if the list above is ever changed.
rmse = rmse/len(tests)
print("RMSE = %.4f" %rmse)

rmse fold  1 =  0.9314198672688792
rmse fold  2 =  0.9213435090036229
rmse fold  3 =  0.9136178277018069
rmse fold  4 =  0.9157348931885633
rmse fold  5 =  0.916640313089146
RMSE = 0.9198
