## Description：
这里通过一个简单的小案例，来看看如何通过调包的方式使用FM模型

In [1]:
# 导入包
from pyfm import pylibfm
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd

使用这个类最简单的方式就是把数据存成字典的形式，然后用DictVectorizer进行向量化：取值为字符串的特征（如user、item）会被one-hot编码，数值型特征（如age）则保持原值

In [2]:
# Toy training records: one dict per (user, item, age) interaction.
train = [
    {'user': user_id, 'item': item_id, 'age': age}
    for user_id, item_id, age in (
        ('1', '5', 19),
        ('2', '43', 33),
        ('3', '20', 55),
        ('4', '10', 20),
    )
]
# Learn the feature layout from the records and encode them in one step.
v = DictVectorizer()
X = v.fit_transform(train)      # the result is stored in compressed (sparse) form

In [3]:
# Expand the compressed matrix into a dense array so the encoding can be inspected.
X.toarray()

array([[19.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.],
       [33.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.],
       [55.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.],
       [20.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [8]:
# Dummy targets for the toy example: every training row gets the label 1.
y = np.full(X.shape[0], 1)

In [9]:
# Fit a factorization machine with pylibfm's default settings. The recorded
# output reports a log loss, so the default task appears to be classification.
fm = pylibfm.FM()
fm.fit(X, y)

Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training log loss: 0.38084


In [10]:
# Test sample: encode one new record with the already-fitted vectorizer `v`,
# then score it with the trained model.
test = v.transform({'user': "1", 'item': "10", 'age': 24})
fm.predict(test)

array([0.99212296])

## 预测Alice的评分
这个还是用前面协同过滤和矩阵分解的那个例子玩一下

In [4]:
def loadData():
    """Return the toy rating table as ``{user_id: {item: rating}}``.

    Users are the integers 1-5 and items are the letters 'A'-'E'.
    User 1 has no rating for item 'E'; every other user rated all five items.
    """
    items = ('A', 'B', 'C', 'D', 'E')
    scores_by_user = (
        (1, (5, 3, 4, 4)),        # only four scores: item 'E' is unrated
        (2, (3, 1, 2, 3, 3)),
        (3, (4, 3, 4, 3, 5)),
        (4, (3, 3, 1, 5, 4)),
        (5, (1, 5, 5, 2, 1)),
    )
    # zip stops at the shorter sequence, so user 1 simply gets no 'E' entry.
    return {user: dict(zip(items, scores)) for user, scores in scores_by_user}

In [5]:
# Load the nested {user: {item: rating}} dictionary.
rating_data = loadData()

In [6]:
# Reshape the nested rating dict into a long table with one row per
# (user, item, rating) triple.
df = pd.DataFrame(rating_data).T          # rows: users, columns: items
# User 1 has no rating for item 'E', which shows up as NaN in the wide table.
# Drop missing ratings explicitly instead of relying on stack() doing it
# implicitly; newer pandas stack implementations may keep NaN rows.
df = df.stack().dropna().reset_index()
df.columns = ['user', 'item', 'rating']
# Treat user ids as categorical labels rather than numbers.
df['user'] = df['user'].astype('str')

In [15]:
# Re-label every item with a string id ('0', '1', ...).
# The distinct labels are sorted before numbering: iterating a bare set of
# strings has no guaranteed order, so without sorting the ids could differ
# between kernel sessions and any hard-coded id downstream would silently
# refer to a different item.
item_map = {item: str(idx) for idx, item in enumerate(sorted(set(df['item'])))}
df['item'] = df['item'].map(item_map)

In [9]:
# Features are the user and item labels; the target is the rating.
train_data = df[['user', 'item']]
y = df['rating']

In [10]:
# One-hot encode the user and item columns. The fitted encoder is kept in
# `one` so the same encoding can be applied to test samples later.
one = OneHotEncoder()
x = one.fit_transform(train_data)

In [13]:
# Show the summary (shape, dtype, storage format) of the encoded matrix.
x

<24x10 sparse matrix of type '<class 'numpy.float64'>'
	with 48 stored elements in Compressed Sparse Row format>

In [22]:
# Build the model: a regression FM with 10 latent factors, trained for 100 epochs.
fm = pylibfm.FM(
    task='regression',
    num_factors=10,
    num_iter=100,
    initial_learning_rate=0.001,
    learning_rate_schedule='optimal',
    verbose=True,
)

In [23]:
# Train the FM on the one-hot features, using the ratings as targets.
fm.fit(x, y)

Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 3.36957
-- Epoch 2
Training MSE: 3.36957
-- Epoch 3
Training MSE: 3.36957
-- Epoch 4
Training MSE: 3.36957
-- Epoch 5
Training MSE: 3.36957
-- Epoch 6
Training MSE: 3.36957
-- Epoch 7
Training MSE: 3.36824
-- Epoch 8
Training MSE: 3.18354
-- Epoch 9
Training MSE: 2.90356
-- Epoch 10
Training MSE: 2.65204
-- Epoch 11
Training MSE: 2.43094
-- Epoch 12
Training MSE: 2.23572
-- Epoch 13
Training MSE: 2.06420
-- Epoch 14
Training MSE: 1.91343
-- Epoch 15
Training MSE: 1.78005
-- Epoch 16
Training MSE: 1.66318
-- Epoch 17
Training MSE: 1.56070
-- Epoch 18
Training MSE: 1.46989
-- Epoch 19
Training MSE: 1.38988
-- Epoch 20
Training MSE: 1.31918
-- Epoch 21
Training MSE: 1.25672
-- Epoch 22
Training MSE: 1.20164
-- Epoch 23
Training MSE: 1.15306
-- Epoch 24
Training MSE: 1.10965
-- Epoch 25
Training MSE: 1.07162
-- Epoch 26
Training MSE: 1.03799
-- Epoch 27
Training MSE: 1.00777
-- Epoch 28
Tra

In [25]:
# Test sample: user '1' and item 'E', the only item user 1 has not rated.
# The item's encoded id is looked up in item_map rather than hard-coded,
# because the id each item receives depends on how item_map was built.
test = {'user': '1', 'item': item_map['E']}
# Encode with the already-fitted encoder; the one-row frame has the same
# column order (user, item) as the training data.
x_test = one.transform(pd.DataFrame(test, index=[0]))

In [29]:
# Predict the rating for the encoded test sample. predict returns an array,
# so its first element is the score for the single test row.
pred_rating = fm.predict(x_test)
print('FM的预测评分:{}'.format(pred_rating[0]))

FM的预测评分:3.513755892491899
